@comment{Workshop paper (AIRWeb 2007) by Gan and Suel on two-stage web spam
  classification. The abstract and keywords below have been cleaned of
  line-break hyphenation and dropped "fi" ligatures left by text extraction;
  bracketed numbers in the abstract are the paper's own citation markers.}
@inproceedings{3a32c384b9fb4ba6862a7774620ddff3,
  author    = {Gan, Qingqing and Suel, Torsten},
  title     = {Improving web spam classifiers using link structure},
  booktitle = {{AIRWeb} 2007 - Proceedings of the 3rd International Workshop on Adversarial Information Retrieval on the Web},
  series    = {ACM International Conference Proceeding Series},
  pages     = {17--20},
  year      = {2007},
  isbn      = {1595937323},
  doi       = {10.1145/1244408.1244412},
  language  = {English (US)},
  keywords  = {Classification, Link analysis, Machine learning, Search engines, Web mining, Web spam detection},
  abstract  = {Web spam has been recognized as one of the top challenges in the search engine industry [14]. A lot of recent work has addressed the problem of detecting or demoting web spam, including both content spam [16, 12] and link spam [22, 13]. However, any time an anti-spam technique is developed, spammers will design new spamming techniques to confuse search engine ranking methods and spam detection mechanisms. Machine learning-based classification methods can quickly adapt to newly developed spam techniques. We describe a two-stage approach to improve the performance of common classifiers. We first implement a classifier to catch a large portion of spam in our data. Then we design several heuristics to decide if a node should be relabeled based on the preclassified result and knowledge about the neighborhood. Our experimental results show visible improvements with respect to precision and recall.},
  note      = {AIRWeb 2007 - 3rd International Workshop on Adversarial Information Retrieval on the Web ; Conference date: 08-05-2007 Through 08-05-2007},
}