% Barbosa & Freire, WWW 2007: adaptive crawling for hidden-Web entry points.
% The conference name lives in booktitle only; note carries just the event dates.
@inproceedings{df4a1308c98c4eb482589772f6419934,
title = "An Adaptive Crawler for Locating {Hidden-Web} Entry Points",
abstract = "In this paper we describe new adaptive crawling strategies to efficiently locate the entry points to hidden-Web sources. The fact that hidden-Web sources are very sparsely distributed makes the problem of locating them especially challenging. We deal with this problem by using the contents of pages to focus the crawl on a topic; by prioritizing promising links within the topic; and by also following links that may not lead to immediate benefit. We propose a new framework whereby crawlers automatically learn patterns of promising links and adapt their focus as the crawl progresses, thus greatly reducing the amount of required manual setup and tuning. Our experiments over real Web pages in a representative set of domains indicate that online learning leads to significant gains in harvest rates---the adaptive crawlers retrieve up to three times as many forms as crawlers that use a fixed focus strategy.",
keywords = "Hidden Web, Learning classifiers, Online learning, Web crawling strategies",
author = "Barbosa, Luciano and Freire, Juliana",
year = "2007",
doi = "10.1145/1242572.1242632",
language = "English (US)",
isbn = "1595936548",
pages = "441--450",
booktitle = "16th International World Wide Web Conference, WWW2007",
note = "Conference date: 8--12 May 2007",
}