VoldemortKG paper in the ISWC 2016 proceedings (conference paper in an LNCS volume).
Clean-up notes for this record:
  - Citation key left as exported so existing citations keep resolving.
  - Names are stored as "Last, First"; accented letters use the brace-wrapped
    escape form so classic BibTeX sorts and labels them correctly.
  - The editor list had one editor listed twice; the repeat was dropped.
  - The publisher copyright notice lives in the `copyright` field, which the
    standard styles do not print, instead of `note`, which they do.
  - NOTE(review): the LNCS volume number is not recorded here; add a `volume`
    field after confirming it against the publisher record.
  - NOTE(review): diacritics in two editor surnames were restored from the
    de-accented export; confirm against the proceedings front matter.

@inproceedings{12712b3640904349a5c77d4e31b99052,
  author    = {Tonon, Alberto and Felder, Victor and Difallah, Djellel Eddine and Cudr{\'e}-Mauroux, Philippe},
  title     = {{VoldemortKG}: Mapping schema.org and web entities to linked open data},
  booktitle = {The Semantic Web -- {ISWC} 2016 -- 15th International Semantic Web Conference, 2016, Proceedings},
  editor    = {Sabou, Marta and Lecue, Freddy and Groth, Paul and Simperl, Elena and Kr{\"o}tzsch, Markus and Gray, Alasdair and Fl{\"o}ck, Fabian and Gil, Yolanda},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Verlag},
  year      = {2016},
  pages     = {220--228},
  doi       = {10.1007/978-3-319-46547-0_23},
  isbn      = {978-3-319-46546-3},
  language  = {English (US)},
  keywords  = {Data integration, Dataset, Instance matching, Knowledge graphs, Schema.org},
  abstract  = {Increasingly, webpages mix entities coming from various sources and represented in different ways. It can thus happen that the same entity is both described by using schema.org annotations and by creating a text anchor pointing to its Wikipedia page. Often, those representations provide complementary information which is not exploited since those entities are disjoint. We explored the extent to which entities represented in different ways repeat on the Web, how they are related, and how they complement (or link) to each other. Our initial experiments showed that we can unveil a previously unexploited knowledge graph by applying simple instance matching techniques on a large collection of schema.org annotations and Wikipedia. The resulting knowledge graph aggregates entities (often tail entities) scattered across several webpages, and complements existing Wikipedia entities with new facts and properties. In order to facilitate further investigation in how to mine such information, we are releasing (i) an excerpt of all Common Crawl webpages containing both Wikipedia and schema.org annotations, (ii) the toolset to extract this information and perform knowledge graph construction and mapping onto DBpedia, as well as (iii) the resulting knowledge graph (VoldemortKG) obtained via label matching techniques.},
  copyright = {Publisher Copyright: {\textcopyright} Springer International Publishing AG 2016.},
}