% Conference paper by Ha, Lee, Jang, Lee and Lee in the DSAA 2014 proceedings
% (pages 607--612, DOI 10.1109/DSAA.2014.7058134).
%
% Conventions used in this entry:
%   - Author and editor names are in "Last, First" form. Braced given names
%     such as {Jung Hyun} keep a two-part given name together as one token.
%   - The proper noun in the title is brace-protected so that styles which
%     apply sentence casing keep its capitals.
%   - The proceedings title is given once, in booktitle; it is not repeated
%     as a series.
%
% NOTE(review): month/day (10 March) combined with year 2014 falls before the
% conference date recorded in the note field (30 Oct to 1 Nov 2014). Confirm
% the publication date against the publisher's record before relying on it.
@inproceedings{cfe1f5b3477141f2942e37cc8817ec92,
title = "Toward robust classification using the {Open Directory Project}",
abstract = "The Open Directory Project (ODP) is a large scale, high quality and publicly available web directory utilized in many studies and real-world applications. In this paper, we explore training data expansion techniques for text classification as one of the possible directions to deal with the sparse characteristic of the ODP dataset. We propose a dozen classification methods, which can be differentiated by (1) from which categories training data is expanded, and (2) how the expanded training data is merged to generate centroid vectors. Evaluation results show that training data expansion significantly improves the classification performance more than representative classifiers. We also find that (1) child and descendant categories are more valuable sources to expand training data than parent and ancestor categories, and (2) distance-based weighting is superior to simple averaging to merge the expanded training data.",
author = "Ha, Jongwoo and Lee, {Jung Hyun} and Jang, {Won Jun} and Lee, {Yong Ku} and Lee, Sang-Geun",
note = "Publisher Copyright: {\textcopyright} 2014 IEEE.; 2014 IEEE International Conference on Data Science and Advanced Analytics, DSAA 2014 ; Conference date: 30-10-2014 Through 01-11-2014",
year = "2014",
month = mar,
day = "10",
doi = "10.1109/DSAA.2014.7058134",
language = "English",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "607--612",
editor = "Karypis, George and Cao, Longbing and Wang, Wei and King, Irwin",
booktitle = "DSAA 2014 - Proceedings of the 2014 IEEE International Conference on Data Science and Advanced Analytics",
}