% Conference paper from the ICDAR 1995 proceedings (pages 557--562).
% Whole words in the title are braced so that sentence-casing styles keep
% the proper noun and the acronym capitalised; percent signs in the abstract
% are escaped so the text stays valid if a style typesets it.
@inproceedings{d378a67074524a06a4a2688ae4789c78,
  author    = {Kwon, {Hyuk Chul} and Hwang, {Ho Jeong} and Kim, {Min Jung} and Lee, {Seong Whan}},
  title     = {Contextual postprocessing of a {Korean} {OCR} system by linguistic constraints},
  booktitle = {Proceedings of the 3rd International Conference on Document Analysis and Recognition, {ICDAR} 1995},
  series    = {Proceedings of the International Conference on Document Analysis and Recognition, {ICDAR}},
  publisher = {IEEE Computer Society},
  year      = {1995},
  pages     = {557--562},
  doi       = {10.1109/ICDAR.1995.601958},
  language  = {English},
  keywords  = {confusion matrix, distance evaluation function, heuristics, postprocessing, syllable di-grams, viable-prefixes},
  abstract  = {The approach in this paper focuses on the contextual postprocessing by selecting the most feasible word from multiple output strings of an OCR system. The correction is applied only when the selection fails. The selected word is confirmed by the collocation between the word and the adjacent words. The five functions applied in the system are (1) to select a word from candidate words, (2) to correct candidate words using a confusion matrix of syllables, (3) to combine two substrings to a word that spans two lines, (4) to guess unknown nouns, and (5) to confirm a selected word by the contextual information of adjacent words. To improve speed, we use syllable di-grams and viable-prefixes of Korean words. The experimental result shows that the two heuristics speed up the system more than 1,000 times in worst case. Our system improves the word recognition rate of the OCR system from 90.50\% to 94.72\%.},
  note      = {Publisher Copyright: {\textcopyright} 1995 IEEE.; 3rd International Conference on Document Analysis and Recognition, ICDAR 1995 ; Conference date: 14-08-1995 Through 16-08-1995},
}