% Workshop paper describing the authors' system for the 2013 NLI Shared Task
% (see abstract); published in the BEA 2013 workshop proceedings.
% The proceedings title lives in booktitle only; no series field is given,
% because a series identical to booktitle would be printed twice by most styles.
% NOTE(review): address holds a country rather than a city -- confirm the
% publisher location before relying on it.
@inproceedings{3484eccb74974497acecdaeb82184144,
  author    = {Jarvis, Scott and Bestgen, Yves and Pepper, Steve},
  title     = {Maximizing Classification Accuracy in Native Language Identification},
  booktitle = {Proceedings of the 8th Workshop on Innovative Use of {NLP} for Building Educational Applications, {BEA} 2013},
  editor    = {Tetreault, Joel and Burstein, Jill and Leacock, Claudia},
  pages     = {111--118},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {United States},
  year      = {2013},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2013 Association for Computational Linguistics.; 8th Workshop on Innovative Use of NLP for Building Educational Applications, BEA 2013 ; Conference date: 13-06-2013},
  abstract  = {This paper reports our contribution to the 2013 NLI Shared Task. The purpose of the task was to train a machine-learning system to identify the native-language affiliations of 1,100 texts written in English by nonnative speakers as part of a high-stakes test of general academic English proficiency. We trained our system on the new TOEFL11 corpus, which includes 11,000 essays written by nonnative speakers from 11 native-language backgrounds. Our final system used an SVM classifier with over 400,000 unique features consisting of lexical and POS n-grams occurring in at least two texts in the training set. Our system identified the correct native-language affiliations of 83.6\% of the texts in the test set. This was the highest classification accuracy achieved in the 2013 NLI Shared Task.},
}