There are various ways to call the code, but here's a simple example to get started with using either PTBTokenizer
directly or calling DocumentPreprocessor.
import java.io.FileReader; import java.io.IOException; import java.util.List; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.HasWord; import edu.stanford.nlp.process.CoreLabelTokenFactory; import edu.stanford.nlp.process.DocumentPreprocessor; import edu.stanford.nlp.process.PTBTokenizer; public class TokenizerDemo { public static void main(String[] args) throws IOException { for (String arg : args) { // option #1: By sentence. DocumentPreprocessor dp = new DocumentPreprocessor(arg); for (Listsentence : dp) { System.out.println(sentence); } // option #2: By token PTBTokenizer ptbt = new PTBTokenizer (new FileReader(arg), new CoreLabelTokenFactory(), ""); for (CoreLabel label; ptbt.hasNext(); ) { label = ptbt.next(); System.out.println(label); } } } }
Read full article from The Stanford NLP (Natural Language Processing) Group
No comments:
Post a Comment