Use of edu.stanford.nlp.process.WordTokenFactory in project uuusa by aghie.
Shown below: the method process of the class Processor.
/**
 * Builds one sentiment dependency graph per sentence found in {@code text}.
 *
 * <p>The input, with a single space appended, is read by a {@code DocumentPreprocessor}
 * whose tokenizer factory is a PTB tokenizer created with the option
 * {@code ptb3Escaping=false}. For every sentence the preprocessor yields, the string
 * forms of its words are joined with single spaces, and the joined string is passed
 * through this processor's tokenizer, tagger and parser, in that order.
 *
 * @param text the raw text to analyse; a {@code null} value causes a
 *             {@code NullPointerException} when the trailing space is appended
 * @return the parsed graphs, in the order the sentences were yielded; empty if the
 *         preprocessor yields no sentence
 */
public List<SentimentDependencyGraph> process(String text) {
    List<SentimentDependencyGraph> graphs = new ArrayList<SentimentDependencyGraph>();

    StringReader paddedInput = new StringReader(text.concat(" "));
    DocumentPreprocessor sentenceSplitter = new DocumentPreprocessor(paddedInput);
    sentenceSplitter.setTokenizerFactory(
            PTBTokenizer.factory(new WordTokenFactory(), "ptb3Escaping=false"));

    for (List<HasWord> sentence : sentenceSplitter) {
        // Rebuild the sentence as plain text so the project tokenizer can re-split it.
        String sentenceText = sentence.stream()
                .map(word -> word.toString())
                .collect(Collectors.joining(" "));

        List<String> tokens = this.tokenizer.tokenize(sentenceText);
        List<TaggedTokenInformation> taggedTokens = this.tagger.tag(tokens);
        SentimentDependencyGraph graph = this.parser.parse(taggedTokens);
        graphs.add(graph);
    }

    return graphs;
}
Aggregations