Use of edu.stanford.nlp.ling.HasWord in project CoreNLP by stanfordnlp.
From the class DocumentPreprocessorTest, method compareXMLResults.
/**
 * Runs {@code input} through a {@link DocumentPreprocessor} in XML mode, restricted to the
 * given element delimiter, and asserts that the sentences produced match the expected ones.
 *
 * @param input           text handed to the preprocessor
 * @param element         value passed to {@code setElementDelimiter}
 * @param expectedResults expected sentence strings, in iteration order
 */
private static void compareXMLResults(String input, String element, String... expectedResults) {
  BufferedReader xmlReader = new BufferedReader(new StringReader(input));
  DocumentPreprocessor preprocessor =
      new DocumentPreprocessor(xmlReader, DocumentPreprocessor.DocType.XML);
  preprocessor.setElementDelimiter(element);

  // Render every sentence the preprocessor yields as a single string.
  List<String> actual = new ArrayList<>();
  preprocessor.forEach(tokens -> actual.add(SentenceUtils.listToString(tokens)));

  // Check the count first so the element-wise comparison below cannot run out of bounds.
  assertEquals(expectedResults.length, actual.size());
  int position = 0;
  for (String expected : expectedResults) {
    assertEquals(expected, actual.get(position));
    position++;
  }
}
Use of edu.stanford.nlp.ling.HasWord in project textdb by TextDB.
From the class NlpSplitOperator, method computeSentenceList.
/**
 * Splits the text of the predicate's input attribute into sentences and returns one
 * {@link Span} per sentence.
 *
 * <p>Each span carries the attribute name, a start/end pair, the
 * {@code PropertyNameConstants.NLP_SPLIT_KEY} key and the sentence text as rebuilt by
 * {@code Sentence.listToString}.
 *
 * <p>NOTE(review): start/end are derived from the length of the rebuilt sentence text plus
 * one separator position between sentences, not from positions in the input string. Confirm
 * whether callers expect these to line up with the original text.
 *
 * @param inputTuple tuple whose input attribute value is converted to a string and split
 * @return the sentence spans in the order the preprocessor yields them
 */
private List<Span> computeSentenceList(Tuple inputTuple) {
    String attributeName = predicate.getInputAttributeName();
    String spanKey = PropertyNameConstants.NLP_SPLIT_KEY;

    String text = inputTuple.<IField>getField(attributeName).getValue().toString();
    DocumentPreprocessor splitter = new DocumentPreprocessor(new StringReader(text));

    List<Span> spans = new ArrayList<>();
    int offset = 0;
    for (List<HasWord> tokens : splitter) {
        String sentenceText = Sentence.listToString(tokens);
        int sentenceEnd = offset + sentenceText.length();
        spans.add(new Span(attributeName, offset, sentenceEnd, spanKey, sentenceText));
        // Skip one position before the next sentence begins.
        offset = sentenceEnd + 1;
    }
    return spans;
}
Aggregations