Use of edu.stanford.nlp.simple.Sentence in project Anserini by castorini.
The class RetrieveSentences, method getRankedPassages.
public void getRankedPassages(Args args) throws Exception {
    Map<String, Float> scoredDocs = retrieveDocuments(args);
    Map<String, Float> sentencesMap = new LinkedHashMap<>();
    IndexUtils util = new IndexUtils(args.index);
    TokenizerFactory<CoreLabel> tokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
    for (Map.Entry<String, Float> doc : scoredDocs.entrySet()) {
        List<Sentence> sentences = util.getSentDocument(doc.getKey());
        for (Sentence sent : sentences) {
            List<CoreLabel> tokens = tokenizerFactory.getTokenizer(new StringReader(sent.text())).tokenize();
            String answerTokens = tokens.stream().map(CoreLabel::toString).collect(Collectors.joining(" "));
            sentencesMap.put(answerTokens, doc.getValue());
        }
    }
    String queryTokens = tokenizerFactory.getTokenizer(new StringReader(args.query)).tokenize()
        .stream().map(CoreLabel::toString).collect(Collectors.joining(" "));
    scorer.score(queryTokens, sentencesMap);
    List<ScoredPassage> topPassages = scorer.extractTopPassages();
    for (ScoredPassage s : topPassages) {
        System.out.println(s.getSentence() + " " + s.getScore());
    }
}
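The heart of this method is the tokenization step that normalizes both the retrieved sentences and the query before scoring. Below is a minimal, self-contained sketch of that step alone, assuming only the standard CoreNLP PTBTokenizer API; the query string is invented for illustration, and CoreLabel::word is used here (the snippet above joins CoreLabel::toString) to print plain token text.

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;

import java.io.StringReader;
import java.util.List;
import java.util.stream.Collectors;

public class TokenizeSketch {
    public static void main(String[] args) {
        // Build a PTB tokenizer factory that produces CoreLabel tokens, with no extra options.
        TokenizerFactory<CoreLabel> factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
        // Tokenize an example query string (invented for illustration).
        List<CoreLabel> tokens = factory.getTokenizer(new StringReader("Who wrote the Declaration of Independence?")).tokenize();
        // Join the token texts with spaces, as getRankedPassages does before passing text to the scorer.
        String joined = tokens.stream().map(CoreLabel::word).collect(Collectors.joining(" "));
        System.out.println(joined);
    }
}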
Use of edu.stanford.nlp.simple.Sentence in project CoreNLP by stanfordnlp.
The class KBPRelationExtractor, method readDataset.
/**
* Read a dataset from a CoNLL formatted input file
* @param conllInputFile The input file, formatted as a TSV
* @return A list of examples.
*/
@SuppressWarnings("StatementWithEmptyBody")
static List<Pair<KBPInput, String>> readDataset(File conllInputFile) throws IOException {
    BufferedReader reader = IOUtils.readerFromFile(conllInputFile);
    List<Pair<KBPInput, String>> examples = new ArrayList<>();
    int i = 0;
    String relation = null;
    List<String> tokens = new ArrayList<>();
    Span subject = new Span(Integer.MAX_VALUE, Integer.MIN_VALUE);
    NERTag subjectNER = null;
    Span object = new Span(Integer.MAX_VALUE, Integer.MIN_VALUE);
    NERTag objectNER = null;
    String line = reader.readLine();
    if (!line.startsWith("#")) {
        throw new IllegalArgumentException("First line of input file should be header definition");
    }
    while ((line = reader.readLine()) != null) {
        String[] fields = line.split("\t");
        if (relation == null) {
            // Case: read the relation
            assert fields.length == 1;
            relation = fields[0];
        } else if (fields.length == 9) {
            // Case: read a token
            tokens.add(fields[0]);
            if ("SUBJECT".equals(fields[1])) {
                subject = new Span(Math.min(subject.start(), i), Math.max(subject.end(), i + 1));
                subjectNER = valueOf(fields[2].toUpperCase());
            } else if ("OBJECT".equals(fields[3])) {
                object = new Span(Math.min(object.start(), i), Math.max(object.end(), i + 1));
                objectNER = valueOf(fields[4].toUpperCase());
            } else if ("-".equals(fields[1]) && "-".equals(fields[3])) {
                // do nothing
            } else {
                throw new IllegalStateException("Could not parse CoNLL file");
            }
            i += 1;
        } else if (StringUtils.isNullOrEmpty(line.trim())) {
            // Case: commit a sentence
            examples.add(Pair.makePair(new KBPInput(subject, object, subjectNER, objectNER, new Sentence(tokens)), relation));
            // (clear the variables)
            i = 0;
            relation = null;
            tokens = new ArrayList<>();
            subject = new Span(Integer.MAX_VALUE, Integer.MIN_VALUE);
            object = new Span(Integer.MAX_VALUE, Integer.MIN_VALUE);
        } else {
            throw new IllegalStateException("Could not parse CoNLL file");
        }
    }
    return examples;
}
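The Sentence usage here is the new Sentence(tokens) call made when a blank line commits a block of token rows. A minimal sketch of that construction in isolation, assuming the simple-API constructor that accepts a pre-tokenized word list; the example tokens and subject span are invented for illustration.

import edu.stanford.nlp.ie.machinereading.structure.Span;
import edu.stanford.nlp.simple.Sentence;

import java.util.Arrays;
import java.util.List;

public class SentenceFromTokens {
    public static void main(String[] args) {
        // Tokens as they would be accumulated from the token column of the TSV (invented example).
        List<String> tokens = Arrays.asList("Barack", "Obama", "was", "born", "in", "Hawaii", ".");
        // A Span over token indices, [start, end), covering the subject mention, as readDataset builds it.
        Span subject = new Span(0, 2);
        Sentence sentence = new Sentence(tokens);
        System.out.println(sentence.text());                                 // the reconstructed sentence text
        System.out.println(tokens.subList(subject.start(), subject.end()));  // [Barack, Obama]
    }
}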
Use of edu.stanford.nlp.simple.Sentence in project CoreNLP by stanfordnlp.
The class KBPSemgrexExtractor, method classify.
@Override
public Pair<String, Double> classify(KBPInput input) {
    for (RelationType rel : RelationType.values()) {
        if (rules.containsKey(rel) && rel.entityType == input.subjectType && rel.validNamedEntityLabels.contains(input.objectType)) {
            Collection<SemgrexPattern> rulesForRel = rules.get(rel);
            CoreMap sentence = input.sentence.asCoreMap(Sentence::nerTags, Sentence::dependencyGraph);
            boolean matches = matches(sentence, rulesForRel, input, sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class))
                || matches(sentence, rulesForRel, input, sentence.get(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class));
            if (matches) {
                //logger.log("MATCH for " + rel + ". Sentence: " + sentence + " with rules for " + rel);
                return Pair.makePair(rel.canonicalName, 1.0);
            }
        }
    }
    return Pair.makePair(NO_RELATION, 1.0);
}
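classify delegates the actual pattern matching to a matches(...) helper over collections of Semgrex rules. As a rough illustration of what one such match involves, here is a minimal sketch that compiles a single Semgrex pattern and runs it over a sentence's dependency graph via the simple API; the pattern and sentence text are invented, and running it requires the usual CoreNLP English models on the classpath.

import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.simple.Sentence;

public class SemgrexSketch {
    public static void main(String[] args) {
        Sentence sentence = new Sentence("Chris Manning teaches at Stanford University.");
        // Dependency parse of the sentence, computed lazily by the simple API.
        SemanticGraph graph = sentence.dependencyGraph();
        // A toy pattern: any node governing another node via an nsubj edge (invented for illustration).
        SemgrexPattern pattern = SemgrexPattern.compile("{} >nsubj {}=subject");
        SemgrexMatcher matcher = pattern.matcher(graph);
        while (matcher.find()) {
            System.out.println("subject head: " + matcher.getNode("subject"));
        }
    }
}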
Use of edu.stanford.nlp.simple.Sentence in project CoreNLP by stanfordnlp.
The class TSVSentenceIteratorITest, method testOnlyGloss.
/**
* TODO(chaganty): Support creation of sentences with TSV iterator without any tokens annotations.
* Currently, Sentence does not like that.
*/
//@Test
public void testOnlyGloss() {
    List<List<String>> entries = new ArrayList<>();
    entries.add(new ArrayList<String>() {
        {
            add("124");
            add("docid1");
            add("1");
            add("This is a test document.");
        }
    });
    TSVSentenceIterator it = new TSVSentenceIterator(entries.iterator(), new ArrayList<SentenceField>() {
        {
            add(SentenceField.ID);
            add(SentenceField.DOC_ID);
            add(SentenceField.SENTENCE_INDEX);
            add(SentenceField.GLOSS);
        }
    });
    Sentence sentence = it.next();
    Assert.assertEquals(1, sentence.sentenceIndex());
    Assert.assertEquals("This is a test document.", sentence.text());
    Assert.assertEquals("docid1", sentence.asCoreMap().get(CoreAnnotations.DocIDAnnotation.class));
    Assert.assertEquals("124", sentence.asCoreMap().get(CoreAnnotations.SentenceIDAnnotation.class));
}
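For comparison, the same gloss can also be wrapped directly in a simple-API Sentence, which tokenizes the text itself but does not carry the document id or sentence index that the TSV iterator supplies. A minimal sketch:

import edu.stanford.nlp.simple.Sentence;

public class GlossSketch {
    public static void main(String[] args) {
        Sentence sentence = new Sentence("This is a test document.");
        System.out.println(sentence.text());   // the original gloss
        System.out.println(sentence.words());  // tokens produced by the simple API's tokenizer
    }
}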
Use of edu.stanford.nlp.simple.Sentence in project Info-Evaluation by TechnionYP5777.
The class App, method main.
public static void main(final String[] args) {
    System.out.println("Hello World!");
    final Sentence sent = new Sentence("Justin Bieber is in the sky with diamonds.");
    System.out.println(Arrays.toString(sent.nerTags().toArray()));
    // POS tag of the first token ("Justin"), e.g. NNP
    final String firstPOSTag = sent.posTag(0);
    System.out.println(sent.word(0));
    System.out.println(firstPOSTag);
}
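The same Sentence object exposes further per-token views. A minimal sketch of a few more simple-API accessors; the exact tags and lemmas depend on the English models that are loaded, so the output is not guaranteed.

import edu.stanford.nlp.simple.Sentence;

public class SentenceAccessors {
    public static void main(String[] args) {
        Sentence sent = new Sentence("Justin Bieber is in the sky with diamonds.");
        System.out.println(sent.words());    // tokenized words
        System.out.println(sent.posTags());  // one POS tag per token
        System.out.println(sent.lemmas());   // one lemma per token
    }
}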