use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.
the class BrownCorpusReader method getNext.
/**
 * Lets the parent reader fill the CAS and then adds the sequence-classification annotations:
 * one {@link TextClassificationSequence} per {@link Sentence}, and for every {@link Token}
 * covered by that sentence one {@link TextClassificationTarget} plus one
 * {@link TextClassificationOutcome} spanning the token.
 *
 * @param cas
 *            the CAS to populate
 * @throws IOException
 *             declared by the overridden method; propagated from the parent reader
 * @throws CollectionException
 *             if the JCas view cannot be obtained from {@code cas}, or propagated from the
 *             parent reader
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
    super.getNext(cas);

    final JCas jcas;
    try {
        jcas = cas.getJCas();
    } catch (CASException e) {
        throw new CollectionException(e);
    }

    for (Sentence sentence : JCasUtil.select(jcas, Sentence.class)) {
        // the sentence span is the sequence
        new TextClassificationSequence(jcas, sentence.getBegin(), sentence.getEnd()).addToIndexes();

        for (Token token : JCasUtil.selectCovered(jcas, Token.class, sentence)) {
            final int begin = token.getBegin();
            final int end = token.getEnd();

            TextClassificationTarget target = new TextClassificationTarget(jcas, begin, end);
            // the token text becomes the suffix of this target's ID
            target.setSuffix(token.getCoveredText());
            target.addToIndexes();

            TextClassificationOutcome label = new TextClassificationOutcome(jcas, begin, end);
            label.setOutcome(getTextClassificationOutcome(jcas, target));
            label.addToIndexes();
        }
    }
}
use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.
the class SequenceOutcomeReaderTest method testSkipLineReader.
/**
 * Reads {@code posDummy.txt} with the reader configured to skip lines starting with
 * {@code #} and verifies the tokens of the second sequence that was read.
 */
@Test
public void testSkipLineReader() throws Exception {
    CollectionReader reader = CollectionReaderFactory.createReader(
            SequenceOutcomeReader.class,
            SequenceOutcomeReader.PARAM_SOURCE_LOCATION, "src/test/resources/sequence/posDummy.txt",
            SequenceOutcomeReader.PARAM_SKIP_LINES_START_WITH_STRING, "#");

    List<List<String>> tokensPerSequence = new ArrayList<>();
    // collected per CAS; not asserted on below
    List<List<String>> outcomesPerCas = new ArrayList<>();

    while (reader.hasNext()) {
        JCas jcas = JCasFactory.createJCas();
        reader.getNext(jcas.getCas());

        // one token list per sequence annotation
        for (TextClassificationSequence seq : JCasUtil.select(jcas, TextClassificationSequence.class)) {
            List<String> coveredTexts = new ArrayList<>();
            for (TextClassificationTarget t : JCasUtil.selectCovered(jcas, TextClassificationTarget.class, seq)) {
                coveredTexts.add(t.getCoveredText());
            }
            tokensPerSequence.add(coveredTexts);
        }

        // one outcome list per CAS
        List<String> labels = new ArrayList<>();
        for (TextClassificationOutcome annotation : JCasUtil.select(jcas, TextClassificationOutcome.class)) {
            labels.add(annotation.getOutcome());
        }
        outcomesPerCas.add(labels);
    }

    // tokens of the second sequence
    List<String> second = tokensPerSequence.get(1);
    assertEquals(4, second.size());
    assertEquals("This2", second.get(0));
    assertEquals("is2", second.get(1));
    assertEquals("a2", second.get(2));
    assertEquals("!", second.get(3));
}
use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.
the class SequenceOutcomeReader method setTextClassificationSequence.
/**
 * Creates a {@link TextClassificationSequence} over the given span and adds it to the
 * indexes of the JCas.
 *
 * @param aJCas
 *            the JCas that receives the annotation
 * @param begin
 *            begin offset of the sequence
 * @param end
 *            end offset of the sequence
 */
protected void setTextClassificationSequence(JCas aJCas, int begin, int end) {
    new TextClassificationSequence(aJCas, begin, end).addToIndexes();
}
use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.
the class TestTaskUtils method initJCas.
/**
 * Builds a JCas fixture holding six space-separated tokens, each annotated with a
 * {@link TextClassificationTarget} (id = token position) and a
 * {@link TextClassificationOutcome} carrying the token's tag, plus two
 * {@link TextClassificationSequence} annotations: the first spans the first three tokens,
 * the second spans the rest of the text.
 *
 * @param setUnitIdAsPartOfTheInstanceId
 *            if {@code true}, the token text is set as the suffix of each target
 * @return the populated JCas
 * @throws Exception
 *             if the engine or the JCas cannot be created
 */
private JCas initJCas(boolean setUnitIdAsPartOfTheInstanceId) throws Exception {
    AnalysisEngine engine = AnalysisEngineFactory.createEngine(NoOpAnnotator.class);
    JCas jCas = engine.newJCas();

    JCasId casId = new JCasId(jCas);
    casId.setId(4711);
    casId.addToIndexes();

    DocumentMetaData metaData = new DocumentMetaData(jCas);
    metaData.setDocumentTitle("title");
    metaData.setDocumentId("4711");
    metaData.addToIndexes();

    String[][] tokens = {
            // sequence 1
            { "a", "DT" },
            { "car", "NN" },
            { "drives", "VBZ" },
            // sequence 2
            { "the", "DT" },
            { "hedgehogs", "NN" },
            { "dies", "VBZ" } };

    StringBuilder buffer = new StringBuilder();
    for (int idx = 0; idx < tokens.length; idx++) {
        String word = tokens[idx][0];
        String tag = tokens[idx][1];

        // a single blank separates consecutive tokens
        if (idx > 0) {
            buffer.append(" ");
        }
        int begin = buffer.length();
        int end = begin + word.length();

        TextClassificationTarget target = new TextClassificationTarget(jCas, begin, end);
        if (setUnitIdAsPartOfTheInstanceId) {
            target.setSuffix(word);
        }
        target.setId(idx);
        target.addToIndexes();

        TextClassificationOutcome label = new TextClassificationOutcome(jCas, begin, end);
        label.setOutcome(tag);
        label.addToIndexes();

        buffer.append(word);
    }

    String text = buffer.toString();
    jCas.setDocumentText(text);

    // first three tokens plus the two blanks between them
    int lenSeq1 = tokens[0][0].length() + tokens[1][0].length() + tokens[2][0].length() + 2;

    TextClassificationSequence first = new TextClassificationSequence(jCas, 0, lenSeq1);
    first.addToIndexes();

    // starts right after the blank that follows the first sequence
    TextClassificationSequence second = new TextClassificationSequence(jCas, lenSeq1 + 1, text.length());
    second.addToIndexes();

    return jCas;
}
use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.
the class FoldClassificationUnitCasMultiplier method setTargetAnnotation.
/**
 * Re-creates the buffered annotations in {@code copyJCas}.
 * <p>
 * Without {@code useSequences}, every buffered annotation becomes a
 * {@link TextClassificationTarget} with a running unit id. With {@code useSequences}, every
 * buffered annotation becomes a {@link TextClassificationSequence} with a running sequence
 * id, and the targets held in {@code seqModeUnitsCoveredBySequenceAnno} are added to the
 * indexes again.
 *
 * @param copyJCas
 *            the JCas in which the new annotations are created
 */
private void setTargetAnnotation(JCas copyJCas) {
    if (!useSequences) {
        for (AnnotationFS buffered : buf) {
            TextClassificationTarget target = new TextClassificationTarget(copyJCas, buffered.getBegin(),
                    buffered.getEnd());
            target.addToIndexes();
            target.setId(unitCounter++);
        }
        return;
    }

    for (AnnotationFS buffered : buf) {
        TextClassificationSequence sequence = new TextClassificationSequence(copyJCas, buffered.getBegin(),
                buffered.getEnd());
        sequence.addToIndexes();
        sequence.setId(seqCounter);
        seqCounter++;

        // re-add the units that are covered by those sequences
        for (TextClassificationTarget pending : seqModeUnitsCoveredBySequenceAnno) {
            pending.addToIndexes();
        }
        // start over with an empty list so the same units are not indexed again
        seqModeUnitsCoveredBySequenceAnno = new ArrayList<>();
    }
}
Aggregations