use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.
the class SequenceOutcomeAnnotator method process.
/**
 * Adds one {@link TextClassificationSequence} for every {@link Sentence} in the CAS and, for
 * each {@link Token} covered by that sentence, a {@link TextClassificationTarget} plus a
 * {@link TextClassificationOutcome} spanning the token.
 *
 * @param aJCas
 *            the CAS whose sentences and tokens are turned into sequences, targets and outcomes
 * @throws AnalysisEngineProcessException
 *             declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    for (Sentence sentence : JCasUtil.select(aJCas, Sentence.class)) {
        // The sequence spans the whole sentence.
        TextClassificationSequence sentenceSequence = new TextClassificationSequence(aJCas,
                sentence.getBegin(), sentence.getEnd());
        sentenceSequence.addToIndexes();

        for (Token tok : JCasUtil.selectCovered(aJCas, Token.class, sentence)) {
            // Target: carries a running id (tcId) and the token text as suffix.
            TextClassificationTarget target = new TextClassificationTarget(aJCas, tok.getBegin(),
                    tok.getEnd());
            target.setId(tcId++);
            target.setSuffix(tok.getCoveredText());
            target.addToIndexes();

            // Outcome: same span as the target, label looked up for that target.
            TextClassificationOutcome label = new TextClassificationOutcome(aJCas, tok.getBegin(),
                    tok.getEnd());
            label.setOutcome(getTextClassificationOutcome(aJCas, target));
            label.addToIndexes();
        }
    }
}
use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.
the class CrfSuiteLoadModelConnector method process.
/**
 * Collects the instances of every {@link TextClassificationSequence} in the CAS, feeds them in
 * batches to the command returned by {@code buildCommand()} via {@code runCommand}, and hands
 * the concatenated output to {@code setPredictedOutcome}.
 *
 * @param jcas
 *            the CAS to process
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised during processing
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    // Maximum number of iterator entries passed to a single command invocation.
    final int batchSize = 5000;
    try {
        List<Instance> collected = new ArrayList<>();
        int nextSequenceId = 0;
        for (TextClassificationSequence sequence : JCasUtil.select(jcas,
                TextClassificationSequence.class)) {
            collected.addAll(
                    getInstancesInSequence(featureExtractors, jcas, sequence, true, nextSequenceId));
            nextSequenceId++;
        }

        CrfSuiteFeatureFormatExtractionIterator entries = new CrfSuiteFeatureFormatExtractionIterator(
                collected);

        // The output of all batches is accumulated in memory before it is applied to the CAS.
        StringBuilder predictions = new StringBuilder();
        while (entries.hasNext()) {
            StringBuilder batch = new StringBuilder();
            for (int taken = 0; taken < batchSize && entries.hasNext(); taken++) {
                StringBuilder entry = entries.next();
                batch.append(entry);
            }
            List<String> command = buildCommand();
            StringBuilder batchResult = runCommand(command, batch.toString());
            predictions.append(batchResult);
        }

        setPredictedOutcome(jcas, predictions.toString());
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }
}
use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.
the class TcuLookUpTable method extract.
/**
 * Rebuilds the unit and sequence lookup tables for the given view.
 * <p>
 * If {@code isTheSameDocument(aView)} reports that the view was already seen, the existing
 * tables are kept. Otherwise all tables are reset and refilled:
 * <ul>
 * <li>{@code begin2Unit}: begin offset of a target to the target itself</li>
 * <li>{@code unitBegin2Idx} / {@code unitEnd2Idx}: begin / end offset of a target to its
 * position in the iteration order of the view</li>
 * <li>{@code units}: all targets in iteration order</li>
 * <li>{@code idx2SequenceBegin} / {@code idx2SequenceEnd}: index of the target that starts /
 * ends at the begin / end offset of a {@link TextClassificationSequence}</li>
 * </ul>
 *
 * @param aView
 *            the view whose targets and sequences are indexed
 * @param aTarget
 *            not used by this method
 * @return always {@code null}; the method only updates the lookup tables
 * @throws TextClassificationException
 *             declared for implementations that build on this method
 */
public Set<Feature> extract(JCas aView, TextClassificationTarget aTarget) throws TextClassificationException {
    if (isTheSameDocument(aView)) {
        return null;
    }

    begin2Unit = new HashMap<Integer, TextClassificationTarget>();
    unitBegin2Idx = new HashMap<Integer, Integer>();
    // Reset together with the other tables: otherwise end offsets of a previously processed
    // document survive and a sequence end can resolve to a stale unit index.
    unitEnd2Idx = new HashMap<Integer, Integer>();
    idx2SequenceBegin = new HashMap<Integer, Boolean>();
    idx2SequenceEnd = new HashMap<Integer, Boolean>();
    units = new ArrayList<TextClassificationTarget>();

    int i = 0;
    for (TextClassificationTarget t : JCasUtil.select(aView, TextClassificationTarget.class)) {
        Integer begin = t.getBegin();
        Integer end = t.getEnd();
        begin2Unit.put(begin, t);
        unitBegin2Idx.put(begin, i);
        unitEnd2Idx.put(end, i);
        units.add(t);
        i++;
    }

    for (TextClassificationSequence sequence : JCasUtil.select(aView, TextClassificationSequence.class)) {
        Integer begin = sequence.getBegin();
        Integer end = sequence.getEnd();
        // The index is null when no target starts / ends exactly at the sequence boundary.
        Integer idxStartUnit = unitBegin2Idx.get(begin);
        Integer idxEndUnit = unitEnd2Idx.get(end);
        idx2SequenceBegin.put(idxStartUnit, true);
        idx2SequenceEnd.put(idxEndUnit, true);
    }
    return null;
}
use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.
the class SequenceOutcomeReaderTest method testReader.
/**
 * Reads {@code posDummy.txt} with {@code PARAM_SEQUENCES_PER_CAS} set to 1 and verifies the
 * number of sequences as well as the token text and outcome of every target in each sequence.
 */
@Test
public void testReader() throws Exception {
    CollectionReader reader = CollectionReaderFactory.createReader(
            SequenceOutcomeReader.class,
            SequenceOutcomeReader.PARAM_SOURCE_LOCATION, "src/test/resources/sequence/",
            SequenceOutcomeReader.PARAM_PATTERNS, "posDummy.txt",
            SequenceOutcomeReader.PARAM_SEQUENCES_PER_CAS, 1);

    List<List<String>> readSequences = new ArrayList<>();
    List<List<String>> readOutcomes = new ArrayList<>();
    int seqTargets = 0;

    while (reader.hasNext()) {
        JCas cas = JCasFactory.createJCas();
        reader.getNext(cas.getCas());

        Collection<TextClassificationSequence> sequences = JCasUtil.select(cas,
                TextClassificationSequence.class);
        for (TextClassificationSequence seq : sequences) {
            // Token text of every target covered by the sequence.
            List<String> tokenTexts = new ArrayList<>();
            for (TextClassificationTarget target : JCasUtil.selectCovered(cas,
                    TextClassificationTarget.class, seq)) {
                tokenTexts.add(target.getCoveredText());
            }
            readSequences.add(tokenTexts);

            // Outcome label of every outcome annotation covered by the sequence.
            List<String> labels = new ArrayList<>();
            for (TextClassificationOutcome outcome : JCasUtil.selectCovered(cas,
                    TextClassificationOutcome.class, seq)) {
                labels.add(outcome.getOutcome());
            }
            readOutcomes.add(labels);
        }
        seqTargets += sequences.size();
    }

    // Expected content, one row per sequence in reading order.
    String[][] expectedTokens = {
            { "This", "is", "a", "test" },
            { "This2", "is2", "a2", "#test2", "!" },
            { "This3", "is3", "a3", "test3", "!", "!" } };
    String[][] expectedOutcomes = {
            { "DET", "VERB", "DET", "NOUN" },
            { "DET2", "VERB2", "DET2", "NOUN2", "PUNCT2" },
            { "DET3", "VERB3", "DET3", "NOUN3", "PUNCT3", "PUNCT3" } };

    assertEquals(3, seqTargets);
    assertEquals(3, readSequences.size());
    assertEquals(3, readOutcomes.size());

    for (int s = 0; s < expectedTokens.length; s++) {
        // sequence s - tokens
        List<String> actualTokens = readSequences.get(s);
        assertEquals(expectedTokens[s].length, actualTokens.size());
        for (int t = 0; t < expectedTokens[s].length; t++) {
            assertEquals(expectedTokens[s][t], actualTokens.get(t));
        }

        // sequence s - outcomes
        List<String> actualOutcomes = readOutcomes.get(s);
        for (int t = 0; t < expectedOutcomes[s].length; t++) {
            assertEquals(expectedOutcomes[s][t], actualOutcomes.get(t));
        }
    }
}
use of org.dkpro.tc.api.type.TextClassificationSequence in project dkpro-tc by dkpro.
the class SequenceOutcomeReaderTest method testReaderIndexParameter.
/**
 * Reads {@code otherFormat.txt} with {@code PARAM_OUTCOME_INDEX} set to 1 and
 * {@code PARAM_TOKEN_INDEX} set to 2 and verifies the number of sequences as well as the token
 * text and outcome of every target in each sequence.
 */
@Test
public void testReaderIndexParameter() throws Exception {
    CollectionReader reader = CollectionReaderFactory.createReader(
            SequenceOutcomeReader.class,
            SequenceOutcomeReader.PARAM_SOURCE_LOCATION, "src/test/resources/sequence/",
            SequenceOutcomeReader.PARAM_PATTERNS, "otherFormat.txt",
            SequenceOutcomeReader.PARAM_OUTCOME_INDEX, 1,
            SequenceOutcomeReader.PARAM_TOKEN_INDEX, 2);

    List<List<String>> readSequences = new ArrayList<>();
    List<List<String>> readOutcomes = new ArrayList<>();
    int seqTargets = 0;

    while (reader.hasNext()) {
        JCas cas = JCasFactory.createJCas();
        reader.getNext(cas.getCas());

        Collection<TextClassificationSequence> sequences = JCasUtil.select(cas,
                TextClassificationSequence.class);
        for (TextClassificationSequence seq : sequences) {
            // Token text of every target covered by the sequence.
            List<String> tokenTexts = new ArrayList<>();
            for (TextClassificationTarget target : JCasUtil.selectCovered(cas,
                    TextClassificationTarget.class, seq)) {
                tokenTexts.add(target.getCoveredText());
            }
            readSequences.add(tokenTexts);

            // Outcome label of every outcome annotation covered by the sequence.
            List<String> labels = new ArrayList<>();
            for (TextClassificationOutcome outcome : JCasUtil.selectCovered(cas,
                    TextClassificationOutcome.class, seq)) {
                labels.add(outcome.getOutcome());
            }
            readOutcomes.add(labels);
        }
        seqTargets += sequences.size();
    }

    // Expected content, one row per sequence in reading order.
    String[][] expectedTokens = {
            { "This", "is", "a", "test" },
            { "This2", "is2", "a2", "test2", "!2" } };
    String[][] expectedOutcomes = {
            { "DET", "VERB", "DET", "NOUN" },
            { "DET2", "VERB2", "DET2", "NOUN2", "PUNCT2" } };

    assertEquals(2, seqTargets);
    assertEquals(2, readSequences.size());
    assertEquals(2, readOutcomes.size());

    for (int s = 0; s < expectedTokens.length; s++) {
        // sequence s - tokens
        List<String> actualTokens = readSequences.get(s);
        assertEquals(expectedTokens[s].length, actualTokens.size());
        for (int t = 0; t < expectedTokens[s].length; t++) {
            assertEquals(expectedTokens[s][t], actualTokens.get(t));
        }

        // sequence s - outcomes
        List<String> actualOutcomes = readOutcomes.get(s);
        for (int t = 0; t < expectedOutcomes[s].length; t++) {
            assertEquals(expectedOutcomes[s][t], actualOutcomes.get(t));
        }
    }
}
Aggregations