use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.
the class NGramUtilsTest method characterBiGrams.
/**
 * Checks the character n-grams (n = 2..3) extracted for a classification target that
 * spans offsets 2..7 ("house") of the text "A house", with '^' and '$' passed as the
 * begin/end boundary characters.
 *
 * The expected total of 11 corresponds to the 6 bigrams plus 5 trigrams of the padded
 * string "^house$"; each of them is asserted individually below.
 */
@Test
public void characterBiGrams() throws Exception {
    String text = "A house";
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentLanguage("en");
    jcas.setDocumentText(text);
    JCasBuilder cb = new JCasBuilder(jcas);
    for (String token : text.split(" ")) {
        cb.add(token, Token.class);
    }
    // Offsets 2..7 select the substring "house" of the document text.
    TextClassificationTarget tu = new TextClassificationTarget(jcas, 2, 7);
    tu.addToIndexes();
    FrequencyDistribution<String> ngrams = CharacterNGramMC.getAnnotationCharacterNgrams(tu, false, 2, 3, '^', '$');
    for (String s : ngrams.getKeys()) {
        System.out.println(s);
    }
    assertEquals(11, ngrams.getN());
    // bigrams of "^house$"
    assertTrue(ngrams.contains("^h"));
    assertTrue(ngrams.contains("ho"));
    assertTrue(ngrams.contains("ou"));
    assertTrue(ngrams.contains("us"));
    assertTrue(ngrams.contains("se"));
    // The closing bigram is "e$"; the trigram "se$" is checked with the trigrams below.
    assertTrue(ngrams.contains("e$"));
    // trigrams of "^house$"
    assertTrue(ngrams.contains("^ho"));
    assertTrue(ngrams.contains("hou"));
    assertTrue(ngrams.contains("ous"));
    assertTrue(ngrams.contains("use"));
    assertTrue(ngrams.contains("se$"));
}
use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.
the class TestReaderSingleLabelUnitReader method getNext.
/**
 * Lets the superclass fill the CAS, attaches a {@code JCasId} carrying the running
 * {@code jcasId} counter, and then adds one {@code TextClassificationTarget} and one
 * {@code TextClassificationOutcome} per space-separated token of the document text.
 *
 * @param aCAS the CAS to populate
 * @throws IOException propagated from the superclass
 * @throws CollectionException if the JCas view cannot be obtained from the CAS; the
 *         underlying {@code CASException} is attached as the cause
 */
@Override
public void getNext(CAS aCAS) throws IOException, CollectionException {
    super.getNext(aCAS);
    JCas jcas;
    try {
        jcas = aCAS.getJCas();
        JCasId id = new JCasId(jcas);
        id.setId(jcasId++);
        id.addToIndexes();
    } catch (CASException e) {
        // Preserve the original failure so callers can see why the JCas was unavailable.
        throw new CollectionException(e);
    }
    String documentText = aCAS.getDocumentText();
    int begin = 0;
    for (String token : documentText.split(" ")) {
        int end = documentText.indexOf(token, begin) + token.length();
        new TextClassificationTarget(jcas, begin, end).addToIndexes();
        new TextClassificationOutcome(jcas, begin, end).addToIndexes();
        // Continue after the token and its separating space; advancing by a single
        // character would leave the next annotation starting inside an earlier token.
        begin = end + 1;
    }
}
use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.
the class SequenceOutcomeReaderTest method testSkipLineReader.
/**
 * Runs a {@code SequenceOutcomeReader} over "posDummy.txt" with
 * {@code PARAM_SKIP_LINES_START_WITH_STRING} set to "#", collects the covered text of
 * the targets of every sequence, and verifies the four tokens of the second sequence.
 */
@Test
public void testSkipLineReader() throws Exception {
    CollectionReader skippingReader = CollectionReaderFactory.createReader(
            SequenceOutcomeReader.class,
            SequenceOutcomeReader.PARAM_SOURCE_LOCATION, "src/test/resources/sequence/posDummy.txt",
            SequenceOutcomeReader.PARAM_SKIP_LINES_START_WITH_STRING, "#");

    List<List<String>> tokensPerSequence = new ArrayList<>();
    List<List<String>> outcomesPerCas = new ArrayList<>();

    while (skippingReader.hasNext()) {
        JCas cas = JCasFactory.createJCas();
        skippingReader.getNext(cas.getCas());

        // One token list per sequence annotation found in this CAS.
        for (TextClassificationSequence seq : JCasUtil.select(cas, TextClassificationSequence.class)) {
            List<String> surfaceForms = new ArrayList<>();
            for (TextClassificationTarget unit : JCasUtil.selectCovered(cas, TextClassificationTarget.class, seq)) {
                surfaceForms.add(unit.getCoveredText());
            }
            tokensPerSequence.add(surfaceForms);
        }

        // One outcome list per CAS.
        List<String> labels = new ArrayList<>();
        for (TextClassificationOutcome annotation : JCasUtil.select(cas, TextClassificationOutcome.class)) {
            labels.add(annotation.getOutcome());
        }
        outcomesPerCas.add(labels);
    }

    // Tokens of the second sequence that was read.
    List<String> secondSequence = tokensPerSequence.get(1);
    assertEquals(4, secondSequence.size());
    assertEquals("This2", secondSequence.get(0));
    assertEquals("is2", secondSequence.get(1));
    assertEquals("a2", secondSequence.get(2));
    assertEquals("!", secondSequence.get(3));
}
use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.
the class TestTargetSurfaceFormContextFeature method testTokenFeatureExtractors.
/**
 * Checks the value reported for each relative position from -4 to +4 around the target
 * unit provided by {@code setUp()}: boundary markers at the outer positions and the
 * surrounding surface forms in between.
 */
@Test
public void testTokenFeatureExtractors() throws Exception {
    // setUp() hands back the JCas at index 0 and the target annotation at index 1.
    Object[] fixture = setUp();
    JCas view = (JCas) fixture[0];
    TextClassificationTarget target = (TextClassificationTarget) fixture[1];

    // Positions before the target.
    assertResult(view, target, -4, TargetSurfaceFormContextFeature.OUT_OF_BOUNDARY);
    assertResult(view, target, -3, TargetSurfaceFormContextFeature.BEG_OF_SEQUENCE);
    assertResult(view, target, -2, "it");
    assertResult(view, target, -1, "is");

    // The target itself.
    assertResult(view, target, 0, "raining");

    // Positions after the target.
    assertResult(view, target, +1, "all");
    assertResult(view, target, +2, "day");
    assertResult(view, target, +3, TargetSurfaceFormContextFeature.END_OF_SEQUENCE);
    assertResult(view, target, +4, TargetSurfaceFormContextFeature.OUT_OF_BOUNDARY);
}
use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.
the class FolderwiseDataReader method setTextClassificationTarget.
/**
 * Creates a {@code TextClassificationTarget} spanning {@code begin}..{@code end} in the
 * given JCas and adds it to the indexes.
 *
 * @param aJCas the JCas that receives the annotation
 * @param currentFile not used by this implementation
 * @param begin begin offset of the target
 * @param end end offset of the target
 */
protected void setTextClassificationTarget(JCas aJCas, Resource currentFile, int begin, int end) {
    new TextClassificationTarget(aJCas, begin, end).addToIndexes();
}
Aggregations