Use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
Class ReutersCorpusReader, method getTextClassificationOutcomes.
/**
 * Collects the gold labels registered for the document held in the given CAS.
 * <p>
 * The lookup key is the document title taken from the CAS's {@code DocumentMetaData}, with
 * its file extension stripped via {@code FilenameUtils.removeExtension}.
 *
 * @param jcas the CAS whose document metadata supplies the title
 * @return a freshly created set containing every label stored in {@code goldLabelMap}
 *         under the lookup key
 * @throws CollectionException if {@code goldLabelMap} has no entry for the lookup key
 */
@Override
public Set<String> getTextClassificationOutcomes(JCas jcas) throws CollectionException {
    String documentTitle = DocumentMetaData.get(jcas).getDocumentTitle();
    String lookupKey = FilenameUtils.removeExtension(documentTitle);

    boolean hasGoldLabels = goldLabelMap.containsKey(lookupKey);
    if (!hasGoldLabels) {
        // The message reports the full title (extension included), not the stripped key.
        String message = "No gold label for document: " + documentTitle;
        throw new CollectionException(new Throwable(message));
    }

    Set<String> labels = new HashSet<String>();
    for (String goldLabel : goldLabelMap.get(lookupKey)) {
        labels.add(goldLabel);
    }
    return labels;
}
Use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
Class STSReader, method getNext.
/**
 * Fills the given CAS with the next item and attaches its classification outcome.
 * <p>
 * After delegating to the superclass, the document text is reset to the empty string, a
 * {@code TextClassificationOutcome} carrying the value of
 * {@code getTextClassificationOutcome(jcas)} is added to the indexes, and the document
 * title and URI are suffixed with {@code "-" + fileOffset}. {@code fileOffset} is
 * incremented at the end of every call.
 *
 * @param jcas the CAS to populate
 * @throws IOException propagated from the calls made here
 * @throws CollectionException propagated from the calls made here
 */
@Override
public void getNext(JCas jcas) throws IOException, CollectionException {
    super.getNext(jcas);
    jcas.setDocumentText("");

    TextClassificationOutcome goldOutcome = new TextClassificationOutcome(jcas);
    goldOutcome.setOutcome(getTextClassificationOutcome(jcas));
    goldOutcome.addToIndexes();

    // A single input file yields several CASes. Each one needs its own title and URI,
    // because serialized CASes sharing the same identifiers would overwrite each other.
    DocumentMetaData metaData = DocumentMetaData.get(jcas);
    String offsetSuffix = "-" + fileOffset;
    metaData.setDocumentTitle(metaData.getDocumentTitle() + offsetSuffix);
    metaData.setDocumentUri(metaData.getDocumentUri() + offsetSuffix);

    fileOffset++;
}
Use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
Class MultiLabelOutcomeAnnotator, method getTextClassificationOutcomes.
/**
 * Returns the gold labels stored for the document in the given CAS.
 * <p>
 * Labels are looked up in {@code goldLabelMap} under the document title (read from the
 * CAS's {@code DocumentMetaData}) with its file extension removed.
 *
 * @param jcas the CAS whose document title identifies the gold labels
 * @return a new set with all labels found for the document
 * @throws CollectionException if {@code goldLabelMap} contains no key for the document
 */
public Set<String> getTextClassificationOutcomes(JCas jcas) throws CollectionException {
    final DocumentMetaData metaData = DocumentMetaData.get(jcas);
    final String key = FilenameUtils.removeExtension(metaData.getDocumentTitle());

    if (goldLabelMap.containsKey(key)) {
        final Set<String> result = new HashSet<String>();
        for (String goldLabel : goldLabelMap.get(key)) {
            result.add(goldLabel);
        }
        return result;
    }

    // No entry for this document: report the unmodified title in the error.
    throw new CollectionException(
            new Throwable("No gold label for document: " + metaData.getDocumentTitle()));
}
Use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
Class TestTargetSurfaceFormContextFeature, method setUp.
/**
 * Builds the fixture shared by the tests: a CAS holding a short sentence, segmented by
 * {@code BreakIteratorSegmenter}, with one {@code TextClassificationTarget} indexed over
 * each of the first five tokens.
 *
 * @return a two-element array: the prepared {@code JCas}, followed by the target that
 *         spans the token at index 2
 * @throws Exception if CAS creation, engine creation or processing fails
 */
private Object[] setUp() throws Exception {
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("It is raining all day");

    DocumentMetaData metaData = new DocumentMetaData(jcas);
    metaData.setDocumentId("1");
    metaData.addToIndexes();

    AnalysisEngine segmenter = createEngine(BreakIteratorSegmenter.class);
    segmenter.process(jcas.getCas());

    ArrayList<Token> tokens = new ArrayList<Token>(JCasUtil.select(jcas, Token.class));

    // Targets are created for token indices 0..4, in order; only the one at index 2 is
    // handed back to the caller.
    final int targetCount = 5;
    final int returnedIndex = 2;
    TextClassificationTarget returnedTarget = null;
    for (int i = 0; i < targetCount; i++) {
        Token token = tokens.get(i);
        TextClassificationTarget target = new TextClassificationTarget(jcas, token.getBegin(),
                token.getEnd());
        target.addToIndexes();
        if (i == returnedIndex) {
            returnedTarget = target;
        }
    }

    return new Object[] { jcas, returnedTarget };
}
Use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
Class SequenceOutcomeReader, method initializeJCas.
/**
 * Adds a {@code DocumentMetaData} annotation to the given CAS, using the current value of
 * {@code runningId} as the document id and {@code "Title_" + runningId} as the title, then
 * increments {@code runningId}.
 *
 * @param aJCas the CAS that receives the metadata annotation
 */
protected void initializeJCas(JCas aJCas) {
    DocumentMetaData metaData = new DocumentMetaData(aJCas);
    metaData.setDocumentTitle("Title_" + runningId);
    metaData.setDocumentId(String.valueOf(runningId));
    metaData.addToIndexes();

    runningId++;
}
Aggregations