use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
the class TestFoldUtil method createSequenceCas.
/**
 * Builds the sequence fixture used by the tests in this class.
 *
 * <p>Creates a fresh temporary folder, fills {@code jcasSequence} with a fixed document text,
 * calls {@code setSeq} for ten consecutive spans of that text, attaches a
 * {@link DocumentMetaData} with the document id {@code "id"}, and finally runs the CAS through a
 * {@link BinaryCasWriter} (format {@code "6+"}) that targets the temporary folder.
 *
 * @throws IOException
 *             if the temporary folder cannot be created
 * @throws UIMAException
 *             if the JCas or the writer engine cannot be created, or processing fails
 */
private void createSequenceCas() throws IOException, UIMAException {
    tmpFoldSeq = new TemporaryFolder();
    tmpFoldSeq.create();

    jcasSequence = JCasFactory.createJCas();
    jcasSequence.setDocumentText("One A Two B Three C Four D Five E Six F Seven G Eight H Nine I Ten J");

    // {begin, end} character offsets; each pair spans one "word letter" unit of the text above
    // ("One A", "Two B", ...), skipping the single blank between units.
    final int[][] sequenceSpans = {
        { 0, 5 }, { 6, 11 }, { 12, 19 }, { 20, 26 }, { 27, 33 },
        { 34, 39 }, { 40, 47 }, { 48, 55 }, { 56, 62 }, { 63, 68 }
    };
    for (int[] span : sequenceSpans) {
        setSeq(jcasSequence, span[0], span[1]);
    }

    DocumentMetaData metaData = new DocumentMetaData(jcasSequence);
    metaData.setDocumentId("id");
    metaData.addToIndexes();

    createJCasIdAnnotation(jcasSequence);

    // Serialize the prepared CAS into the temporary folder.
    AnalysisEngine binaryCasWriter = AnalysisEngineFactory.createEngine(
            BinaryCasWriter.class,
            BinaryCasWriter.PARAM_TARGET_LOCATION, tmpFoldSeq.getRoot(),
            BinaryCasWriter.PARAM_FORMAT, "6+");
    binaryCasWriter.process(jcasSequence);
}
use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
the class FoldClassificationUnitCasMultiplier method next.
/**
 * Produces the next output CAS: a copy of the current input CAS that is given its own document
 * id and URI and is reduced to a single target annotation.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>obtain an empty JCas, create a {@link DocumentMetaData} on it and copy the input CAS
 * into it;</li>
 * <li>if the copy ends up with more than one {@link DocumentMetaData}, unindex those that carry
 * no information (language {@code "x-unspecified"} and all id/title/URI fields {@code null});</li>
 * <li>suffix the document id and URI of the copy with {@code "_" + subCASCounter};</li>
 * <li>delegate to {@code deleteAllTextClassificationAnnotation}, {@code setTargetAnnotation}
 * and {@code assignNewId};</li>
 * <li>increment {@code subCASCounter} and flag the copy as last segment once the counter
 * equals {@code annotations.size()}.</li>
 * </ol>
 *
 * @return the prepared copy of the current input CAS
 * @throws AnalysisEngineProcessException
 *             if a JCas cannot be obtained from the copied CAS; the original
 *             {@link CASException} is attached as the cause
 */
@Override
public AbstractCas next() throws AnalysisEngineProcessException {
    // Create an empty CAS as a destination for a copy.
    JCas emptyJCas = this.getEmptyJCas();
    DocumentMetaData.create(emptyJCas);
    emptyJCas.setDocumentText(jCas.getDocumentText());
    CAS emptyCas = emptyJCas.getCas();

    // Copy current CAS to the empty CAS.
    CasCopier.copyCas(jCas.getCas(), emptyCas, false);
    JCas copyJCas;
    try {
        copyJCas = emptyCas.getJCas();
    } catch (CASException e) {
        // The (String, Object[], Throwable) constructor interprets the string as a message
        // key of UIMA's resource bundle, not as free text, so wrap the cause directly instead.
        throw new AnalysisEngineProcessException(e);
    }

    // Check for multiple DocumentMetaData annotations (issue #266)
    Collection<DocumentMetaData> metaDataAnnotations = JCasUtil.select(copyJCas, DocumentMetaData.class);
    List<DocumentMetaData> metaDataAnnotationsToDelete = new ArrayList<>();
    if (metaDataAnnotations.size() > 1) {
        for (DocumentMetaData metaDataAnnotation : metaDataAnnotations) {
            boolean carriesNoInformation = "x-unspecified".equals(metaDataAnnotation.getLanguage())
                    && metaDataAnnotation.getDocumentTitle() == null
                    && metaDataAnnotation.getDocumentId() == null
                    && metaDataAnnotation.getDocumentUri() == null
                    && metaDataAnnotation.getDocumentBaseUri() == null
                    && metaDataAnnotation.getCollectionId() == null;
            if (carriesNoInformation) {
                metaDataAnnotationsToDelete.add(metaDataAnnotation);
            }
        }
    }
    // Removal happens in a second pass so the selected collection is not modified while
    // it is being iterated.
    for (DocumentMetaData metaDataAnnotation : metaDataAnnotationsToDelete) {
        copyJCas.removeFsFromIndexes(metaDataAnnotation);
    }

    // Set new ids and URIs for copied CASes.
    // The counting variable keeps track of how many new CAS objects are created from the
    // original CAS, a CAS relative counter.
    // NOTE: As it may cause confusion: If in sequence classification several or all CAS
    // contains only a single sequence this counter would be zero in all cases - this is not a
    // bug, but a cosmetic flaw
    DocumentMetaData sourceMetaData = DocumentMetaData.get(jCas);
    String currentDocId = sourceMetaData.getDocumentId();
    DocumentMetaData.get(copyJCas).setDocumentId(currentDocId + "_" + subCASCounter);
    String currentDocUri = sourceMetaData.getDocumentUri() + "_" + subCASCounter;
    DocumentMetaData.get(copyJCas).setDocumentUri(currentDocUri);

    deleteAllTextClassificationAnnotation(copyJCas);
    setTargetAnnotation(copyJCas);
    assignNewId(copyJCas);
    subCASCounter++;

    // issue #261
    DocumentMetaData.get(copyJCas).setIsLastSegment(subCASCounter == annotations.size());
    getLogger().debug("Creating CAS " + subCASCounter + " of " + annotations.size());
    return copyJCas;
}
Aggregations