use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project webanno by webanno.
the class WebannoTsv3Reader method convertToCas.
/**
 * Populates the given CAS from a WebAnno TSV3 input stream.
 * <p>
 * The document title stored in the CAS's {@link DocumentMetaData} is remembered in
 * {@code fileName}, the stream is handed to {@code setAnnotations}, and finally the document
 * text of the CAS is set from the string form of {@code coveredText}.
 *
 * @param aJCas
 *            the CAS to fill; must already carry a {@link DocumentMetaData} annotation
 * @param aIs
 *            the input stream forwarded to {@code setAnnotations}
 * @param aEncoding
 *            the encoding name forwarded to {@code setAnnotations}
 * @throws IOException
 *             declared for the delegated processing (presumably raised while reading the stream)
 */
public void convertToCas(JCas aJCas, InputStream aIs, String aEncoding) throws IOException {
    fileName = DocumentMetaData.get(aJCas).getDocumentTitle();

    // Layer/feature setup is currently disabled:
    // setLayerAndFeature(aJCas, aIs, aEncoding);
    setAnnotations(aJCas, aIs, aEncoding);

    // The document text is only set after setAnnotations has run.
    String documentText = coveredText.toString();
    aJCas.setDocumentText(documentText);
}
use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project webanno by webanno.
the class TcfWriter method process.
/**
 * Writes the given CAS using {@code casToTcfWriter}, optionally merging with the source
 * document referenced by the CAS's {@link DocumentMetaData}.
 * <p>
 * When {@code merge} is enabled, the document URI from the metadata is opened and passed,
 * together with the CAS and a freshly obtained output stream, to the three-argument
 * {@code casToTcfWriter}. If the source cannot be opened, or anything goes wrong during the
 * merge, the problem is logged at debug level and the method falls back to the two-argument
 * {@code casToTcfWriter} on a newly obtained output stream. The same fallback path is used
 * when merging is disabled.
 *
 * @param aJCas
 *            the CAS to write
 * @throws AnalysisEngineProcessException
 *             wrapping any exception that is not absorbed by the merge fallback (for example a
 *             failure to obtain the output stream, a malformed document URI, or a failure of
 *             the non-merging write)
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    // Kept at method scope so that it is closed only once all writing is done.
    InputStream sourceStream = null;
    try {
        boolean merged = false;

        if (!merge) {
            getLogger().debug("Merging disabled");
        } else {
            OutputStream mergeTarget = null;
            try {
                mergeTarget = getOutputStream(aJCas, filenameSuffix);

                // Locate the original document so that its content can be preserved.
                DocumentMetaData meta = DocumentMetaData.get(aJCas);
                URL sourceUrl = new URL(meta.getDocumentUri());
                try {
                    sourceStream = sourceUrl.openStream();
                    try {
                        getLogger().debug("Merging with [" + meta.getDocumentUri() + "]");
                        casToTcfWriter(sourceStream, aJCas, mergeTarget);
                        merged = true;
                    } catch (Exception mergeFailure) {
                        // Workaround: any failure here is treated as "source is not TCF".
                        getLogger().debug("Source file is not TCF: " + mergeFailure.getMessage());
                    }
                } catch (IOException openFailure) {
                    getLogger().debug(
                            "Cannot open source file to merge with: " + openFailure.getMessage());
                }
            } finally {
                closeQuietly(mergeTarget);
            }
        }

        // Merging was disabled or did not succeed: write the CAS on its own.
        if (!merged) {
            OutputStream plainTarget = null;
            try {
                plainTarget = getOutputStream(aJCas, filenameSuffix);
                casToTcfWriter(aJCas, plainTarget);
            } finally {
                closeQuietly(plainTarget);
            }
        }
    } catch (Exception failure) {
        throw new AnalysisEngineProcessException(failure);
    } finally {
        closeQuietly(sourceStream);
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project webanno by webanno.
the class TeiReaderTest method testTeiReader.
/**
 * Reads the XML documents matched by the pattern below from {@code classpath:/local/} with
 * {@link TeiReader} and, for every resulting CAS, prints its document id, text length and
 * language and checks the expected annotation counts and the text of the first sentence.
 * <p>
 * Currently ignored, see the {@code @Ignore} reason.
 */
@Test
@Ignore("No TEI yet to opensource ")
public void testTeiReader() throws Exception {
    final String expectedFirstSentence = "70 I DAG.";

    CollectionReaderDescription readerDescription = createReaderDescription(
            TeiReader.class,
            TeiReader.PARAM_LANGUAGE, "en",
            TeiReader.PARAM_SOURCE_LOCATION, "classpath:/local/",
            TeiReader.PARAM_PATTERNS, new String[] { "[+]*.xml" });

    for (JCas cas : new JCasIterable(readerDescription)) {
        DocumentMetaData documentMetaData = DocumentMetaData.get(cas);
        String documentText = cas.getDocumentText();

        // Diagnostic output: document id with text length, then the document language.
        System.out.printf("%s - %d%n", documentMetaData.getDocumentId(), documentText.length());
        System.out.println(cas.getDocumentLanguage());

        // Expected number of annotations per type.
        assertEquals(2235, JCasUtil.select(cas, Token.class).size());
        assertEquals(745, JCasUtil.select(cas, POS.class).size());
        assertEquals(745, JCasUtil.select(cas, Lemma.class).size());
        assertEquals(0, JCasUtil.select(cas, NamedEntity.class).size());
        assertEquals(30, JCasUtil.select(cas, Sentence.class).size());

        Sentence leadingSentence = JCasUtil.select(cas, Sentence.class).iterator().next();
        assertEquals(expectedFirstSentence, leadingSentence.getCoveredText());
    }
}
use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project webanno by webanno.
the class WebannoTsv1Reader method convertToCas.
/**
 * Populates the given CAS from a WebAnno TSV (version 1) input stream.
 * <p>
 * The document title from the CAS's {@link DocumentMetaData} is remembered in
 * {@code fileName}. {@code setAnnotations} then receives the stream together with a set of
 * empty collectors (text buffer, per-layer maps, sentence list). Afterwards the collected
 * text becomes the document text of the CAS and the token, named entity, dependency and
 * sentence creation helpers are invoked, in that order, with the collectors.
 *
 * @param aJCas
 *            the CAS to fill; must already carry a {@link DocumentMetaData} annotation
 * @param aIs
 *            the input stream forwarded to {@code setAnnotations}
 * @param aEncoding
 *            the encoding name forwarded to {@code setAnnotations}
 * @throws IOException
 *             declared for the delegated processing (presumably raised while reading the stream)
 */
public void convertToCas(JCas aJCas, InputStream aIs, String aEncoding) throws IOException {
    fileName = DocumentMetaData.get(aJCas).getDocumentTitle();

    // Collectors handed to setAnnotations.
    StringBuilder documentText = new StringBuilder();
    Map<Integer, String> tokenMap = new HashMap<>();
    Map<Integer, String> posMap = new HashMap<>();
    Map<Integer, String> lemmaMap = new HashMap<>();
    Map<Integer, String> namedEntityMap = new HashMap<>();
    Map<Integer, String> dependencyFunctionMap = new HashMap<>();
    Map<Integer, Integer> dependencyDependentMap = new HashMap<>();
    List<Integer> sentenceFirstTokens = new ArrayList<>();

    setAnnotations(aIs, aEncoding, documentText, tokenMap, posMap, lemmaMap, namedEntityMap,
            dependencyFunctionMap, dependencyDependentMap, sentenceFirstTokens);

    // The text must be set on the CAS before the annotations are created.
    aJCas.setDocumentText(documentText.toString());

    // Shared between the creation helpers below.
    Map<String, Token> createdTokens = new HashMap<>();
    createToken(aJCas, documentText, tokenMap, posMap, lemmaMap, createdTokens);
    createNamedEntity(namedEntityMap, aJCas, tokenMap, createdTokens);
    createDependency(aJCas, tokenMap, dependencyFunctionMap, dependencyDependentMap,
            createdTokens);
    createSentence(aJCas, sentenceFirstTokens, createdTokens);
}
use of de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData in project dkpro-tc by dkpro.
the class PairReader_ImplBase method createMetaData.
/**
 * Creates a {@link DocumentMetaData} annotation in the given CAS and fills in its
 * identifying fields.
 * <p>
 * The document base URI is set to the empty string and the document URI to {@code "/"}
 * followed by {@code docId}.
 *
 * @param jcas
 *            the CAS in which the metadata is created
 * @param collectionId
 *            value stored as the collection id
 * @param docId
 *            value stored as the document id and used to build the document URI
 * @param docTitle
 *            value stored as the document title
 */
protected void createMetaData(JCas jcas, String collectionId, String docId, String docTitle) {
    DocumentMetaData meta = DocumentMetaData.create(jcas);

    // Identification
    meta.setCollectionId(collectionId);
    meta.setDocumentId(docId);
    meta.setDocumentTitle(docTitle);

    // Location: empty base URI, URI derived from the document id
    String documentUri = "/" + docId;
    meta.setDocumentBaseUri("");
    meta.setDocumentUri(documentUri);
}
Aggregations