Search in sources :

Example 6 with CollectionReader

use of org.apache.uima.collection.CollectionReader in project webanno by webanno.

the class WebAnnoTsv3XReaderWriterTest method test.

/**
 * Round-trip check for the TSV3 reader/writer pair.
 *
 * <p>Runs a pipeline that reads {@code coref.tsv} from {@code src/test/resources/tsv3/} and
 * writes it to a per-test output folder, then reads the first document from both the original
 * location and the output folder and asserts that both contain the same number of
 * {@code Token}, {@code POS}, {@code Lemma}, {@code NamedEntity}, {@code Sentence} and
 * {@code Dependency} annotations. Only annotation counts are compared, not their contents.
 *
 * @throws Exception if creating or running any reader/writer fails
 */
@Test
public void test() throws Exception {
    String targetFolder = "target/test-output/" + testContext.getTestOutputFolderName();

    // Pass 1: read the reference file and write it back out into the target folder.
    CollectionReader reader = CollectionReaderFactory.createReader(WebannoTsv3XReader.class, WebannoTsv3XReader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/", WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    AnalysisEngineDescription writer = createEngineDescription(WebannoTsv3XWriter.class, WebannoTsv3XWriter.PARAM_TARGET_LOCATION, targetFolder, WebannoTsv3XWriter.PARAM_STRIP_EXTENSION, true);
    runPipeline(reader, writer);

    // Pass 2: reader1 loads the reference file again, reader2 loads the file written in pass 1.
    CollectionReader reader1 = CollectionReaderFactory.createReader(WebannoTsv3XReader.class, WebannoTsv3XReader.PARAM_SOURCE_LOCATION, "src/test/resources/tsv3/", WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    CollectionReader reader2 = CollectionReaderFactory.createReader(WebannoTsv3XReader.class, WebannoTsv3XReader.PARAM_SOURCE_LOCATION, targetFolder, WebannoTsv3XReader.PARAM_PATTERNS, "coref.tsv");
    CAS cas1 = JCasFactory.createJCas().getCas();
    reader1.getNext(cas1);
    CAS cas2 = JCasFactory.createJCas().getCas();
    reader2.getNext(cas2);

    // assertEquals takes (expected, actual): cas1 is the reference input, cas2 the written
    // output under test, so a failure message reports the two counts the right way round.
    assertEquals(JCasUtil.select(cas1.getJCas(), Token.class).size(), JCasUtil.select(cas2.getJCas(), Token.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), POS.class).size(), JCasUtil.select(cas2.getJCas(), POS.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Lemma.class).size(), JCasUtil.select(cas2.getJCas(), Lemma.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), NamedEntity.class).size(), JCasUtil.select(cas2.getJCas(), NamedEntity.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Sentence.class).size(), JCasUtil.select(cas2.getJCas(), Sentence.class).size());
    assertEquals(JCasUtil.select(cas1.getJCas(), Dependency.class).size(), JCasUtil.select(cas2.getJCas(), Dependency.class).size());
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) CAS(org.apache.uima.cas.CAS) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) Test(org.junit.Test)

Example 7 with CollectionReader

use of org.apache.uima.collection.CollectionReader in project webanno by webanno.

the class XmiWriterReaderTest method read.

/**
 * Reads the first {@code *.xmi} document found under the test folder into a fresh CAS and
 * checks that its document text equals the contents of
 * {@code src/test/resources/texts/latin.txt} and that its document language is
 * {@code "latin"}.
 *
 * @throws Exception if the reader or CAS cannot be created, or a file cannot be read
 */
public void read() throws Exception {
    String sourceLocation = testFolder.getRoot().getPath();
    String[] includePatterns = { ResourceCollectionReaderBase.INCLUDE_PREFIX + "*.xmi" };
    CollectionReader reader = CollectionReaderFactory.createReader(
            XmiReader.class,
            ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, sourceLocation,
            ResourceCollectionReaderBase.PARAM_PATTERNS, includePatterns);

    // Load the first document delivered by the reader into a newly created CAS.
    CAS document = CasCreationUtils.createCas(createTypeSystemDescription(), null, null);
    reader.getNext(document);

    File referenceFile = new File("src/test/resources/texts/latin.txt");
    String expectedText = readFileToString(referenceFile);

    assertEquals(expectedText, document.getDocumentText());
    assertEquals("latin", document.getDocumentLanguage());
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) CAS(org.apache.uima.cas.CAS) FileUtils.readFileToString(org.apache.commons.io.FileUtils.readFileToString) File(java.io.File)

Example 8 with CollectionReader

use of org.apache.uima.collection.CollectionReader in project webanno by webanno.

the class TwoPairedKappaTest method init.

/**
 * Prepares the fixtures used by the tests: three users ({@code user1}..{@code user3}), an
 * empty source document, and four CASes, each filled with the first document that a
 * {@code WebannoTsv2Reader} delivers for one TSV file under {@code src/test/resources/}.
 *
 * @throws Exception if a CAS or reader cannot be created or a document cannot be read
 */
@Before
public void init() throws Exception {
    // Users.
    user1 = new User();
    user1.setUsername("user1");

    user2 = new User();
    user2.setUsername("user2");

    user3 = new User();
    user3.setUsername("user3");

    document = new SourceDocument();

    // All TSV fixtures are read from the same location; only the file pattern differs.
    final String resourceLocation = "src/test/resources/";

    kappatestCas = JCasFactory.createJCas().getCas();
    CollectionReader kappatestReader = createReader(
            WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_SOURCE_LOCATION, resourceLocation,
            WebannoTsv2Reader.PARAM_PATTERNS, "kappatest.tsv");
    kappatestReader.getNext(kappatestCas);

    kappaspandiff = JCasFactory.createJCas().getCas();
    CollectionReader spanDiffReader = createReader(
            WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_SOURCE_LOCATION, resourceLocation,
            WebannoTsv2Reader.PARAM_PATTERNS, "kappaspandiff.tsv");
    spanDiffReader.getNext(kappaspandiff);

    kappaarcdiff = JCasFactory.createJCas().getCas();
    CollectionReader arcDiffReader = createReader(
            WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_SOURCE_LOCATION, resourceLocation,
            WebannoTsv2Reader.PARAM_PATTERNS, "kappaarcdiff.tsv");
    arcDiffReader.getNext(kappaarcdiff);

    kappaspanarcdiff = JCasFactory.createJCas().getCas();
    CollectionReader spanArcDiffReader = createReader(
            WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_SOURCE_LOCATION, resourceLocation,
            WebannoTsv2Reader.PARAM_PATTERNS, "kappaspanarcdiff.tsv");
    spanArcDiffReader.getNext(kappaspanarcdiff);
}
Also used : User(de.tudarmstadt.ukp.clarin.webanno.security.model.User) CollectionReader(org.apache.uima.collection.CollectionReader) SourceDocument(de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument) Before(org.junit.Before)

Example 9 with CollectionReader

use of org.apache.uima.collection.CollectionReader in project dkpro-tc by dkpro.

the class WekaSaveAndLoadModelDocumentRegression method regressionLoadModel.

/**
 * Loads a stored model from {@code modelFolder}, applies it to the first document read from
 * {@code regressionTest}, and checks that exactly one outcome is produced whose numeric value
 * lies strictly between 0.1 and 5.
 *
 * @param modelFolder folder containing the model; its absolute path is handed to the
 *                    {@code TcAnnotator}
 * @throws UIMAException if a UIMA component cannot be created or fails while processing
 * @throws IOException   if the reader fails to read the input
 */
private void regressionLoadModel(File modelFolder) throws UIMAException, IOException {
    CollectionReader reader = CollectionReaderFactory.createReader(LinewiseTextOutcomeReader.class, LinewiseTextOutcomeReader.PARAM_OUTCOME_INDEX, 0, LinewiseTextOutcomeReader.PARAM_TEXT_INDEX, 1, LinewiseTextOutcomeReader.PARAM_SOURCE_LOCATION, regressionTest, LinewiseTextOutcomeReader.PARAM_LANGUAGE, "en");
    AnalysisEngine segmenter = AnalysisEngineFactory.createEngine(BreakIteratorSegmenter.class);
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(TcAnnotator.class, TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(), TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    JCas jcas = JCasFactory.createJCas();

    // Still call hasNext() before getNext(), as the original did, but check the result
    // instead of discarding it, so an empty input fails here with a clear assertion.
    assertTrue(reader.hasNext());
    reader.getNext(jcas.getCas());

    segmenter.process(jcas);
    tcAnno.process(jcas);

    List<TextClassificationOutcome> outcomes = new ArrayList<>(JCasUtil.select(jcas, TextClassificationOutcome.class));
    assertEquals(1, outcomes.size());

    // The outcome is stored as text; parse it straight to a primitive (no boxing needed).
    double prediction = Double.parseDouble(outcomes.get(0).getOutcome());
    assertTrue(prediction > 0.1 && prediction < 5);
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Example 10 with CollectionReader

use of org.apache.uima.collection.CollectionReader in project dkpro-tc by dkpro.

the class WekaSaveAndLoadModelUnitTest method unitLoadModel.

/**
 * Loads a stored model from {@code modelFolder}, applies it to the first {@code *.xml}
 * document read from {@code unitTrainFolder}, and checks that one non-empty outcome is
 * produced per token.
 *
 * @param modelFolder folder containing the model; its absolute path is handed to the
 *                    {@code TcAnnotator}
 * @throws Exception if a UIMA component cannot be created or fails while processing
 */
private static void unitLoadModel(File modelFolder) throws Exception {
    // Single source of truth for the token count checked below; the number of tokens
    // determines the number of outcomes that are expected.
    final int expectedTokenCount = 163;

    CollectionReader reader = CollectionReaderFactory.createReader(TeiReader.class, TeiReader.PARAM_SOURCE_LOCATION, unitTrainFolder, TeiReader.PARAM_LANGUAGE, "en", TeiReader.PARAM_PATTERNS, "*.xml");
    AnalysisEngine tcAnno = AnalysisEngineFactory.createEngine(TcAnnotator.class, TcAnnotator.PARAM_TC_MODEL_LOCATION, modelFolder.getAbsolutePath(), TcAnnotator.PARAM_NAME_UNIT_ANNOTATION, Token.class.getName());
    JCas jcas = JCasFactory.createJCas();
    reader.getNext(jcas.getCas());

    // Sanity check: verify the number of tokens before the annotator runs.
    assertEquals(expectedTokenCount, JCasUtil.select(jcas, Token.class).size());

    tcAnno.process(jcas);

    // One outcome is expected for each token.
    Collection<TextClassificationOutcome> outcomes = JCasUtil.select(jcas, TextClassificationOutcome.class);
    assertEquals(expectedTokenCount, outcomes.size());

    // Every outcome must carry a non-empty value.
    for (TextClassificationOutcome o : outcomes) {
        assertTrue(!o.getOutcome().isEmpty());
    }
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TextClassificationOutcome(org.dkpro.tc.api.type.TextClassificationOutcome) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine)

Aggregations

CollectionReader (org.apache.uima.collection.CollectionReader): 35, JCas (org.apache.uima.jcas.JCas): 28, ArrayList (java.util.ArrayList): 25, TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome): 15, AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 14, Test (org.junit.Test): 13, Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 8, CAS (org.apache.uima.cas.CAS): 7, File (java.io.File): 5, TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 5, List (java.util.List): 4, AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 4, TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence): 4, POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS): 3, Lemma (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma): 3, TypeSystemDescription (org.apache.uima.resource.metadata.TypeSystemDescription): 3, Evaluator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.Evaluator): 2, PossibleValue (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.PossibleValue): 2, ValuesGenerator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.ValuesGenerator): 2, ConstraintsGrammar (de.tudarmstadt.ukp.clarin.webanno.constraints.grammar.ConstraintsGrammar): 2