Search in sources:

Example 1 with CollectionReader

Use of org.apache.uima.collection.CollectionReader in project webanno by webanno.

From the class ImportExportServiceImpl, method importCasFromFile.

/**
 * Reads a single source document into a freshly created CAS whose type system is the merge of
 * the project types and the built-in types.
 * <p>
 * After reading, sentence and token annotations are created via {@code splitSentences} and
 * {@code tokenize} if the reader did not produce them.
 *
 * @param aFile
 *            the source document to import.
 * @param aProject
 *            the project whose type system is used to set up the CAS.
 * @param aFormat
 *            the key under which the reader class is looked up in the readable formats.
 * @return the JCas holding the imported document.
 * @throws UIMAException
 *             if the CAS or the reader cannot be created, or reading fails on the UIMA side.
 * @throws IOException
 *             if no reader is registered for {@code aFormat}, if the reader finds no document
 *             (reported as {@link FileNotFoundException}), or if the document still contains
 *             no tokens or no sentences after segmentation.
 */
@Override
@SuppressWarnings({ "rawtypes", "unchecked" })
public JCas importCasFromFile(File aFile, Project aProject, String aFormat) throws UIMAException, IOException {
    Class readerClass = getReadableFormats().get(aFormat);
    if (readerClass == null) {
        throw new IOException("No reader available for format [" + aFormat + "]");
    }
    // Prepare a CAS with the project type system
    TypeSystemDescription builtInTypes = TypeSystemDescriptionFactory.createTypeSystemDescription();
    TypeSystemDescription projectTypes = annotationService.getProjectTypes(aProject);
    TypeSystemDescription allTypes = CasCreationUtils.mergeTypeSystems(asList(projectTypes, builtInTypes));
    CAS cas = JCasFactory.createJCas(allTypes).getCas();
    // Resolve the containing directory through the absolute file: File.getParentFile() returns
    // null for a bare relative name such as "doc.txt", which would otherwise cause an NPE here.
    File sourceDirectory = aFile.getAbsoluteFile().getParentFile();
    // Convert the source document to CAS. The reader is pointed at the containing directory and
    // restricted to exactly this file through an include pattern.
    CollectionReader reader = CollectionReaderFactory.createReader(readerClass, ResourceCollectionReaderBase.PARAM_SOURCE_LOCATION, sourceDirectory.getAbsolutePath(), ResourceCollectionReaderBase.PARAM_PATTERNS, new String[] { "[+]" + aFile.getName() });
    try {
        if (!reader.hasNext()) {
            throw new FileNotFoundException("Source file [" + aFile.getName() + "] not found in [" + aFile.getPath() + "]");
        }
        reader.getNext(cas);
    } finally {
        // The reader is only needed for this one document; release it on every exit path.
        reader.destroy();
    }
    JCas jCas = cas.getJCas();
    // Create sentence / token annotations if they are missing. Both flags are evaluated before
    // any segmentation runs, so the token check reflects what the reader itself produced.
    boolean hasTokens = JCasUtil.exists(jCas, Token.class);
    boolean hasSentences = JCasUtil.exists(jCas, Sentence.class);
    if (!hasSentences) {
        splitSentences(jCas);
    }
    if (!hasTokens) {
        tokenize(jCas);
    }
    if (!JCasUtil.exists(jCas, Token.class) || !JCasUtil.exists(jCas, Sentence.class)) {
        throw new IOException("The document appears to be empty. Unable to detect any " + "tokens or sentences. Empty documents cannot be imported.");
    }
    return jCas;
}
Also used : TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) CollectionReader(org.apache.uima.collection.CollectionReader) CAS(org.apache.uima.cas.CAS) FileNotFoundException(java.io.FileNotFoundException) JCas(org.apache.uima.jcas.JCas) IOException(java.io.IOException)

Example 2 with CollectionReader

Use of org.apache.uima.collection.CollectionReader in project webanno by webanno.

From the class SymbolicRulesTest, method testSimpleSymbolicRules2.

/**
 * Parses {@code symbolic2.rules}, reads {@code 1.conll}, adds a POS annotation with value
 * "pronoun" at offsets 8-9, and checks that the possible values generated for the "value"
 * feature of the first Lemma consist of exactly {@code new PossibleValue("good", true)}.
 */
@Test
public void testSimpleSymbolicRules2() throws Exception {
    ParsedConstraints constraints;
    // Close the rules file once parsing is done instead of leaking the stream.
    try (FileInputStream rules = new FileInputStream("src/test/resources/rules/symbolic2.rules")) {
        ConstraintsGrammar parser = new ConstraintsGrammar(rules);
        Parse p = parser.Parse();
        constraints = p.accept(new ParserVisitor());
    }
    JCas jcas = JCasFactory.createJCas();
    CollectionReader reader = createReader(Conll2006Reader.class, Conll2006Reader.PARAM_SOURCE_LOCATION, "src/test/resources/text/1.conll");
    reader.getNext(jcas.getCas());
    // Add the POS annotation on top of what the reader produced.
    POS pos = new POS(jcas, 8, 9);
    pos.setPosValue("pronoun");
    pos.addToIndexes();
    Evaluator constraintsEvaluator = new ValuesGenerator();
    Lemma lemma = select(jcas, Lemma.class).iterator().next();
    List<PossibleValue> possibleValues = constraintsEvaluator.generatePossibleValues(lemma, "value", constraints);
    List<PossibleValue> expectedOutput = new ArrayList<>();
    expectedOutput.add(new PossibleValue("good", true));
    assertEquals(expectedOutput, possibleValues);
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) Parse(de.tudarmstadt.ukp.clarin.webanno.constraints.grammar.syntaxtree.Parse) ParserVisitor(de.tudarmstadt.ukp.clarin.webanno.constraints.visitor.ParserVisitor) ArrayList(java.util.ArrayList) ParsedConstraints(de.tudarmstadt.ukp.clarin.webanno.constraints.model.ParsedConstraints) JCas(org.apache.uima.jcas.JCas) ValuesGenerator(de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.ValuesGenerator) Evaluator(de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.Evaluator) FileInputStream(java.io.FileInputStream) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) PossibleValue(de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.PossibleValue) ConstraintsGrammar(de.tudarmstadt.ukp.clarin.webanno.constraints.grammar.ConstraintsGrammar) Test(org.junit.Test)

Example 3 with CollectionReader

Use of org.apache.uima.collection.CollectionReader in project webanno by webanno.

From the class SymbolicRulesTest, method testSimpleSymbolicRules.

/**
 * Parses {@code symbolic1.rules}, reads {@code 1.conll}, adds a POS annotation with value
 * "pronoun" at offsets 8-9, and checks that the possible values generated for the "value"
 * feature of the first Lemma consist of exactly {@code new PossibleValue("good", true)}.
 */
@Test
public void testSimpleSymbolicRules() throws Exception {
    ParsedConstraints constraints;
    // Close the rules file once parsing is done instead of leaking the stream.
    try (FileInputStream rules = new FileInputStream("src/test/resources/rules/symbolic1.rules")) {
        ConstraintsGrammar parser = new ConstraintsGrammar(rules);
        Parse p = parser.Parse();
        constraints = p.accept(new ParserVisitor());
    }
    JCas jcas = JCasFactory.createJCas();
    CollectionReader reader = createReader(Conll2006Reader.class, Conll2006Reader.PARAM_SOURCE_LOCATION, "src/test/resources/text/1.conll");
    reader.getNext(jcas.getCas());
    // Add the POS annotation on top of what the reader produced.
    POS pos = new POS(jcas, 8, 9);
    pos.setPosValue("pronoun");
    pos.addToIndexes();
    Evaluator constraintsEvaluator = new ValuesGenerator();
    Lemma lemma = select(jcas, Lemma.class).iterator().next();
    List<PossibleValue> possibleValues = constraintsEvaluator.generatePossibleValues(lemma, "value", constraints);
    List<PossibleValue> expectedOutput = new ArrayList<>();
    expectedOutput.add(new PossibleValue("good", true));
    assertEquals(expectedOutput, possibleValues);
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) Parse(de.tudarmstadt.ukp.clarin.webanno.constraints.grammar.syntaxtree.Parse) ParserVisitor(de.tudarmstadt.ukp.clarin.webanno.constraints.visitor.ParserVisitor) ArrayList(java.util.ArrayList) ParsedConstraints(de.tudarmstadt.ukp.clarin.webanno.constraints.model.ParsedConstraints) JCas(org.apache.uima.jcas.JCas) ValuesGenerator(de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.ValuesGenerator) Evaluator(de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.Evaluator) FileInputStream(java.io.FileInputStream) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) PossibleValue(de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.PossibleValue) ConstraintsGrammar(de.tudarmstadt.ukp.clarin.webanno.constraints.grammar.ConstraintsGrammar) Test(org.junit.Test)

Example 4 with CollectionReader

Use of org.apache.uima.collection.CollectionReader in project webanno by webanno.

From the class DiffUtils, method read.

/**
 * Loads a test resource with {@code Conll2006Reader} into a new JCas.
 *
 * @param aPath
 *            location of the document, relative to {@code src/test/resources/}.
 * @return a JCas populated with the first document the reader delivers.
 * @throws UIMAException
 *             if the reader or the JCas cannot be created.
 * @throws IOException
 *             if reading the document fails.
 */
public static JCas read(String aPath) throws UIMAException, IOException {
    String sourceLocation = "src/test/resources/" + aPath;
    CollectionReader conllReader = createReader(Conll2006Reader.class,
            Conll2006Reader.PARAM_SOURCE_LOCATION, sourceLocation);

    JCas document = JCasFactory.createJCas();
    CAS target = document.getCas();
    conllReader.getNext(target);
    return document;
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) JCas(org.apache.uima.jcas.JCas)

Example 5 with CollectionReader

Use of org.apache.uima.collection.CollectionReader in project webanno by webanno.

From the class DiffUtils, method readWebAnnoTSV.

/**
 * Loads a test resource with {@code WebannoTsv2Reader} into a new JCas.
 * <p>
 * When {@code aType} is given, the JCas is created from the merge of the built-in type system
 * and {@code aType}; otherwise a default JCas is created.
 *
 * @param aPath
 *            location of the document, relative to {@code src/test/resources/}.
 * @param aType
 *            additional type system to merge with the built-in types, or {@code null}.
 * @return a JCas populated with the first document the reader delivers.
 * @throws UIMAException
 *             if the reader or the JCas cannot be created.
 * @throws IOException
 *             if reading the document fails.
 */
public static JCas readWebAnnoTSV(String aPath, TypeSystemDescription aType) throws UIMAException, IOException {
    String sourceLocation = "src/test/resources/" + aPath;
    CollectionReader tsvReader = createReader(WebannoTsv2Reader.class,
            WebannoTsv2Reader.PARAM_SOURCE_LOCATION, sourceLocation);

    JCas document;
    if (aType == null) {
        // No extra types requested: a default JCas is sufficient.
        document = JCasFactory.createJCas();
    } else {
        // Built-in types first, then the caller-supplied types, merged into one type system.
        List<TypeSystemDescription> typeSystems = new ArrayList<>();
        typeSystems.add(TypeSystemDescriptionFactory.createTypeSystemDescription());
        typeSystems.add(aType);
        TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(typeSystems);
        document = JCasFactory.createJCas(merged);
    }

    tsvReader.getNext(document.getCas());
    return document;
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) ArrayList(java.util.ArrayList) JCas(org.apache.uima.jcas.JCas)

Aggregations

CollectionReader (org.apache.uima.collection.CollectionReader) 35, JCas (org.apache.uima.jcas.JCas) 28, ArrayList (java.util.ArrayList) 25, TextClassificationOutcome (org.dkpro.tc.api.type.TextClassificationOutcome) 15, AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine) 14, Test (org.junit.Test) 13, Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) 8, CAS (org.apache.uima.cas.CAS) 7, File (java.io.File) 5, TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget) 5, List (java.util.List) 4, AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription) 4, TextClassificationSequence (org.dkpro.tc.api.type.TextClassificationSequence) 4, POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) 3, Lemma (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) 3, TypeSystemDescription (org.apache.uima.resource.metadata.TypeSystemDescription) 3, Evaluator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.Evaluator) 2, PossibleValue (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.PossibleValue) 2, ValuesGenerator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.ValuesGenerator) 2, ConstraintsGrammar (de.tudarmstadt.ukp.clarin.webanno.constraints.grammar.ConstraintsGrammar) 2