Search in sources :

Example 1 with TokenBuilder

use of org.apache.uima.fit.testing.factory.TokenBuilder in project webanno by webanno.

Defined in the class ConstraintsGeneratorTest, method makeJCasOneSentence.

/**
 * Creates a JCas whose type system is the merge of the default type system description and the
 * test types loaded from {@code webannoTestTypes.xml}, sets the document id to {@code "doc"} and
 * fills the CAS with tokens built from the text {@code "This is a test ."}.
 *
 * @return the populated JCas
 * @throws UIMAException if the type systems cannot be loaded/merged or the JCas cannot be created
 */
private JCas makeJCasOneSentence() throws UIMAException {
    // Default description plus the project-specific test types, merged into one type system.
    TypeSystemDescription defaultTypes = TypeSystemDescriptionFactory.createTypeSystemDescription();
    TypeSystemDescription testTypes = TypeSystemDescriptionFactory
            .createTypeSystemDescriptionFromPath("src/test/resources/desc/types/webannoTestTypes.xml");
    JCas result = JCasFactory.createJCas(
            CasCreationUtils.mergeTypeSystems(asList(defaultTypes, testTypes)));

    DocumentMetaData.create(result).setDocumentId("doc");

    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class, Sentence.class);
    tokenBuilder.buildTokens(result, "This is a test .");
    return result;
}
Also used : TokenBuilder(org.apache.uima.fit.testing.factory.TokenBuilder) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 2 with TokenBuilder

use of org.apache.uima.fit.testing.factory.TokenBuilder in project webanno by webanno.

Defined in the class WebAnnoTsv3WriterTestBase, method makeJCasTwoSentences.

/**
 * Creates a JCas via {@code makeJCas()} and fills it with tokens built from a two-line text.
 * Asserts that exactly two {@link Sentence} annotations are present afterwards.
 *
 * @return the populated JCas
 * @throws UIMAException if the JCas cannot be created
 */
private static JCas makeJCasTwoSentences() throws UIMAException {
    JCas result = makeJCas();

    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<>(Token.class, Sentence.class);
    tokenBuilder.buildTokens(result, "He loves her .\nShe loves him not .");

    // Guard: the fixture is only useful if the text really ended up as two sentences.
    int sentenceCount = select(result, Sentence.class).size();
    assertEquals(2, sentenceCount);

    return result;
}
Also used : TokenBuilder(org.apache.uima.fit.testing.factory.TokenBuilder) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 3 with TokenBuilder

use of org.apache.uima.fit.testing.factory.TokenBuilder in project webanno by webanno.

Defined in the class WebAnnoTsv3WriterTestBase, method makeJCasOneSentence.

/**
 * Creates a JCas via {@code makeJCas()}, tokenizes the given text into it and then replaces all
 * {@link Sentence} annotations produced by the token builder with a single sentence spanning the
 * whole document text.
 *
 * @param aText the text to tokenize; it becomes the document text of the returned JCas
 * @return the populated JCas containing exactly one sentence covering the entire text
 * @throws UIMAException if the JCas cannot be created
 */
private static JCas makeJCasOneSentence(String aText) throws UIMAException {
    JCas jcas = makeJCas();
    TokenBuilder<Token, Sentence> tb = new TokenBuilder<>(Token.class, Sentence.class);
    tb.buildTokens(jcas, aText);
    // Remove the sentences created by the token builder. Iterate over a snapshot copy: the
    // collection returned by select() may be a live view of the annotation index, and removing
    // annotations from an index while iterating over it is not safe.
    for (Sentence s : new java.util.ArrayList<Sentence>(select(jcas, Sentence.class))) {
        s.removeFromIndexes();
    }
    // Add a new sentence covering the whole text
    new Sentence(jcas, 0, jcas.getDocumentText().length()).addToIndexes();
    return jcas;
}
Also used : TokenBuilder(org.apache.uima.fit.testing.factory.TokenBuilder) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)

Example 4 with TokenBuilder

use of org.apache.uima.fit.testing.factory.TokenBuilder in project dkpro-tc by dkpro.

Defined in the class SimilarityPairFeatureTest, method similarityPairFeatureTest.

/**
 * Builds two tokenized views ({@code VIEW1}, {@code VIEW2}) in one JCas, runs a
 * {@code NoOpAnnotator} engine over it and checks that {@code SimilarityPairFeatureExtractor},
 * configured with a greedy-string-tiling resource (minimum match length {@code "3"}), yields
 * exactly one feature named {@code SimilarityGreedyStringTiling_3} with value 0.8125
 * (tolerance 0.0001).
 */
@Test
public void similarityPairFeatureTest() throws Exception {
    ExternalResourceDescription tilingMeasure = ExternalResourceFactory.createExternalResourceDescription(
            GreedyStringTilingMeasureResource.class,
            GreedyStringTilingMeasureResource.PARAM_MIN_MATCH_LENGTH, "3");

    AnalysisEngine noOpEngine = createEngine(createEngineDescription(NoOpAnnotator.class));
    JCas document = noOpEngine.newJCas();
    TokenBuilder<Token, Sentence> tokenBuilder = new TokenBuilder<Token, Sentence>(Token.class, Sentence.class);

    // First text of the pair.
    JCas firstView = document.createView(VIEW1);
    firstView.setDocumentLanguage("en");
    tokenBuilder.buildTokens(firstView, "This is a test .");

    // Second text of the pair.
    JCas secondView = document.createView(VIEW2);
    secondView.setDocumentLanguage("en");
    tokenBuilder.buildTokens(secondView, "Test is this .");

    noOpEngine.process(document);

    SimilarityPairFeatureExtractor pairExtractor = FeatureUtil.createResource(
            SimilarityPairFeatureExtractor.class,
            SimilarityPairFeatureExtractor.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
            SimilarityPairFeatureExtractor.PARAM_SEGMENT_FEATURE_PATH, Token.class.getName(),
            SimilarityPairFeatureExtractor.PARAM_TEXT_SIMILARITY_RESOURCE, tilingMeasure);

    // Views are looked up by name again, exactly as the extractor's callers would do.
    Set<Feature> extracted = pairExtractor.extract(document.getView(VIEW1), document.getView(VIEW2));

    Assert.assertEquals(1, extracted.size());
    Feature onlyFeature = extracted.iterator().next();
    assertFeature("SimilarityGreedyStringTiling_3", 0.8125, onlyFeature, 0.0001);
}
Also used : TokenBuilder(org.apache.uima.fit.testing.factory.TokenBuilder) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) SimilarityPairFeatureExtractor(org.dkpro.tc.features.pair.similarity.SimilarityPairFeatureExtractor) ExternalResourceDescription(org.apache.uima.resource.ExternalResourceDescription) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 5 with TokenBuilder

use of org.apache.uima.fit.testing.factory.TokenBuilder in project webanno by webanno.

Defined in the class CasDiff2Test, method relationStackedSpansTest.

/**
 * Compares two annotators' CASes that each contain one {@code webanno.custom.Relation} between
 * named entities on the first and last token of {@code "This is a test ."}. In the first CAS an
 * additional named entity is stacked on the governor span; the second CAS has no stacked entity.
 * <p>
 * The test expects the diff to contain one entry with no differing and no incomplete
 * configuration sets, and the Cohen's kappa agreement result to contain one plurality set.
 */
@Test
public void relationStackedSpansTest() throws Exception {
    TypeSystemDescription global = TypeSystemDescriptionFactory.createTypeSystemDescription();
    TypeSystemDescription local = TypeSystemDescriptionFactory.createTypeSystemDescriptionFromPath("src/test/resources/desc/type/webannoTestTypes.xml");
    TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(asList(global, local));
    TokenBuilder<Token, Sentence> tb = new TokenBuilder<>(Token.class, Sentence.class);

    // User 1: governor span carries a stacked named entity. User 2: no stacking.
    JCas jcasA = buildJCasWithFirstToLastTokenRelation(merged, tb, true);
    JCas jcasB = buildJCasWithFirstToLastTokenRelation(merged, tb, false);

    Map<String, List<JCas>> casByUser = new LinkedHashMap<>();
    casByUser.put("user1", asList(jcasA));
    casByUser.put("user2", asList(jcasB));
    List<String> entryTypes = asList("webanno.custom.Relation");
    List<? extends DiffAdapter> diffAdapters = asList(new ArcDiffAdapter("webanno.custom.Relation", WebAnnoConst.FEAT_REL_TARGET, WebAnnoConst.FEAT_REL_SOURCE, "value"));
    DiffResult diff = CasDiff2.doDiff(entryTypes, diffAdapters, LinkCompareBehavior.LINK_TARGET_AS_LABEL, casByUser);
    diff.print(System.out);
    assertEquals(1, diff.size());
    assertEquals(0, diff.getDifferingConfigurationSets().size());
    assertEquals(0, diff.getIncompleteConfigurationSets().size());

    // Compute the agreement on the relation's "value" feature from the diff result.
    AgreementResult agreement = AgreementUtils.getCohenKappaAgreement(diff, "webanno.custom.Relation", "value", casByUser);
    // Dump the agreement for debugging, then verify it.
    System.out.printf("Agreement: %s%n", agreement.toString());
    AgreementUtils.dumpAgreementStudy(System.out, agreement);
    assertEquals(1, agreement.getPluralitySets().size());
}

/**
 * Creates a JCas for the text {@code "This is a test ."} containing a named entity on the first
 * token (governor), a named entity on the last token (dependent) and one
 * {@code webanno.custom.Relation} annotation, positioned on the dependent span, that links the
 * two and has the value {@code "REL"}.
 *
 * @param aTypeSystem the type system used to create the JCas
 * @param aTokenBuilder the token builder used to tokenize the text
 * @param aStackGovernor if {@code true}, a second named entity with the same span as the
 *            governor is added to the indexes (a "stacked" annotation)
 * @return the populated JCas
 * @throws Exception if the JCas cannot be created or populated
 */
private static JCas buildJCasWithFirstToLastTokenRelation(TypeSystemDescription aTypeSystem,
        TokenBuilder<Token, Sentence> aTokenBuilder, boolean aStackGovernor) throws Exception {
    JCas jcas = JCasFactory.createJCas(aTypeSystem);
    CAS cas = jcas.getCas();
    aTokenBuilder.buildTokens(jcas, "This is a test .");
    List<Token> tokens = new ArrayList<>(select(jcas, Token.class));
    Token firstToken = tokens.get(0);
    Token lastToken = tokens.get(tokens.size() - 1);

    NamedEntity governor = new NamedEntity(jcas, firstToken.getBegin(), firstToken.getEnd());
    governor.addToIndexes();
    if (aStackGovernor) {
        // Here we add a stacked named entity!
        new NamedEntity(jcas, firstToken.getBegin(), firstToken.getEnd()).addToIndexes();
    }
    NamedEntity dependent = new NamedEntity(jcas, lastToken.getBegin(), lastToken.getEnd());
    dependent.addToIndexes();

    Type relationType = cas.getTypeSystem().getType("webanno.custom.Relation");
    AnnotationFS relation = cas.createAnnotation(relationType, dependent.getBegin(), dependent.getEnd());
    FSUtil.setFeature(relation, "Governor", governor);
    FSUtil.setFeature(relation, "Dependent", dependent);
    FSUtil.setFeature(relation, "value", "REL");
    cas.addFsToIndexes(relation);
    return jcas;
}
Also used : AgreementResult(de.tudarmstadt.ukp.clarin.webanno.curation.agreement.AgreementUtils.AgreementResult) TokenBuilder(org.apache.uima.fit.testing.factory.TokenBuilder) TypeSystemDescription(org.apache.uima.resource.metadata.TypeSystemDescription) ArcDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ArcDiffAdapter) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) LinkedHashMap(java.util.LinkedHashMap) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) Type(org.apache.uima.cas.Type) CAS(org.apache.uima.cas.CAS) ArrayList(java.util.ArrayList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.DiffResult) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) Test(org.junit.Test)

Aggregations

Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 7; Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 7; TokenBuilder (org.apache.uima.fit.testing.factory.TokenBuilder): 7; JCas (org.apache.uima.jcas.JCas): 7; TypeSystemDescription (org.apache.uima.resource.metadata.TypeSystemDescription): 3; Test (org.junit.Test): 2; AgreementResult (de.tudarmstadt.ukp.clarin.webanno.curation.agreement.AgreementUtils.AgreementResult): 1; ArcDiffAdapter (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.ArcDiffAdapter): 1; DiffResult (de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.DiffResult): 1; NamedEntity (de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity): 1; ArrayList (java.util.ArrayList): 1; Arrays.asList (java.util.Arrays.asList): 1; LinkedHashMap (java.util.LinkedHashMap): 1; List (java.util.List): 1; AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 1; AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 1; CAS (org.apache.uima.cas.CAS): 1; Type (org.apache.uima.cas.Type): 1; AnnotationFS (org.apache.uima.cas.text.AnnotationFS): 1; ExternalResourceDescription (org.apache.uima.resource.ExternalResourceDescription): 1