Search in sources :

Example 6 with POS

use of de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS in project webanno by webanno.

the class TcfReader method convertPos.

/**
 * Turns every tag of the TCF part-of-speech layer into a {@link POS} annotation.
 * <p>
 * For each tag, the first TCF token the tag refers to is resolved through {@code aTokens}
 * (keyed by TCF token ID). The new annotation takes its begin and end offsets from that token,
 * is added to the CAS indexes and is set as the token's POS. Nothing happens when the corpus has
 * no part-of-speech layer.
 *
 * @param aJCas
 *            the CAS in which the annotations are created.
 * @param aCorpusData
 *            the TCF corpus that provides the part-of-speech layer.
 * @param aTokens
 *            the already created tokens, looked up by TCF token ID.
 */
private void convertPos(JCas aJCas, TextCorpus aCorpusData, Map<String, Token> aTokens) {
    boolean hasPosLayer = aCorpusData.getPosTagsLayer() != null;
    if (!hasPosLayer) {
        return;
    }
    int tagIndex = 0;
    while (tagIndex < aCorpusData.getPosTagsLayer().size()) {
        eu.clarin.weblicht.wlfxb.tc.api.Token[] taggedTokens = aCorpusData.getPosTagsLayer()
                .getTokens(aCorpusData.getPosTagsLayer().getTag(tagIndex));
        String tagValue = aCorpusData.getPosTagsLayer().getTag(tagIndex).getString();

        POS posAnnotation = new POS(aJCas);
        // Only the first token covered by the tag is used as the anchor.
        String anchorId = taggedTokens[0].getID();
        Token anchor = aTokens.get(anchorId);
        posAnnotation.setBegin(anchor.getBegin());
        posAnnotation.setEnd(anchor.getEnd());
        posAnnotation.setPosValue(tagValue);
        posAnnotation.addToIndexes();

        // Link the token to its part-of-speech annotation.
        anchor.setPos(posAnnotation);
        tagIndex++;
    }
}
Also used : POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)

Example 7 with POS

use of de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS in project webanno by webanno.

the class TcfWriter method writePosTags.

/**
 * Writes the part-of-speech tags found in the CAS into a new POS tags layer of the TCF corpus.
 * <p>
 * The layer's tagset name defaults to {@code "STTS"} and is replaced by the name of the first
 * {@link TagsetDescription} whose layer equals the {@link POS} class name. Each CAS token that
 * carries a POS is paired with the TCF token at the same position in the tokens layer. When the
 * CAS contains no {@link POS} annotation at all, only a debug message is logged.
 *
 * @param aJCas
 *            the CAS to read tokens, POS annotations and tagset descriptions from.
 * @param aTextCorpus
 *            the TCF corpus that receives the new layer; its tokens layer is read here.
 * @param aTokensBeginPositionMap
 *            not read by this method.
 */
private void writePosTags(JCas aJCas, TextCorpus aTextCorpus, Map<Integer, eu.clarin.weblicht.wlfxb.tc.api.Token> aTokensBeginPositionMap) {
    boolean casHasPos = JCasUtil.exists(aJCas, POS.class);
    if (!casHasPos) {
        // No part-of-speech tags in the CAS: nothing to write.
        getLogger().debug("Layer [" + TextCorpusLayerTag.POSTAGS.getXmlName() + "]: empty");
        return;
    }

    // The tokens layer is read here and indexed by position further down.
    TokensLayer tcfTokens = aTextCorpus.getTokensLayer();

    // Pick the tagset name: the first description registered for POS wins over the default.
    String tagsetName = "STTS";
    for (TagsetDescription description : select(aJCas, TagsetDescription.class)) {
        if (!description.getLayer().equals(POS.class.getName())) {
            continue;
        }
        tagsetName = description.getName();
        break;
    }

    PosTagsLayer tcfPosLayer = aTextCorpus.createPosTagsLayer(tagsetName);
    getLogger().debug("Layer [" + TextCorpusLayerTag.POSTAGS.getXmlName() + "]: created");

    // The counter advances for every CAS token, tagged or not, so positions stay aligned.
    int position = -1;
    for (Token casToken : select(aJCas, Token.class)) {
        position++;
        POS tag = casToken.getPos();
        if (tag == null || tcfPosLayer == null) {
            continue;
        }
        tcfPosLayer.addTag(tag.getPosValue(), tcfTokens.getToken(position));
    }
}
Also used : POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) PosTagsLayer(eu.clarin.weblicht.wlfxb.tc.api.PosTagsLayer) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) TokensLayer(eu.clarin.weblicht.wlfxb.tc.api.TokensLayer) TagsetDescription(de.tudarmstadt.ukp.dkpro.core.api.metadata.type.TagsetDescription)

Example 8 with POS

use of de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS in project webanno by webanno.

the class WebannoTsv1Reader method createToken.

/**
 * Create a {@link Token} in the {@link CAS} for every entry of {@code tokens}. If the lemma and
 * pos columns are not empty (i.e. not {@code "_"}), {@link Lemma} and {@link POS} annotations
 * covering the same span are created as well and attached to the token.
 * <p>
 * Token forms are located in {@code text} from left to right: the search for each form starts
 * at the end offset of the previously created token.
 *
 * @param aJCas
 *            the CAS in which the annotations are created.
 * @param text
 *            the document text in which the token forms are located.
 * @param tokens
 *            token forms keyed by token number, read for keys {@code 1..tokens.size()}.
 * @param pos
 *            part-of-speech values keyed by token number; {@code "_"} means no value.
 * @param lemma
 *            lemma values keyed by token number; {@code "_"} means no value.
 * @param tokensStored
 *            receives every created token under the key {@code "t_" + tokenNumber}.
 * @throws IllegalArgumentException
 *             if a token form does not occur in {@code text} at or after the end of the
 *             previous token.
 */
private void createToken(JCas aJCas, StringBuilder text, Map<Integer, String> tokens, Map<Integer, String> pos, Map<Integer, String> lemma, Map<String, Token> tokensStored) {
    int searchFrom = 0;
    for (int i = 1; i <= tokens.size(); i++) {
        String form = tokens.get(i);
        // Locate the form once; previously the same lookup was repeated three times.
        int begin = text.indexOf(form, searchFrom);
        if (begin < 0) {
            // Fail fast instead of indexing a token with a negative begin offset.
            throw new IllegalArgumentException("Token " + i + " [" + form
                    + "] not found in text at or after offset " + searchFrom);
        }
        int end = begin + form.length();
        // The next form is searched after this token so repeated forms map to later offsets.
        searchFrom = end;

        Token outToken = new Token(aJCas, begin, end);
        outToken.addToIndexes();

        // Add the POS to the CAS if one exists
        String posValue = pos.get(i);
        if (!posValue.equals("_")) {
            POS outPos = new POS(aJCas, outToken.getBegin(), outToken.getEnd());
            outPos.setPosValue(posValue);
            outPos.addToIndexes();
            outToken.setPos(outPos);
        }

        // Add the lemma to the CAS if one exists
        String lemmaValue = lemma.get(i);
        if (!lemmaValue.equals("_")) {
            Lemma outLemma = new Lemma(aJCas, outToken.getBegin(), outToken.getEnd());
            outLemma.setValue(lemmaValue);
            outLemma.addToIndexes();
            outToken.setLemma(outLemma);
        }

        tokensStored.put("t_" + i, outToken);
    }
}
Also used : POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)

Example 9 with POS

use of de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS in project webanno by webanno.

the class CasDiff2Test method singleNoDifferencesWithAdditionalCas1Test.

/**
 * Three CASes share the same text but only the third one carries a {@link POS} annotation. The
 * diff is computed over all three; the third user is then removed from the map before the
 * agreement is calculated for the {@code PosValue} feature.
 */
@Test
public void singleNoDifferencesWithAdditionalCas1Test() throws Exception {
    // Two CASes without any annotation
    JCas firstCas = JCasFactory.createJCas();
    firstCas.setDocumentText("test");
    JCas secondCas = JCasFactory.createJCas();
    secondCas.setDocumentText("test");

    // A third CAS with a single POS annotation spanning the whole text
    JCas thirdCas = JCasFactory.createJCas();
    thirdCas.setDocumentText("test");
    POS thirdCasPos = new POS(thirdCas, 0, 4);
    thirdCasPos.setPosValue("test");
    thirdCasPos.addToIndexes();

    Map<String, List<JCas>> casesPerUser = new LinkedHashMap<>();
    casesPerUser.put("user1", asList(firstCas));
    casesPerUser.put("user2", asList(secondCas));
    casesPerUser.put("user3", asList(thirdCas));

    List<String> comparedTypes = asList(POS.class.getName());
    List<SpanDiffAdapter> adapters = asList(SpanDiffAdapter.POS);

    DiffResult diff = CasDiff2.doDiff(comparedTypes, adapters,
            LinkCompareBehavior.LINK_TARGET_AS_LABEL, casesPerUser);
    diff.print(System.out);

    // The agreement is calculated with the third user removed from the map.
    casesPerUser.remove("user3");
    String posType = comparedTypes.get(0);
    AgreementResult measured = AgreementUtils.getAgreement(
            ConcreteAgreementMeasure.KRIPPENDORFF_ALPHA_NOMINAL_AGREEMENT, false, diff, posType,
            "PosValue", casesPerUser);

    assertEquals(1, measured.getTotalSetCount());
    assertEquals(1, measured.getIrrelevantSets().size());
    assertEquals(0, measured.getRelevantSetCount());
}
Also used : AgreementResult(de.tudarmstadt.ukp.clarin.webanno.curation.agreement.AgreementUtils.AgreementResult) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) SpanDiffAdapter(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.SpanDiffAdapter) JCas(org.apache.uima.jcas.JCas) ArrayList(java.util.ArrayList) Arrays.asList(java.util.Arrays.asList) List(java.util.List) DiffResult(de.tudarmstadt.ukp.clarin.webanno.curation.casdiff.CasDiff2.DiffResult) LinkedHashMap(java.util.LinkedHashMap) Test(org.junit.Test)

Example 10 with POS

use of de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS in project webanno by webanno.

the class SymbolicRulesTest method testSimpleSymbolicRules2.

/**
 * Parses the constraint rules in {@code symbolic2.rules}, loads a CoNLL 2006 document, adds a
 * {@link POS} annotation with the value {@code "pronoun"} and checks that the only possible
 * value generated for the {@code value} feature of the first {@link Lemma} is {@code "good"}.
 */
@Test
public void testSimpleSymbolicRules2() throws Exception {
    ParsedConstraints constraints;
    // try-with-resources closes the rules file after parsing; it used to be left open.
    try (FileInputStream rules = new FileInputStream("src/test/resources/rules/symbolic2.rules")) {
        ConstraintsGrammar parser = new ConstraintsGrammar(rules);
        Parse p = parser.Parse();
        constraints = p.accept(new ParserVisitor());
    }
    JCas jcas = JCasFactory.createJCas();
    CollectionReader reader = createReader(Conll2006Reader.class, Conll2006Reader.PARAM_SOURCE_LOCATION, "src/test/resources/text/1.conll");
    reader.getNext(jcas.getCas());
    // Extra POS annotation added on top of what the reader produced.
    POS pos = new POS(jcas, 8, 9);
    pos.setPosValue("pronoun");
    pos.addToIndexes();
    Evaluator constraintsEvaluator = new ValuesGenerator();
    Lemma lemma = select(jcas, Lemma.class).iterator().next();
    List<PossibleValue> possibleValues = constraintsEvaluator.generatePossibleValues(lemma, "value", constraints);
    List<PossibleValue> expectedOutput = new ArrayList<>();
    expectedOutput.add(new PossibleValue("good", true));
    assertEquals(expectedOutput, possibleValues);
}
Also used : CollectionReader(org.apache.uima.collection.CollectionReader) Parse(de.tudarmstadt.ukp.clarin.webanno.constraints.grammar.syntaxtree.Parse) ParserVisitor(de.tudarmstadt.ukp.clarin.webanno.constraints.visitor.ParserVisitor) ArrayList(java.util.ArrayList) ParsedConstraints(de.tudarmstadt.ukp.clarin.webanno.constraints.model.ParsedConstraints) JCas(org.apache.uima.jcas.JCas) ValuesGenerator(de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.ValuesGenerator) Evaluator(de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.Evaluator) FileInputStream(java.io.FileInputStream) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) PossibleValue(de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.PossibleValue) ConstraintsGrammar(de.tudarmstadt.ukp.clarin.webanno.constraints.grammar.ConstraintsGrammar) Test(org.junit.Test)

Aggregations

POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)35 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)21 ArrayList (java.util.ArrayList)15 JCas (org.apache.uima.jcas.JCas)14 Test (org.junit.Test)12 Lemma (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma)11 Dependency (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency)9 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)8 List (java.util.List)8 Type (org.apache.uima.cas.Type)8 AnnotationFeature (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationFeature)7 MorphologicalFeatures (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.morph.MorphologicalFeatures)7 LinkedHashMap (java.util.LinkedHashMap)7 Feature (org.apache.uima.cas.Feature)7 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)7 Stem (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem)5 HashMap (java.util.HashMap)5 Evaluator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.Evaluator)3 PossibleValue (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.PossibleValue)3 ValuesGenerator (de.tudarmstadt.ukp.clarin.webanno.constraints.evaluator.ValuesGenerator)3