Search in sources :

Example 11 with LogMessage

use of de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage in project webanno by webanno.

the class RelationOffsetsRepair method repair.

/**
 * Aligns the begin/end offsets of relation annotations with the offsets of their target
 * annotation.
 * <p>
 * Only layers of type {@code WebAnnoConst.RELATION_TYPE} are processed. Layers whose type
 * cannot be resolved in the CAS are skipped. For every layer in which at least one relation
 * was adjusted, one INFO message stating the number of relations fixed <em>in that layer</em>
 * is appended to {@code aMessages}.
 *
 * @param aProject
 *            the project whose annotation layers are inspected.
 * @param aCas
 *            the CAS whose relation annotations are checked and, if necessary, modified.
 * @param aMessages
 *            receives one message per layer in which offsets were fixed.
 */
@Override
public void repair(Project aProject, CAS aCas, List<LogMessage> aMessages) {
    for (AnnotationLayer layer : annotationService.listAnnotationLayer(aProject)) {
        if (!WebAnnoConst.RELATION_TYPE.equals(layer.getType())) {
            continue;
        }
        Type type;
        try {
            type = getType(aCas, layer.getName());
        } catch (IllegalArgumentException e) {
            // can skip checking the layer because there will be no annotations anyway.
            continue;
        }
        // Counted per layer so that the message below does not include relations that were
        // already reported for a previously processed layer.
        int fixedCount = 0;
        // NOTE(review): begin/end are changed while the relation is indexed and while the
        // index is being iterated - confirm the UIMA version in use tolerates this.
        for (AnnotationFS rel : select(aCas, type)) {
            AnnotationFS target = getFeature(rel, WebAnnoConst.FEAT_REL_TARGET, AnnotationFS.class);
            if (target == null) {
                // Without a target there are no offsets to align with; leave the relation
                // untouched instead of failing with a NullPointerException.
                continue;
            }
            if ((rel.getBegin() != target.getBegin()) || (rel.getEnd() != target.getEnd())) {
                setFeature(rel, CAS.FEATURE_BASE_NAME_BEGIN, target.getBegin());
                setFeature(rel, CAS.FEATURE_BASE_NAME_END, target.getEnd());
                fixedCount++;
            }
        }
        // Report how many relations of this layer had their offsets corrected
        if (fixedCount > 0) {
            // The layer name is passed as an argument so that a '%' in the name cannot be
            // misinterpreted as a format specifier.
            aMessages.add(new LogMessage(this, LogLevel.INFO, "Fixed the offsets of [%d] relations in layer [%s].", fixedCount, layer.getName()));
        }
    }
}
Also used : AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) CasUtil.getType(org.apache.uima.fit.util.CasUtil.getType) LogMessage(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage) ArrayList(java.util.ArrayList) AnnotationLayer(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer)

Example 12 with LogMessage

use of de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage in project webanno by webanno.

the class RemoveDanglingChainLinksRepair method repair.

/**
 * Removes chain links that are not reachable from any chain head.
 * <p>
 * For every layer of type {@code WebAnnoConst.CHAIN_TYPE}, all links reachable by following
 * the {@code first}/{@code next} features starting at the chains of that layer are considered
 * in use. Every remaining link is removed from the CAS indexes and one INFO message with the
 * number of removed links is appended to {@code aMessages}. Layers whose chain or link type
 * is not declared in the type system of the CAS are skipped.
 *
 * @param aProject
 *            the project whose annotation layers are inspected.
 * @param aCas
 *            the CAS from whose indexes dangling links are removed.
 * @param aMessages
 *            receives one message per layer in which links were removed.
 */
@Override
public void repair(Project aProject, CAS aCas, List<LogMessage> aMessages) {
    for (AnnotationLayer layer : annotationService.listAnnotationLayer(aProject)) {
        if (!WebAnnoConst.CHAIN_TYPE.equals(layer.getType())) {
            continue;
        }
        String chainTypeName = layer.getName() + "Chain";
        String linkTypeName = layer.getName() + "Link";
        // If the CAS does not declare the types of this layer, it cannot contain any of its
        // annotations. Skip the layer instead of letting the type lookup abort the repair.
        if (aCas.getTypeSystem().getType(chainTypeName) == null
                || aCas.getTypeSystem().getType(linkTypeName) == null) {
            continue;
        }
        List<FeatureStructure> chains = new ArrayList<>(selectFS(aCas, getType(aCas, chainTypeName)));
        // Starts out with all links; every link reachable from a chain is taken out below
        List<AnnotationFS> links = new ArrayList<>(select(aCas, getType(aCas, linkTypeName)));
        for (FeatureStructure chain : chains) {
            AnnotationFS link = FSUtil.getFeature(chain, "first", AnnotationFS.class);
            while (link != null) {
                links.remove(link);
                link = FSUtil.getFeature(link, "next", AnnotationFS.class);
            }
        }
        // Whatever is left is not part of any chain: remove it from the indexes
        if (!links.isEmpty()) {
            links.forEach(aCas::removeFsFromIndexes);
            // The layer name is passed as an argument so that a '%' in the name cannot be
            // misinterpreted as a format specifier.
            aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed [%d] dangling links in layer [%s].", links.size(), layer.getName()));
        }
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) LogMessage(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage) ArrayList(java.util.ArrayList) AnnotationLayer(de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer)

Example 13 with LogMessage

use of de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage in project webanno by webanno.

the class RemoveDanglingRelationsRepair method repair.

/**
 * Removes relations whose source or target is contained in the set returned by
 * {@code getNonIndexedFSes(aCas)}.
 * <p>
 * An annotation is treated as a relation if its type has both a
 * {@code WebAnnoConst.FEAT_REL_SOURCE} and a {@code WebAnnoConst.FEAT_REL_TARGET} feature.
 * Matching relations are removed from the CAS indexes and a single INFO message stating the
 * number of removed relations is appended to {@code aMessages}.
 *
 * @param aProject
 *            not used by this repair.
 * @param aCas
 *            the CAS from whose indexes dangling relations are removed.
 * @param aMessages
 *            receives a message if at least one relation was removed.
 */
@Override
public void repair(Project aProject, CAS aCas, List<LogMessage> aMessages) {
    Set<FeatureStructure> nonIndexed = getNonIndexedFSes(aCas);
    // Collected first and removed afterwards so that the annotation index is not modified
    // while it is being iterated.
    Set<FeatureStructure> toDelete = new LinkedHashSet<>();
    for (AnnotationFS fs : aCas.getAnnotationIndex()) {
        Type t = fs.getType();
        Feature sourceFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_SOURCE);
        Feature targetFeat = t.getFeatureByBaseName(WebAnnoConst.FEAT_REL_TARGET);
        // Is this a relation?
        if (sourceFeat == null || targetFeat == null) {
            continue;
        }
        FeatureStructure source = fs.getFeatureValue(sourceFeat);
        FeatureStructure target = fs.getFeatureValue(targetFeat);
        // Does it point to deleted spans?
        if (nonIndexed.contains(source) || nonIndexed.contains(target)) {
            toDelete.add(fs);
        }
    }
    // Delete those relations that pointed to deleted spans
    if (!toDelete.isEmpty()) {
        toDelete.forEach(aCas::removeFsFromIndexes);
        // Report the number of relations actually removed, not the number of non-indexed
        // feature structures.
        aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed [%d] dangling relations.", toDelete.size()));
    }
}
Also used : FeatureStructure(org.apache.uima.cas.FeatureStructure) LinkedHashSet(java.util.LinkedHashSet) AnnotationFS(org.apache.uima.cas.text.AnnotationFS) Type(org.apache.uima.cas.Type) LogMessage(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage) Feature(org.apache.uima.cas.Feature)

Example 14 with LogMessage

use of de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage in project webanno by webanno.

the class RemoveZeroSizeTokensAndSentencesRepair method repair.

/**
 * Removes sentences and tokens whose begin offset is greater than or equal to their end
 * offset.
 * <p>
 * For such a token, the lemma, POS and stem attached to it (if any) are removed from the
 * indexes as well. An INFO message is appended to {@code aMessages} for every removed
 * annotation. If the JCas cannot be obtained from the CAS, the problem is logged and an ERROR
 * message is appended instead.
 *
 * @param aProject
 *            not used by this repair.
 * @param aCas
 *            the CAS from whose indexes the annotations are removed.
 * @param aMessages
 *            receives one message per removed annotation, or an error message.
 */
@Override
public void repair(Project aProject, CAS aCas, List<LogMessage> aMessages) {
    try {
        // Iterate over a snapshot because sentences are removed from the indexes inside
        // the loop.
        for (Sentence s : new java.util.ArrayList<Sentence>(select(aCas.getJCas(), Sentence.class))) {
            if (s.getBegin() >= s.getEnd()) {
                s.removeFromIndexes();
                aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed sentence with illegal span: %s", s));
            }
        }
        // Iterate over a snapshot because tokens are removed from the indexes inside the
        // loop.
        for (Token t : new java.util.ArrayList<Token>(select(aCas.getJCas(), Token.class))) {
            if (t.getBegin() >= t.getEnd()) {
                // Remove the annotations attached to the token before the token itself
                Lemma lemma = t.getLemma();
                if (lemma != null) {
                    lemma.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed lemma attached to token with illegal span: %s", t));
                }
                POS pos = t.getPos();
                if (pos != null) {
                    pos.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed POS attached to token with illegal span: %s", t));
                }
                Stem stem = t.getStem();
                if (stem != null) {
                    stem.removeFromIndexes();
                    aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed stem attached to token with illegal span: %s", t));
                }
                t.removeFromIndexes();
                aMessages.add(new LogMessage(this, LogLevel.INFO, "Removed token with illegal span: %s", t));
            }
        }
    } catch (CASException e) {
        log.error("Unable to access JCas", e);
        // The format string needs a placeholder, otherwise the exception message passed as
        // argument never shows up in the resulting log message.
        aMessages.add(new LogMessage(this, LogLevel.ERROR, "Unable to access JCas: %s", e.getMessage()));
    }
}
Also used : LogMessage(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage) POS(de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS) Lemma(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) CASException(org.apache.uima.cas.CASException) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) Stem(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Stem)

Example 15 with LogMessage

use of de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage in project webanno by webanno.

the class RemoveDanglingRelationsRepairTest method test.

/**
 * Runs {@code CasDoctor} with {@code AllFeatureStructuresIndexedCheck} and
 * {@code RemoveDanglingRelationsRepair} on a document containing a dependency whose dependent
 * token was never added to the indexes. Asserts that the analysis result is {@code false} and
 * prints all collected messages.
 */
@Test
public void test() throws Exception {
    JCas jcas = JCasFactory.createJCas();
    jcas.setDocumentText("This is a test.");

    // The governor is a regular, indexed token ...
    Token governor = new Token(jcas, 0, 4);
    governor.addToIndexes();
    // ... while the dependent is created but deliberately not added to the indexes.
    Token dependent = new Token(jcas, 6, 8);

    // Indexed relation referring to the indexed governor and the non-indexed dependent
    Dependency relation = new Dependency(jcas, 0, 8);
    relation.setGovernor(governor);
    relation.setDependent(dependent);
    relation.addToIndexes();

    CasDoctor doctor = new CasDoctor(RemoveDanglingRelationsRepair.class, AllFeatureStructuresIndexedCheck.class);
    List<LogMessage> collected = new ArrayList<>();

    // Neither the check nor the repair needs a project, hence null is passed
    boolean analysisResult = doctor.analyze(null, jcas.getCas(), collected);
    doctor.repair(null, jcas.getCas(), collected);

    assertFalse(analysisResult);

    for (LogMessage message : collected) {
        System.out.println(message);
    }
}
Also used : LogMessage(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage) ArrayList(java.util.ArrayList) CasDoctor(de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor) JCas(org.apache.uima.jcas.JCas) Token(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token) Dependency(de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency) Test(org.junit.Test)

Aggregations

LogMessage (de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor.LogMessage)17 AnnotationFS (org.apache.uima.cas.text.AnnotationFS)11 AnnotationLayer (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationLayer)8 ArrayList (java.util.ArrayList)7 Type (org.apache.uima.cas.Type)7 CasDoctor (de.tudarmstadt.ukp.clarin.webanno.diag.CasDoctor)5 FeatureStructure (org.apache.uima.cas.FeatureStructure)4 CasUtil.getType (org.apache.uima.fit.util.CasUtil.getType)4 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)3 JCas (org.apache.uima.jcas.JCas)3 Test (org.junit.Test)3 AnnotationDocument (de.tudarmstadt.ukp.clarin.webanno.model.AnnotationDocument)2 Project (de.tudarmstadt.ukp.clarin.webanno.model.Project)2 SourceDocument (de.tudarmstadt.ukp.clarin.webanno.model.SourceDocument)2 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)2 CAS (org.apache.uima.cas.CAS)2 CASException (org.apache.uima.cas.CASException)2 TypeDescription (org.apache.uima.resource.metadata.TypeDescription)2 TypeSystemDescription (org.apache.uima.resource.metadata.TypeSystemDescription)2 POS (de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS)1