Search in sources :

Example 6 with SDocumentGraph

use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.

the class EventExtractor method computeDisplayAnnotations.

/**
 * Returns the annotation names to display according to the mappings configuration.
 *
 * <p>The candidate pool consists of all annotation names reported by
 * {@code getAnnotationLevelSet} for nodes of the given type. For token types
 * no namespace is passed ({@code null}); for every other type the namespace
 * of the visualizer input is used.
 *
 * <p>If the mapping stored under {@code MAPPING_ANNOS_KEY} is non-blank, it
 * replaces the default list: the value is split at commas and each trimmed
 * entry is either taken literally as an annotation name or, when it starts
 * and ends with a slash, compiled as a regular expression that selects every
 * fully matching name still left in the pool. An entry whose regular
 * expression is invalid is logged and skipped instead of aborting the whole
 * computation.
 *
 * <p>If a mapping is stored under {@code MAPPING_ANNO_REGEX_KEY}, the
 * resulting list is afterwards reduced to the names fully matching that
 * expression. An invalid expression is logged and the list is left unfiltered.
 *
 * @param input The input for the visualizer, may be {@code null}.
 * @param type Which type of nodes to include.
 * @return a new list with the annotation names to display; empty if
 *         {@code input} is {@code null}
 */
public static List<String> computeDisplayAnnotations(VisualizerInput input, Class<? extends SNode> type) {
    if (input == null) {
        return new LinkedList<>();
    }
    SDocumentGraph graph = input.getDocument().getDocumentGraph();
    Set<String> annoPool = SToken.class.isAssignableFrom(type) ? getAnnotationLevelSet(graph, null, type) : getAnnotationLevelSet(graph, input.getNamespace(), type);
    List<String> annos = new LinkedList<>(annoPool);
    String annosConfiguration = input.getMappings().getProperty(MAPPING_ANNOS_KEY);
    if (annosConfiguration != null && annosConfiguration.trim().length() > 0) {
        String[] split = annosConfiguration.split(",");
        // an explicit configuration replaces the default "everything" list
        annos.clear();
        for (String s : split) {
            s = s.trim();
            // is regular expression?
            if (s.startsWith("/") && s.endsWith("/")) {
                Pattern regex;
                try {
                    regex = Pattern.compile(StringUtils.strip(s, "/"));
                } catch (PatternSyntaxException ex) {
                    // the pattern comes from user configuration: treat it like the
                    // filter expression below and do not let one bad entry abort
                    // the whole computation
                    log.warn("invalid regular expression \"" + s + "\" in annotation mapping for grid visualizer", ex);
                    continue;
                }
                // go over all remaining items in our pool of all annotations and
                // check if they match
                LinkedList<String> matchingAnnos = new LinkedList<>();
                for (String a : annoPool) {
                    if (regex.matcher(a).matches()) {
                        matchingAnnos.add(a);
                    }
                }
                annos.addAll(matchingAnnos);
                // names claimed by one entry must not be added again by a later one
                annoPool.removeAll(matchingAnnos);
            } else {
                annos.add(s);
                annoPool.remove(s);
            }
        }
    }
    // filter already found annotation names by regular expression
    // if this was given as mapping
    String regexFilterRaw = input.getMappings().getProperty(MAPPING_ANNO_REGEX_KEY);
    if (regexFilterRaw != null) {
        try {
            Pattern regexFilter = Pattern.compile(regexFilterRaw);
            ListIterator<String> itAnnos = annos.listIterator();
            while (itAnnos.hasNext()) {
                String a = itAnnos.next();
                // remove entry if not matching
                if (!regexFilter.matcher(a).matches()) {
                    itAnnos.remove();
                }
            }
        } catch (PatternSyntaxException ex) {
            log.warn("invalid regular expression in mapping for grid visualizer", ex);
        }
    }
    return annos;
}
Also used : SToken(org.corpus_tools.salt.common.SToken) Pattern(java.util.regex.Pattern) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) LinkedList(java.util.LinkedList) PatternSyntaxException(java.util.regex.PatternSyntaxException)

Example 7 with SDocumentGraph

use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.

the class RSTImpl method getText.

/**
 * Extracts the primary text covered by a token.
 *
 * The document graph of the token is asked for the data source sequences
 * reachable via {@code SALT_TYPE.STEXT_OVERLAPPING_RELATION}. If exactly one
 * sequence exists and its data source is an {@link STextualDS}, the range
 * between the start and end of the sequence is cut out of that text.
 *
 * @param currNode the token whose covered text is requested
 * @return the covered text, or {@code null} if there is not exactly one
 *         overlapped sequence or its data source is not an {@link STextualDS}
 */
private String getText(SToken currNode) {
    SDocumentGraph docGraph = (SDocumentGraph) currNode.getGraph();
    List<DataSourceSequence> sequences = docGraph.getOverlappedDataSourceSequence(currNode, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
    // only support one text for spans
    boolean exactlyOne = sequences != null && sequences.size() == 1;
    if (!exactlyOne) {
        log.error("rst supports only one text and only text level");
        return null;
    }
    log.debug("sSequences {}", sequences.toString());
    DataSourceSequence only = sequences.get(0);
    // Check if it is a text data structure. As described in the salt manual in
    // chapter "5.8 More specific nodes and relations" the start and end point
    // of a range of token is stored in superordinate node of type SSequentialDS
    if (!(only.getDataSource() instanceof STextualDS)) {
        // something fundamentally goes wrong
        log.error("{} instead of {}", only.getDataSource().getClass().getName(), STextualDS.class.getName());
        return null;
    }
    STextualDS textDS = (STextualDS) only.getDataSource();
    int from = only.getStart().intValue();
    int to = only.getEnd().intValue();
    return textDS.getText().substring(from, to);
}
Also used : SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) STextualDS(org.corpus_tools.salt.common.STextualDS) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence)

Example 8 with SDocumentGraph

use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.

the class SaltAnnotateExtractorTest method testLayerNodes.

/**
 * Extracts the single-text result set and verifies, layer by layer, which
 * nodes the first document graph assigns to each named layer.
 */
@Test
public void testLayerNodes() throws SQLException {
    SaltProject project = instance.extractData(resultSetProviderSingleText.getResultSet());
    assertNotNull(project);
    SDocumentGraph g = project.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();

    // NOTE(review): the expected order looks like plain string order of the
    // node names ("..._10" before "..._9") - confirm against NameComparator
    assertLayerNodeNames(g, "exmaralda",
            "Focus_newInfSeg_10", "Focus_newInfSeg_9",
            "Inf-StatSeg_29", "Inf-StatSeg_30",
            "NPSeg_29", "NPSeg_30",
            "PPSeg_7",
            "SentSeg_10", "SentSeg_9");
    assertLayerNodeNames(g, "mmax",
            "primmarkSeg_1000154", "primmarkSeg_60",
            "sentenceSeg_50010", "sentenceSeg_50011", "sentenceSeg_5009");
    assertLayerNodeNames(g, "tiger",
            "const_50", "const_52", "const_54", "const_55", "const_56",
            "const_57", "const_58", "const_59", "const_60", "const_61");
    assertLayerNodeNames(g, "default_ns",
            "tok_150", "tok_151", "tok_152", "tok_153", "tok_154", "tok_155",
            "tok_156", "tok_157", "tok_158", "tok_159", "tok_160", "tok_161");
    assertLayerNodeNames(g, "rst",
            "u0", "u10", "u11", "u12", "u20", "u23", "u24", "u27", "u28");

    // the "dep" layer exists but holds no nodes
    assertEquals(0, g.getLayerByName("dep").get(0).getNodes().size());
}

/**
 * Asserts that the first layer with the given name contains exactly the
 * expected nodes, compared by name after sorting with {@code NameComparator}.
 */
private void assertLayerNodeNames(SDocumentGraph graph, String layerName, String... expectedNames) {
    List<SNode> nodes = new ArrayList<>(graph.getLayerByName(layerName).get(0).getNodes());
    Collections.sort(nodes, new NameComparator());
    assertEquals(expectedNames.length, nodes.size());
    for (int i = 0; i < expectedNames.length; i++) {
        assertEquals(expectedNames[i], nodes.get(i).getName());
    }
}
Also used : SNode(org.corpus_tools.salt.core.SNode) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) ArrayList(java.util.ArrayList) SaltProject(org.corpus_tools.salt.common.SaltProject) Test(org.junit.Test)

Example 9 with SDocumentGraph

use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.

the class SaltAnnotateExtractorTest method testMultipleTextGeneration.

/**
 * Verifies that extracting the multi-text result set yields a document graph
 * with three textual data sources.
 */
@Test
public void testMultipleTextGeneration() throws SQLException {
    SaltProject extracted = instance.extractData(resultSetProviderMultiText.getResultSet());
    assertNotNull(extracted);
    // only the first document of the first corpus graph is inspected
    SDocumentGraph docGraph = extracted.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();
    int textCount = docGraph.getTextualDSs().size();
    assertEquals(3, textCount);
}
Also used : SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) SaltProject(org.corpus_tools.salt.common.SaltProject) Test(org.junit.Test)

Example 10 with SDocumentGraph

use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.

the class LegacyGraphConverter method convertToAnnotationGraph.

/**
 * Converts the document graph of a Salt document into a legacy
 * {@code AnnotationGraph}.
 *
 * <p>The matched node IDs are read from the {@code FEAT_MATCHEDIDS} feature of
 * the document graph (namespace {@code ANNIS_NS}). Each Salt ID of the match
 * is resolved to a node and translated to the internal ID stored in the
 * node's {@code FEAT_RELANNIS_NODE} feature. If the node, the feature, or a
 * value of the expected type is missing, {@code -1} is recorded instead, so
 * that the list keeps one entry per matched ID.
 *
 * @param document the document whose graph should be converted
 * @return the result of the two-argument {@code convertToAnnotationGraph}
 *         overload for the document graph and the collected node IDs
 */
public static AnnotationGraph convertToAnnotationGraph(SDocument document) {
    SDocumentGraph docGraph = document.getDocumentGraph();
    SFeature featMatchedIDs = docGraph.getFeature(ANNIS_NS, FEAT_MATCHEDIDS);
    Match match = new Match();
    if (featMatchedIDs != null && featMatchedIDs.getValue_STEXT() != null) {
        match = Match.parseFromString(featMatchedIDs.getValue_STEXT(), ',');
    }
    // get matched node names by using the IDs
    List<Long> matchedNodeIDs = new ArrayList<>();
    for (URI u : match.getSaltIDs()) {
        SNode node = docGraph.getNode(u.toASCIIString());
        if (node == null) {
            // that's weird, fallback to the id
            log.warn("Could not get matched node from id {}", u.toASCIIString());
            matchedNodeIDs.add(-1L);
            continue;
        }
        SFeature relANNISFeat = node.getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE));
        Object featValue = relANNISFeat == null ? null : relANNISFeat.getValue();
        if (featValue instanceof RelannisNodeFeature) {
            matchedNodeIDs.add(((RelannisNodeFeature) featValue).getInternalID());
        } else {
            // same fallback as for a missing node: keep the position in the list
            // instead of failing with a NullPointerException/ClassCastException
            log.warn("Could not get relANNIS node feature for matched node {}", u.toASCIIString());
            matchedNodeIDs.add(-1L);
        }
    }
    return convertToAnnotationGraph(docGraph, matchedNodeIDs);
}
Also used : AnnotationGraph(annis.model.AnnotationGraph) RelannisNodeFeature(annis.model.RelannisNodeFeature) SNode(org.corpus_tools.salt.core.SNode) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) ArrayList(java.util.ArrayList) URI(java.net.URI) SFeature(org.corpus_tools.salt.core.SFeature) Match(annis.service.objects.Match)

Aggregations

SDocumentGraph (org.corpus_tools.salt.common.SDocumentGraph)24 SNode (org.corpus_tools.salt.core.SNode)12 ArrayList (java.util.ArrayList)9 SDocument (org.corpus_tools.salt.common.SDocument)8 SaltProject (org.corpus_tools.salt.common.SaltProject)8 SCorpusGraph (org.corpus_tools.salt.common.SCorpusGraph)7 SToken (org.corpus_tools.salt.common.SToken)7 LinkedList (java.util.LinkedList)6 Test (org.junit.Test)6 HashMap (java.util.HashMap)5 SRelation (org.corpus_tools.salt.core.SRelation)5 RelannisNodeFeature (annis.model.RelannisNodeFeature)4 DataSourceSequence (org.corpus_tools.salt.util.DataSourceSequence)4 TreeSet (java.util.TreeSet)3 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 Pattern (java.util.regex.Pattern)3 SCorpus (org.corpus_tools.salt.common.SCorpus)3 SFeature (org.corpus_tools.salt.core.SFeature)3 Row (annis.gui.widgets.grid.Row)2 IOException (java.io.IOException)2