Search in sources :

Example 16 with SToken

use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.

the class LegacyGraphConverter method convertToAnnotationGraph.

/**
 * Converts a Salt document graph into the legacy {@link AnnotationGraph} model.
 *
 * <p>Only nodes that carry the relANNIS node feature (qualified name built from
 * {@code ANNIS_NS} and {@code FEAT_RELANNIS_NODE}) are converted; all other nodes
 * are skipped. Relations are converted when a {@link RelannisEdgeFeature} can be
 * extracted from them, and dominance relations that have an artificial dominance
 * component and pre-order are added a second time with a {@code null} name.
 *
 * @param docGraph the Salt document graph to convert
 * @param matchedNodeIDs internal IDs of the matched nodes; the 1-based position of
 *        an ID in this list becomes the node's "matched node in query" value
 * @return the newly created annotation graph
 * @throws NullPointerException if a token's overlapped data source sequence, its
 *         data source, its data, or its start/end is {@code null}
 * @throws IllegalStateException if a token's start/end offsets do not lie inside
 *         the text of its data source
 */
public static AnnotationGraph convertToAnnotationGraph(SDocumentGraph docGraph, List<Long> matchedNodeIDs) {
    // set copy gives constant-time membership checks; the list is still needed for positions
    Set<Long> matchSet = new HashSet<>(matchedNodeIDs);
    AnnotationGraph annoGraph = new AnnotationGraph();
    List<String> pathList = CommonHelper.getCorpusPath(docGraph.getDocument().getGraph(), docGraph.getDocument());
    annoGraph.setPath(pathList.toArray(new String[pathList.size()]));
    annoGraph.setDocumentName(docGraph.getDocument().getName());
    // remembers which legacy node was created for which Salt node (needed for the relations)
    Map<SNode, AnnisNode> allNodes = new HashMap<>();
    for (SNode sNode : docGraph.getNodes()) {
        SFeature featNodeRaw = sNode.getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE));
        if (featNodeRaw != null) {
            // the feature is looked up once and reused for all node properties below
            RelannisNodeFeature feat = (RelannisNodeFeature) featNodeRaw.getValue();
            long internalID = feat.getInternalID();
            AnnisNode aNode = new AnnisNode(internalID);
            for (SAnnotation sAnno : sNode.getAnnotations()) {
                aNode.addNodeAnnotation(new Annotation(sAnno.getNamespace(), sAnno.getName(), sAnno.getValue_STEXT()));
            }
            aNode.setName(sNode.getName());
            Set<SLayer> layers = sNode.getLayers();
            if (!layers.isEmpty()) {
                // the name of the first layer is used as the node namespace
                aNode.setNamespace(layers.iterator().next().getName());
            }
            if (sNode instanceof SToken) {
                List<DataSourceSequence> seqList = docGraph.getOverlappedDataSourceSequence(sNode, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
                // an empty list is treated like a missing one instead of failing in get(0)
                if (seqList != null && !seqList.isEmpty()) {
                    DataSourceSequence seq = seqList.get(0);
                    Preconditions.checkNotNull(seq, "DataSourceSequence is null for token %s", sNode.getId());
                    SSequentialDS seqDS = seq.getDataSource();
                    Preconditions.checkNotNull(seqDS, "SSequentalDS is null for token %s", sNode.getId());
                    Preconditions.checkNotNull(seqDS.getData(), "SSequentalDS data is null for token %s", sNode.getId());
                    String seqDSData = (String) seqDS.getData();
                    Preconditions.checkNotNull(seqDSData, "casted SSequentalDS data is null for token %s", sNode.getId());
                    Preconditions.checkNotNull(seq.getStart(), "SSequentalDS start is null for token %s", sNode.getId());
                    Preconditions.checkNotNull(seq.getEnd(), "SSequentalDS end is null for supposed token %s", sNode.getId());
                    int start = seq.getStart().intValue();
                    int end = seq.getEnd().intValue();
                    // one placeholder per argument so that id, start and end are reported correctly
                    Preconditions.checkState(start >= 0 && start <= end && end <= seqDSData.length(), "Illegal start or end of textual DS for token %s (start: %s, end: %s)", sNode.getId(), start, end);
                    String spannedText = seqDSData.substring(start, end);
                    Preconditions.checkNotNull(spannedText, "spanned text is null for supposed token %s (start: %s, end: %s)", sNode.getId(), start, end);
                    aNode.setSpannedText(spannedText);
                    aNode.setToken(true);
                    aNode.setTokenIndex(feat.getTokenIndex());
                }
            } else {
                aNode.setToken(false);
                aNode.setTokenIndex(null);
            }
            aNode.setCorpus(feat.getCorpusRef());
            aNode.setTextId(feat.getTextRef());
            aNode.setLeft(feat.getLeft());
            aNode.setLeftToken(feat.getLeftToken());
            aNode.setRight(feat.getRight());
            aNode.setRightToken(feat.getRightToken());
            if (matchSet.contains(aNode.getId())) {
                // 1-based position of the node in the list of matched IDs
                aNode.setMatchedNodeInQuery((long) matchedNodeIDs.indexOf(aNode.getId()) + 1);
                annoGraph.getMatchedNodeIds().add(aNode.getId());
            } else {
                aNode.setMatchedNodeInQuery(null);
            }
            annoGraph.addNode(aNode);
            allNodes.put(sNode, aNode);
        }
    }
    for (SRelation rel : docGraph.getRelations()) {
        RelannisEdgeFeature featRelation = RelannisEdgeFeature.extract(rel);
        if (featRelation != null) {
            addRelation(rel, featRelation.getPre(), featRelation.getComponentID(), allNodes, annoGraph);
        }
    }
    // add relations with empty relation name for every dominance relation
    List<SDominanceRelation> dominanceRelations = new LinkedList<>(docGraph.getDominanceRelations());
    for (SDominanceRelation rel : dominanceRelations) {
        RelannisEdgeFeature featEdge = RelannisEdgeFeature.extract(rel);
        if (featEdge != null && featEdge.getArtificialDominanceComponent() != null && featEdge.getArtificialDominancePre() != null) {
            addRelation(SDominanceRelation.class, null, rel.getAnnotations(), rel.getSource(), rel.getTarget(), rel.getLayers(), featEdge.getArtificialDominancePre(), featEdge.getArtificialDominanceComponent(), allNodes, annoGraph);
        }
    }
    return annoGraph;
}
Also used : SLayer(org.corpus_tools.salt.core.SLayer) SNode(org.corpus_tools.salt.core.SNode) HashMap(java.util.HashMap) SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) RelannisEdgeFeature(annis.model.RelannisEdgeFeature) SDominanceRelation(org.corpus_tools.salt.common.SDominanceRelation) HashSet(java.util.HashSet) RelannisNodeFeature(annis.model.RelannisNodeFeature) SAnnotation(org.corpus_tools.salt.core.SAnnotation) SSequentialDS(org.corpus_tools.salt.common.SSequentialDS) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence) Annotation(annis.model.Annotation) SAnnotation(org.corpus_tools.salt.core.SAnnotation) LinkedList(java.util.LinkedList) AnnotationGraph(annis.model.AnnotationGraph) AnnisNode(annis.model.AnnisNode) SFeature(org.corpus_tools.salt.core.SFeature)

Example 17 with SToken

use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.

the class TimelineReconstructorTest method testBematacDialog.

/**
 * Reconstructs the timeline of a sample dialog and verifies the result.
 * The dialog is this one: https://korpling.org/annis3/?id=44b60a56-31da-4469-b438-62fdb67f28f1
 *
 * The Salt document which was generated by ANNIS is loaded from the
 * {@code SampleDialog.salt} resource and its virtual tokenization is removed.
 * Afterwards the test checks that
 * <ul>
 * <li>the newly created tokenization is correct</li>
 * <li>spans cover the correct token</li>
 * <li>each of the six textual data sources carries the expected text</li>
 * </ul>
 */
@Test
public void testBematacDialog() {
    String saltLocation = getClass().getResource("SampleDialog.salt").toString();
    SDocumentGraph docGraph = SaltUtil.loadDocumentGraph(URI.createURI(saltLocation));

    // maps the qualified span annotation name to the name of the ordering it belongs to
    Map<String, String> orderByAnno = new HashMap<>();
    orderByAnno.put("default_ns::instructee_utt", "instructee_dipl");
    orderByAnno.put("default_ns::instructor_utt", "instructor_dipl");
    TimelineReconstructor.removeVirtualTokenization(docGraph, orderByAnno);

    // expected texts: instructor_dipl, instructor_norm, instructee_dipl, instructee_norm, instructee_extra, break
    List<STextualDS> allTexts = docGraph.getTextualDSs();
    assertEquals(6, allTexts.size());

    STextualDS instructorDipl = findTextualDSByName("instructor_dipl", allTexts);
    assertNotNull(instructorDipl);
    assertEquals("in Richtung des Toasters gehst ja gehst", instructorDipl.getText());

    // sequence that spans the complete instructor_dipl text
    DataSourceSequence<Integer> wholeText = new DataSourceSequence<>();
    wholeText.setDataSource(instructorDipl);
    wholeText.setStart(instructorDipl.getStart());
    wholeText.setEnd(instructorDipl.getEnd());
    List<SToken> diplTokens = docGraph.getTokensBySequence(wholeText);

    String[] expectedTokenTexts = { "in", "Richtung", "des", "Toasters", "gehst", "ja", "gehst" };
    assertEquals(expectedTokenTexts.length, diplTokens.size());
    for (int idx = 0; idx < expectedTokenTexts.length; idx++) {
        assertEquals(expectedTokenTexts[idx], docGraph.getText(diplTokens.get(idx)));
    }

    // check that the other real spans are now connected with the token
    List<SNode> uttCandidates = docGraph.getNodesByName("sSpan1294");
    assertNotNull(uttCandidates);
    assertEquals(1, uttCandidates.size());
    SNode utterance = uttCandidates.get(0);
    SAnnotation utteranceAnno = utterance.getAnnotation("default_ns::instructor_utt");
    assertNotNull(utteranceAnno);
    assertEquals("utt", utteranceAnno.getValue_STEXT());
    List<SRelation> outgoing = utterance.getOutRelations();
    assertNotNull(outgoing);
    assertEquals(5, outgoing.size());
    for (SRelation spanRel : outgoing) {
        assertTrue(spanRel instanceof SSpanningRelation);
        assertEquals(instructorDipl, CommonHelper.getTextualDSForNode((SNode) spanRel.getTarget(), docGraph));
    }

    // the remaining texts only need to exist and to have the right content
    STextualDS instructorNorm = findTextualDSByName("instructor_norm", allTexts);
    assertNotNull(instructorNorm);
    assertEquals("in Richtung des Toasters gehst ja gehst", instructorNorm.getText());

    STextualDS instructeeDipl = findTextualDSByName("instructee_dipl", allTexts);
    assertNotNull(instructeeDipl);
    assertEquals("mhm ich geh in Richtung des Toasters okay", instructeeDipl.getText());

    STextualDS instructeeNorm = findTextualDSByName("instructee_norm", allTexts);
    assertNotNull(instructeeNorm);
    assertEquals("ich gehe in Richtung des Toasters okay", instructeeNorm.getText());

    STextualDS instructeeExtra = findTextualDSByName("instructee_extra", allTexts);
    assertNotNull(instructeeExtra);
    assertEquals("zeichnet", instructeeExtra.getText());

    STextualDS breakText = findTextualDSByName("break", allTexts);
    assertNotNull(breakText);
    assertEquals("0,7 0,5", breakText.getText());
}
Also used : SNode(org.corpus_tools.salt.core.SNode) HashMap(java.util.HashMap) SAnnotation(org.corpus_tools.salt.core.SAnnotation) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence) SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) SSpanningRelation(org.corpus_tools.salt.common.SSpanningRelation) STextualDS(org.corpus_tools.salt.common.STextualDS) Test(org.junit.Test)

Example 18 with SToken

use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.

the class EventExtractor method parseSalt.

/**
 * Converts a Salt document graph into grid rows, grouped by annotation name.
 *
 * <p>The result map is first filled with one empty row list per entry of
 * {@code annotationNames}. Span and/or token annotations are then added via
 * {@code addAnnotationsForNode}, and finally every row list is post-processed
 * in four passes: merging rows, sorting events by token index, splitting events
 * on islands and splitting events on gaps.
 *
 * @param input the visualizer input whose document graph is converted
 * @param showSpanAnnos if true, annotations of spans are included
 * @param showTokenAnnos if true, annotations of tokens are included
 * @param annotationNames the annotation names defined by the user; only these are looked at
 * @param mediaLayer A set of all annotation layers which should be treated as special media layer.
 * @param replaceValueWithMediaIcon If true the actual value is removed and an icon for playing the media file is shown instead.
 * @param startTokenIndex token index of the first token in the match
 * @param endTokenIndex token index of the last token in the match
 * @param pdfController makes status of all pdfviewer available for the events.
 * @param text If non-null only include annotations for nodes of the specified text.
 * @return the rows per annotation name, in insertion order
 */
public static LinkedHashMap<String, ArrayList<Row>> parseSalt(VisualizerInput input, boolean showSpanAnnos, boolean showTokenAnnos, List<String> annotationNames, Set<String> mediaLayer, boolean replaceValueWithMediaIcon, long startTokenIndex, long endTokenIndex, PDFController pdfController, STextualDS text) {
    SDocumentGraph graph = input.getDocument().getDocumentGraph();

    // only look at annotations which were defined by the user
    LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation = new LinkedHashMap<>();
    for (String annoName : annotationNames) {
        rowsByAnnotation.put(annoName, new ArrayList<Row>());
    }

    AtomicInteger eventCounter = new AtomicInteger();
    PDFPageHelper pageNumberHelper = new PDFPageHelper(input);

    // 1. collect the annotations of spans and tokens (restricted to "text" if it is given)
    if (showSpanAnnos) {
        for (SSpan span : graph.getSpans()) {
            if (text == null || text == CommonHelper.getTextualDSForNode(span, graph)) {
                addAnnotationsForNode(span, graph, startTokenIndex, endTokenIndex, pdfController, pageNumberHelper, eventCounter, rowsByAnnotation, true, mediaLayer, replaceValueWithMediaIcon);
            }
        }
    }
    if (showTokenAnnos) {
        for (SToken tok : graph.getTokens()) {
            if (text == null || text == CommonHelper.getTextualDSForNode(tok, graph)) {
                addAnnotationsForNode(tok, graph, startTokenIndex, endTokenIndex, pdfController, pageNumberHelper, eventCounter, rowsByAnnotation, false, mediaLayer, replaceValueWithMediaIcon);
            }
        }
    }

    // 2. merge rows when possible
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        mergeAllRowsIfPossible(rows);
    }

    // 3. sort events on one row by left token index
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        for (Row row : rows) {
            sortEventsByTokenIndex(row);
        }
    }

    // 4. split up events if they cover islands
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        for (Row row : rows) {
            splitRowsOnIslands(row, graph, text, startTokenIndex, endTokenIndex);
        }
    }

    // 5. split up events if they have gaps
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        for (Row row : rows) {
            splitRowsOnGaps(row, graph, startTokenIndex, endTokenIndex);
        }
    }

    return rowsByAnnotation;
}
Also used : SSpan(org.corpus_tools.salt.common.SSpan) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) SToken(org.corpus_tools.salt.common.SToken) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) PDFPageHelper(annis.libgui.PDFPageHelper) Row(annis.gui.widgets.grid.Row) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap)

Example 19 with SToken

use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.

the class RSTImpl method transformSaltToJSON.

/**
 * Traverses the document graph of the given visualizer input and returns the
 * string form of the {@code result} field afterwards.
 *
 * <p>Root nodes accepted by {@code CommonHelper.checkSLayer(namespace, node)} are
 * used as traversal start nodes. When the graph has roots, a first traversal
 * collects all segment structures into {@code sentences} and numbers them
 * (starting at 1) with a processing annotation; in every case a final traversal
 * with id "jsonBuild" is run with this object as handler.
 *
 * @param visInput the visualizer input providing the document graph
 * @return {@code result.toString()} after the traversal
 */
private String transformSaltToJSON(VisualizerInput visInput) {
    graph = visInput.getSResult().getDocumentGraph();
    List<SNode> allRoots = graph.getRoots();

    // keep only the roots accepted by the layer check for the configured namespace
    List<SNode> rstRoots = new ArrayList<SNode>();
    for (SNode candidate : allRoots) {
        if (CommonHelper.checkSLayer(namespace, candidate)) {
            rstRoots.add(candidate);
        }
    }

    // NOTE(review): this guard tests the unfiltered root list, not rstRoots, while the
    // debug message of the else branch talks about a missing match — confirm which is intended.
    if (allRoots.size() > 0) {
        // collect all sentence and sort them.
        graph.traverse(rstRoots, GRAPH_TRAVERSE_TYPE.TOP_DOWN_DEPTH_FIRST, "getSentences", new GraphTraverseHandler() {

            @Override
            public void nodeReached(GRAPH_TRAVERSE_TYPE traversalType, String traversalId, SNode currNode, SRelation sRelation, SNode fromNode, long order) {
                if (currNode instanceof SStructure && isSegment(currNode)) {
                    sentences.add((SStructure) currNode);
                }
            }

            @Override
            public void nodeLeft(GRAPH_TRAVERSE_TYPE traversalType, String traversalId, SNode currNode, SRelation edge, SNode fromNode, long order) {
                // nothing to do when a node is left
            }

            @Override
            public boolean checkConstraint(GRAPH_TRAVERSE_TYPE traversalType, String traversalId, SRelation edge, SNode currNode, long order) {
                // token are not needed
                return !(currNode instanceof SToken);
            }
        });

        // decorate segments with sentence number
        int sentenceNumber = 0;
        for (SStructure sentence : sentences) {
            sentenceNumber++;
            sentence.createProcessingAnnotation(SENTENCE_INDEX, SENTENCE_INDEX, Integer.toString(sentenceNumber));
        }
    } else {
        log.debug("does not find an annotation which matched {}", ANNOTATION_KEY);
    }

    // the JSON building traversal runs in both cases
    graph.traverse(rstRoots, GRAPH_TRAVERSE_TYPE.TOP_DOWN_DEPTH_FIRST, "jsonBuild", this);
    return result.toString();
}
Also used : SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) SNode(org.corpus_tools.salt.core.SNode) GRAPH_TRAVERSE_TYPE(org.corpus_tools.salt.core.SGraph.GRAPH_TRAVERSE_TYPE) GraphTraverseHandler(org.corpus_tools.salt.core.GraphTraverseHandler) ArrayList(java.util.ArrayList) SStructure(org.corpus_tools.salt.common.SStructure)

Example 20 with SToken

use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.

the class RSTImpl method createJsonEntry.

/**
 * Builds the JSON object describing a single node.
 *
 * <p>The object gets the keys "id", "name" and "data". For an {@link SStructure}
 * the texts of all tokens that are direct targets of its outgoing relations are
 * wrapped in HTML {@code <span>} elements (colored when {@code getHTMLColor}
 * yields a color) and stored under "sentence" inside "data". A
 * {@link JSONException} is logged and not rethrown, so a partially filled
 * object may be returned.
 *
 * @param currNode the node to describe
 * @return the JSON object for the node
 */
private JSONObject createJsonEntry(SNode currNode) {
    JSONObject jsonData = new JSONObject();
    StringBuilder sentenceHtml = new StringBuilder();
    // insertion-ordered set, so that no token is rendered twice
    LinkedHashSet<SToken> tokens = new LinkedHashSet<>();

    if (currNode instanceof SStructure) {
        // collect all directly connected tokens
        List<SRelation<SNode, SNode>> outgoing = currNode.getGraph().getOutRelations(currNode.getId());
        for (SRelation<SNode, SNode> rel : outgoing) {
            if (rel.getTarget() instanceof SToken) {
                tokens.add((SToken) rel.getTarget());
            }
        }

        // render one span element per token; all but the last one end with a blank
        int remaining = tokens.size();
        for (SToken tok : tokens) {
            remaining--;
            String text = getText(tok);
            String color = getHTMLColor(tok);
            if (color == null) {
                sentenceHtml.append("<span>");
            } else {
                sentenceHtml.append("<span style=\"color : ").append(color).append(";\">");
            }
            sentenceHtml.append(text);
            if (remaining > 0) {
                sentenceHtml.append(" ");
            }
            sentenceHtml.append("</span>");
        }
    }

    try {
        // the id must be unique because it is used as the id of an HTML element
        jsonData.put("id", getUniStrId(currNode));
        jsonData.put("name", currNode.getName());

        // additional data object for edge labels and rendering sentences
        JSONObject data = new JSONObject();
        JSONArray edgesJSON = getOutGoingEdgeTypeAnnotation(currNode);

        // since we have found some tokens, it must be a sentence in RST.
        if (!tokens.isEmpty()) {
            data.put("sentence", sentenceHtml.toString());
        }
        if (edgesJSON != null) {
            data.put("edges", edgesJSON);
        }
        if (currNode instanceof SStructure && isSegment(currNode)) {
            SProcessingAnnotation sentenceIdx = currNode.getProcessingAnnotation(SENTENCE_INDEX + "::" + SENTENCE_INDEX);
            // -1 marks a segment without sentence index annotation
            int index;
            if (sentenceIdx == null) {
                index = -1;
            } else {
                index = Integer.parseInt(sentenceIdx.getValue_STEXT());
            }
            data.put(SENTENCE_LEFT, index);
            data.put(SENTENCE_RIGHT, index);
        }
        jsonData.put("data", data);
    } catch (JSONException ex) {
        log.error("problems create entry for {}", currNode, ex);
    }
    return jsonData;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) SNode(org.corpus_tools.salt.core.SNode) SProcessingAnnotation(org.corpus_tools.salt.core.SProcessingAnnotation) JSONArray(org.json.JSONArray) JSONException(org.json.JSONException) SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) JSONObject(org.json.JSONObject) SStructure(org.corpus_tools.salt.common.SStructure)

Aggregations

SToken (org.corpus_tools.salt.common.SToken)30 SNode (org.corpus_tools.salt.core.SNode)16 SRelation (org.corpus_tools.salt.core.SRelation)12 HashMap (java.util.HashMap)9 SAnnotation (org.corpus_tools.salt.core.SAnnotation)9 ArrayList (java.util.ArrayList)8 LinkedList (java.util.LinkedList)8 SSpan (org.corpus_tools.salt.common.SSpan)8 SFeature (org.corpus_tools.salt.core.SFeature)8 RelannisNodeFeature (annis.model.RelannisNodeFeature)7 SSpanningRelation (org.corpus_tools.salt.common.SSpanningRelation)7 SDocumentGraph (org.corpus_tools.salt.common.SDocumentGraph)6 SPointingRelation (org.corpus_tools.salt.common.SPointingRelation)5 Map (java.util.Map)4 TreeMap (java.util.TreeMap)4 SDocument (org.corpus_tools.salt.common.SDocument)4 Annotation (annis.model.Annotation)3 HashSet (java.util.HashSet)3 SDominanceRelation (org.corpus_tools.salt.common.SDominanceRelation)3 STextualDS (org.corpus_tools.salt.common.STextualDS)3