Search in sources :

Example 11 with SAnnotation

use of org.corpus_tools.salt.core.SAnnotation in project ANNIS by korpling.

the class RSTImpl method getOutGoingEdgeTypeAnnotation.

/**
 * Describes the outgoing relations of a node as a JSON array.
 *
 * <p>Each relation that does not end in an {@link SToken} becomes one JSON
 * object holding the keys {@code sType}, {@code from}, {@code to} and, when the
 * relation carries annotations, an {@code annotation} array with their text
 * values.
 *
 * @param node the node whose outgoing relations are described
 * @return the collected edge descriptions; empty when the graph reports no
 *         outgoing relations for the node
 * @throws JSONException if a relation has no target node or a JSON value
 *         cannot be stored
 */
private JSONArray getOutGoingEdgeTypeAnnotation(SNode node) throws JSONException {
    JSONArray result = new JSONArray();
    List<SRelation<SNode, SNode>> outgoing = node.getGraph().getOutRelations(node.getId());
    // no outgoing relations at all: nothing to describe
    if (outgoing == null) {
        return result;
    }
    for (SRelation<SNode, SNode> relation : outgoing) {
        if (relation == null) {
            continue;
        }
        SNode target = relation.getTarget();
        // relations that end in a token are not exported
        if (target instanceof SToken) {
            continue;
        }
        // fall back to a generic label when the relation has no usable type
        String relationType = relation.getType();
        String label = (relationType == null || relationType.isEmpty()) ? "edge" : relationType;
        JSONObject edgeJson = new JSONObject();
        edgeJson.put("sType", label);
        result.put(edgeJson);
        if (target == null) {
            throw new JSONException("could not cast to SNode");
        }
        if (getRSTType().equals(label)) {
            // edges of the RST type are exported with inverted direction
            edgeJson.put("to", getUniStrId(node));
            edgeJson.put("from", getUniStrId(target));
        } else {
            edgeJson.put("from", getUniStrId(node));
            edgeJson.put("to", getUniStrId(target));
        }
        Set<SAnnotation> edgeAnnotations = relation.getAnnotations();
        if (edgeAnnotations == null) {
            continue;
        }
        for (SAnnotation edgeAnnotation : edgeAnnotations) {
            getOrCreateArray(edgeJson, "annotation").put(edgeAnnotation.getValue_STEXT());
        }
    }
    return result;
}
Also used : SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) SNode(org.corpus_tools.salt.core.SNode) JSONObject(org.json.JSONObject) SAnnotation(org.corpus_tools.salt.core.SAnnotation) JSONArray(org.json.JSONArray) JSONException(org.json.JSONException)

Example 12 with SAnnotation

use of org.corpus_tools.salt.core.SAnnotation in project ANNIS by korpling.

the class CSVMultiTokExporter method outputText.

/**
 * Writes one match as a single tab-separated line to {@code out}.
 *
 * <p>For the first match ({@code matchNumber == 0}) a header line is written
 * first. It contains, for every entry of {@code annotationsForMatchedNodes},
 * the columns {@code <n>_id}, {@code <n>_span} and one {@code <n>_anno_<name>}
 * column per annotation name, followed by one {@code meta_<key>} column per
 * entry of {@code metakeys}.
 *
 * <p>The data line lists the matched nodes ordered by their matched-node
 * number. A missing annotation is written as {@code 'NULL'}. Exactly one
 * metadata cell is written per entry of {@code metakeys}, in the same order as
 * the header, and a missing metadata value is also written as {@code 'NULL'},
 * so that every row has the same number of columns as the header.
 *
 * @param graph the document graph containing the match
 * @param alignmc not read by this implementation
 * @param matchNumber index of the match; {@code 0} triggers the header output
 * @param out the writer receiving the output
 *
 * @throws java.io.IOException if writing to {@code out} fails
 */
@Override
public void outputText(SDocumentGraph graph, boolean alignmc, int matchNumber, Writer out) throws IOException, IllegalArgumentException {
    // first match
    if (matchNumber == 0) {
        // output header
        List<String> headerLine = new ArrayList<>();
        for (Map.Entry<Integer, TreeSet<String>> match : annotationsForMatchedNodes.entrySet()) {
            int node_id = match.getKey();
            headerLine.add(String.valueOf(node_id) + "_id");
            headerLine.add(String.valueOf(node_id) + "_span");
            for (String annoName : match.getValue()) {
                headerLine.add(String.valueOf(node_id) + "_anno_" + annoName);
            }
        }
        for (String key : metakeys) {
            headerLine.add("meta_" + key);
        }
        out.append(StringUtils.join(headerLine, "\t"));
        out.append("\n");
    }
    // output nodes in the order of the matches
    SortedMap<Integer, String> contentLine = new TreeMap<>();
    for (SNode node : this.getMatchedNodes(graph)) {
        List<String> nodeLine = new ArrayList<>();
        // export id
        RelannisNodeFeature feats = RelannisNodeFeature.extract(node);
        nodeLine.add(String.valueOf(feats.getInternalID()));
        // export spanned text (empty cell when the node covers no text)
        String span = graph.getText(node);
        nodeLine.add(span != null ? span : "");
        // export annotations
        int node_id = node.getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_MATCHEDNODE).getValue_SNUMERIC().intValue();
        for (String annoName : annotationsForMatchedNodes.get(node_id)) {
            SAnnotation anno = node.getAnnotation(annoName);
            if (anno != null) {
                nodeLine.add(anno.getValue_STEXT());
            } else {
                nodeLine.add("'NULL'");
            }
        }
        // add everything to line
        contentLine.put(node_id, StringUtils.join(nodeLine, "\t"));
    }
    out.append(StringUtils.join(contentLine.values(), "\t"));
    // TODO cache the metadata
    if (!metakeys.isEmpty()) {
        // TODO is this the best way to get the corpus name?
        String corpus_name = CommonHelper.getCorpusPath(java.net.URI.create(graph.getDocument().getId())).get(0);
        List<Annotation> asList = Helper.getMetaData(corpus_name, graph.getDocument().getName());
        // Iterate over the keys (not over the metadata) so that the cells line
        // up with the "meta_<key>" header columns even when a document lacks
        // a value or delivers the metadata in a different order.
        for (String key : metakeys) {
            String value = "'NULL'";
            for (Annotation anno : asList) {
                if (key != null && key.equals(anno.getName())) {
                    value = anno.getValue();
                    break;
                }
            }
            out.append("\t" + value);
        }
    }
    out.append("\n");
}
Also used : RelannisNodeFeature(annis.model.RelannisNodeFeature) SNode(org.corpus_tools.salt.core.SNode) SAnnotation(org.corpus_tools.salt.core.SAnnotation) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) Annotation(annis.model.Annotation) SAnnotation(org.corpus_tools.salt.core.SAnnotation) TreeSet(java.util.TreeSet) TreeMap(java.util.TreeMap) Map(java.util.Map) SortedMap(java.util.SortedMap)

Example 13 with SAnnotation

use of org.corpus_tools.salt.core.SAnnotation in project ANNIS by korpling.

the class LegacyGraphConverter method addRelation.

/**
 * Creates a legacy {@link Edge} from the given relation data and registers it
 * in the annotation graph and on its end nodes.
 *
 * @param clazz relation class, used to choose the legacy edge type
 * @param type name given to the edge
 * @param annotations annotations copied onto the edge
 * @param source Salt source node, looked up in {@code allNodes}
 * @param target Salt target node, looked up in {@code allNodes}
 * @param relLayers layers of the relation; the name of the first one returned
 *        by the iterator becomes the edge namespace
 * @param pre pre-order value stored on the edge
 * @param componentID component id stored on the edge
 * @param allNodes mapping from Salt nodes to already converted legacy nodes
 * @param annoGraph graph that receives the new edge
 */
private static void addRelation(Class<? extends SRelation> clazz, String type, Collection<SAnnotation> annotations, SNode source, SNode target, Set<SLayer> relLayers, long pre, long componentID, Map<SNode, AnnisNode> allNodes, AnnotationGraph annoGraph) {
    AnnisNode sourceNode = allNodes.get(source);
    AnnisNode targetNode = allNodes.get(target);

    // map the Salt relation class to the legacy edge type
    EdgeType resolvedType = EdgeType.UNKNOWN;
    if (SDominanceRelation.class.isAssignableFrom(clazz)) {
        resolvedType = EdgeType.DOMINANCE;
    } else if (SPointingRelation.class.isAssignableFrom(clazz)) {
        resolvedType = EdgeType.POINTING_RELATION;
    } else if (SSpanningRelation.class.isAssignableFrom(clazz)) {
        resolvedType = EdgeType.COVERAGE;
    }

    Edge legacyEdge = new Edge();
    legacyEdge.setSource(sourceNode);
    legacyEdge.setDestination(targetNode);
    legacyEdge.setEdgeType(resolvedType);
    legacyEdge.setPre(pre);
    legacyEdge.setComponentID(componentID);
    if (!relLayers.isEmpty()) {
        SLayer firstLayer = relLayers.iterator().next();
        legacyEdge.setNamespace(firstLayer.getName());
    }
    legacyEdge.setName(type);

    for (SAnnotation saltAnno : annotations) {
        Annotation converted = new Annotation(saltAnno.getNamespace(), saltAnno.getName(), saltAnno.getValue_STEXT());
        legacyEdge.addAnnotation(converted);
    }

    annoGraph.addEdge(legacyEdge);
    legacyEdge.getDestination().addIncomingEdge(legacyEdge);
    // only the source side is treated as optional
    if (legacyEdge.getSource() != null) {
        legacyEdge.getSource().addOutgoingEdge(legacyEdge);
    }
}
Also used : SPointingRelation(org.corpus_tools.salt.common.SPointingRelation) SAnnotation(org.corpus_tools.salt.core.SAnnotation) Edge(annis.model.Edge) Annotation(annis.model.Annotation) SAnnotation(org.corpus_tools.salt.core.SAnnotation)

Example 14 with SAnnotation

use of org.corpus_tools.salt.core.SAnnotation in project ANNIS by korpling.

the class LegacyGraphConverter method convertToAnnotationGraph.

/**
 * Converts a Salt document graph into a legacy {@link AnnotationGraph}.
 *
 * <p>Only nodes that carry the relANNIS node feature are converted. For each
 * of them the annotations, name, first layer (as namespace) and the positional
 * values of the feature are copied. Token nodes additionally get their spanned
 * text and token index when an overlapped text sequence is available.
 * Nodes whose internal id is contained in {@code matchedNodeIDs} are marked
 * with their 1-based position in that list.
 *
 * <p>Afterwards every relation with a relANNIS edge feature is converted, and
 * for every dominance relation that has an artificial dominance component and
 * pre value an additional edge with a {@code null} name is added.
 *
 * @param docGraph the Salt document graph to convert
 * @param matchedNodeIDs internal ids of the matched nodes, in match order
 * @return the converted annotation graph
 * @throws NullPointerException if a {@code Preconditions.checkNotNull} check on
 *         the text sequence of a token fails (assuming Guava's Preconditions)
 * @throws IllegalStateException if the text offsets of a token are not within
 *         the bounds of its text (assuming Guava's Preconditions)
 */
public static AnnotationGraph convertToAnnotationGraph(SDocumentGraph docGraph, List<Long> matchedNodeIDs) {
    Set<Long> matchSet = new HashSet<>(matchedNodeIDs);
    AnnotationGraph annoGraph = new AnnotationGraph();
    List<String> pathList = CommonHelper.getCorpusPath(docGraph.getDocument().getGraph(), docGraph.getDocument());
    annoGraph.setPath(pathList.toArray(new String[pathList.size()]));
    annoGraph.setDocumentName(docGraph.getDocument().getName());
    Map<SNode, AnnisNode> allNodes = new HashMap<>();
    for (SNode sNode : docGraph.getNodes()) {
        SFeature featNodeRaw = sNode.getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE));
        if (featNodeRaw != null) {
            // the feature is looked up once and reused for all values below
            RelannisNodeFeature featNode = (RelannisNodeFeature) featNodeRaw.getValue();
            long internalID = featNode.getInternalID();
            AnnisNode aNode = new AnnisNode(internalID);
            for (SAnnotation sAnno : sNode.getAnnotations()) {
                aNode.addNodeAnnotation(new Annotation(sAnno.getNamespace(), sAnno.getName(), sAnno.getValue_STEXT()));
            }
            aNode.setName(sNode.getName());
            Set<SLayer> layers = sNode.getLayers();
            if (!layers.isEmpty()) {
                aNode.setNamespace(layers.iterator().next().getName());
            }
            if (sNode instanceof SToken) {
                List<DataSourceSequence> seqList = docGraph.getOverlappedDataSourceSequence(sNode, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
                // an empty list is treated like a missing one instead of
                // failing with an IndexOutOfBoundsException on get(0)
                if (seqList != null && !seqList.isEmpty()) {
                    DataSourceSequence seq = seqList.get(0);
                    Preconditions.checkNotNull(seq, "DataSourceSequence is null for token %s", sNode.getId());
                    SSequentialDS seqDS = seq.getDataSource();
                    Preconditions.checkNotNull(seqDS, "SSequentalDS is null for token %s", sNode.getId());
                    Preconditions.checkNotNull(seqDS.getData(), "SSequentalDS data is null for token %s", sNode.getId());
                    String seqDSData = (String) seqDS.getData();
                    Preconditions.checkNotNull(seqDSData, "casted SSequentalDS data is null for token %s", sNode.getId());
                    Preconditions.checkNotNull(seq.getStart(), "SSequentalDS start is null for token %s", sNode.getId());
                    Preconditions.checkNotNull(seq.getEnd(), "SSequentalDS end is null for supposed token %s", sNode.getId());
                    int start = seq.getStart().intValue();
                    int end = seq.getEnd().intValue();
                    // one placeholder per argument: token id, start, end
                    Preconditions.checkState(start >= 0 && start <= end && end <= seqDSData.length(), "Illegal start or end of textual DS for token %s (start %s, end: %s)", sNode.getId(), start, end);
                    String spannedText = seqDSData.substring(start, end);
                    Preconditions.checkNotNull(spannedText, "spanned text is null for supposed token %s (start: %s, end: %s)", sNode.getId(), start, end);
                    aNode.setSpannedText(spannedText);
                    aNode.setToken(true);
                    aNode.setTokenIndex(featNode.getTokenIndex());
                }
            } else {
                aNode.setToken(false);
                aNode.setTokenIndex(null);
            }
            aNode.setCorpus(featNode.getCorpusRef());
            aNode.setTextId(featNode.getTextRef());
            aNode.setLeft(featNode.getLeft());
            aNode.setLeftToken(featNode.getLeftToken());
            aNode.setRight(featNode.getRight());
            aNode.setRightToken(featNode.getRightToken());
            if (matchSet.contains(aNode.getId())) {
                // position in the match list, counted from 1
                aNode.setMatchedNodeInQuery((long) matchedNodeIDs.indexOf(aNode.getId()) + 1);
                annoGraph.getMatchedNodeIds().add(aNode.getId());
            } else {
                aNode.setMatchedNodeInQuery(null);
            }
            annoGraph.addNode(aNode);
            allNodes.put(sNode, aNode);
        }
    }
    for (SRelation rel : docGraph.getRelations()) {
        RelannisEdgeFeature featRelation = RelannisEdgeFeature.extract(rel);
        if (featRelation != null) {
            addRelation(rel, featRelation.getPre(), featRelation.getComponentID(), allNodes, annoGraph);
        }
    }
    // add relations with empty relation name for every dominance relation
    List<SDominanceRelation> dominanceRelations = new LinkedList<>(docGraph.getDominanceRelations());
    for (SDominanceRelation rel : dominanceRelations) {
        RelannisEdgeFeature featEdge = RelannisEdgeFeature.extract(rel);
        if (featEdge != null && featEdge.getArtificialDominanceComponent() != null && featEdge.getArtificialDominancePre() != null) {
            addRelation(SDominanceRelation.class, null, rel.getAnnotations(), rel.getSource(), rel.getTarget(), rel.getLayers(), featEdge.getArtificialDominancePre(), featEdge.getArtificialDominanceComponent(), allNodes, annoGraph);
        }
    }
    return annoGraph;
}
Also used : SLayer(org.corpus_tools.salt.core.SLayer) SNode(org.corpus_tools.salt.core.SNode) HashMap(java.util.HashMap) SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) RelannisEdgeFeature(annis.model.RelannisEdgeFeature) SDominanceRelation(org.corpus_tools.salt.common.SDominanceRelation) HashSet(java.util.HashSet) RelannisNodeFeature(annis.model.RelannisNodeFeature) SAnnotation(org.corpus_tools.salt.core.SAnnotation) SSequentialDS(org.corpus_tools.salt.common.SSequentialDS) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence) Annotation(annis.model.Annotation) SAnnotation(org.corpus_tools.salt.core.SAnnotation) LinkedList(java.util.LinkedList) AnnotationGraph(annis.model.AnnotationGraph) AnnisNode(annis.model.AnnisNode) SFeature(org.corpus_tools.salt.core.SFeature)

Example 15 with SAnnotation

use of org.corpus_tools.salt.core.SAnnotation in project ANNIS by korpling.

the class TimelineReconstructorTest method testBematacDialog.

/**
 * Reconstructs the timeline of a sample dialog and verifies the result.
 * The dialog is this one: https://korpling.org/annis3/?id=44b60a56-31da-4469-b438-62fdb67f28f1
 *
 * The Salt document generated by ANNIS is loaded and its virtual tokenization
 * is removed. Afterwards the test checks that
 * <ul>
 * <li>the newly created tokenization is correct</li>
 * <li>spans cover the correct token</li>
 * </ul>
 */
@Test
public void testBematacDialog() {
    SDocumentGraph docGraph = SaltUtil.loadDocumentGraph(URI.createURI(getClass().getResource("SampleDialog.salt").toString()));
    Map<String, String> anno2order = new HashMap<>();
    anno2order.put("default_ns::instructee_utt", "instructee_dipl");
    anno2order.put("default_ns::instructor_utt", "instructor_dipl");
    TimelineReconstructor.removeVirtualTokenization(docGraph, anno2order);

    // instructor_dipl, instructor_norm, instructee_dipl, instructee_norm, instructee_extra, break
    List<STextualDS> texts = docGraph.getTextualDSs();
    assertEquals(6, texts.size());

    STextualDS instructorDipl = findTextualDSByName("instructor_dipl", texts);
    assertNotNull(instructorDipl);
    assertEquals("in Richtung des Toasters gehst ja gehst", instructorDipl.getText());

    // collect all token that lie on the complete instructor_dipl text
    DataSourceSequence<Integer> seq = new DataSourceSequence<>();
    seq.setDataSource(instructorDipl);
    seq.setStart(instructorDipl.getStart());
    seq.setEnd(instructorDipl.getEnd());
    List<SToken> instructorDiplToken = docGraph.getTokensBySequence(seq);
    String[] expectedToken = { "in", "Richtung", "des", "Toasters", "gehst", "ja", "gehst" };
    assertEquals(expectedToken.length, instructorDiplToken.size());
    for (int i = 0; i < expectedToken.length; i++) {
        assertEquals(expectedToken[i], docGraph.getText(instructorDiplToken.get(i)));
    }

    // check that the other real spans are now connected with the token
    List<SNode> uttNode = docGraph.getNodesByName("sSpan1294");
    assertNotNull(uttNode);
    assertEquals(1, uttNode.size());
    SNode utterance = uttNode.get(0);
    SAnnotation uttAnno = utterance.getAnnotation("default_ns::instructor_utt");
    assertNotNull(uttAnno);
    assertEquals("utt", uttAnno.getValue_STEXT());
    List<SRelation> uttOutRelations = utterance.getOutRelations();
    assertNotNull(uttOutRelations);
    assertEquals(5, uttOutRelations.size());
    for (SRelation rel : uttOutRelations) {
        assertTrue(rel instanceof SSpanningRelation);
        assertEquals(instructorDipl, CommonHelper.getTextualDSForNode((SNode) rel.getTarget(), docGraph));
    }

    // remaining texts: name of the textual data source and its expected content
    String[][] expectedTexts = {
        { "instructor_norm", "in Richtung des Toasters gehst ja gehst" },
        { "instructee_dipl", "mhm ich geh in Richtung des Toasters okay" },
        { "instructee_norm", "ich gehe in Richtung des Toasters okay" },
        { "instructee_extra", "zeichnet" },
        { "break", "0,7 0,5" }
    };
    for (String[] expected : expectedTexts) {
        STextualDS ds = findTextualDSByName(expected[0], texts);
        assertNotNull(ds);
        assertEquals(expected[1], ds.getText());
    }
}
Also used : SNode(org.corpus_tools.salt.core.SNode) HashMap(java.util.HashMap) SAnnotation(org.corpus_tools.salt.core.SAnnotation) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence) SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) SSpanningRelation(org.corpus_tools.salt.common.SSpanningRelation) STextualDS(org.corpus_tools.salt.common.STextualDS) Test(org.junit.Test)

Aggregations

SAnnotation (org.corpus_tools.salt.core.SAnnotation)22 SToken (org.corpus_tools.salt.common.SToken)9 SNode (org.corpus_tools.salt.core.SNode)8 SRelation (org.corpus_tools.salt.core.SRelation)8 LinkedList (java.util.LinkedList)6 RelannisNodeFeature (annis.model.RelannisNodeFeature)5 HashMap (java.util.HashMap)4 SPointingRelation (org.corpus_tools.salt.common.SPointingRelation)4 SSpanningRelation (org.corpus_tools.salt.common.SSpanningRelation)4 SFeature (org.corpus_tools.salt.core.SFeature)4 SLayer (org.corpus_tools.salt.core.SLayer)4 Annotation (annis.model.Annotation)3 ArrayList (java.util.ArrayList)3 TreeSet (java.util.TreeSet)3 SDocumentGraph (org.corpus_tools.salt.common.SDocumentGraph)3 SSpan (org.corpus_tools.salt.common.SSpan)3 STextualDS (org.corpus_tools.salt.common.STextualDS)3 DataSourceSequence (org.corpus_tools.salt.util.DataSourceSequence)3 IOException (java.io.IOException)2 HashSet (java.util.HashSet)2