Search in sources :

Example 16 with SNode

use of org.corpus_tools.salt.core.SNode in project ANNIS by korpling.

the class RSTImpl method getOutGoingEdgeTypeAnnotation.

/**
 * Builds the JSON description of all outgoing relations of {@code node}
 * whose target is not an {@link SToken}.
 *
 * <p>Each reported relation becomes one JSON object with the keys
 * {@code "sType"} (the relation type, or {@code "edge"} if the type is
 * missing or empty), {@code "from"} and {@code "to"}. If the relation has
 * annotations, their text values are collected under {@code "annotation"}.
 * For relations whose type equals {@code getRSTType()} the direction is
 * inverted, i.e. {@code node} is written as {@code "to"}.
 *
 * @param node the node whose outgoing relations are exported
 * @return the array of edge objects; empty if the graph returns no relation list
 * @throws JSONException if a relation has no target node
 */
private JSONArray getOutGoingEdgeTypeAnnotation(SNode node) throws JSONException {
    JSONArray result = new JSONArray();
    List<SRelation<SNode, SNode>> outgoing = node.getGraph().getOutRelations(node.getId());
    // no relation list at all: nothing to export
    if (outgoing == null) {
        return result;
    }
    for (SRelation<SNode, SNode> relation : outgoing) {
        // skip missing entries and relations that end in a token
        if (relation == null) {
            continue;
        }
        SNode target = relation.getTarget();
        if (target instanceof SToken) {
            continue;
        }
        String relationType = relation.getType();
        String sTypeAsString = (relationType == null || relationType.isEmpty()) ? "edge" : relationType;
        JSONObject jsonEdge = new JSONObject();
        result.put(jsonEdge);
        jsonEdge.put("sType", sTypeAsString);
        // the declared element type already guarantees an SNode, so only a
        // missing target can fail here
        if (target == null) {
            throw new JSONException("could not cast to SNode");
        }
        // RST edges are written with inverted direction
        boolean inverted = getRSTType().equals(sTypeAsString);
        String keyForNode = inverted ? "to" : "from";
        String keyForTarget = inverted ? "from" : "to";
        jsonEdge.put(keyForNode, getUniStrId(node));
        jsonEdge.put(keyForTarget, getUniStrId(target));
        Set<SAnnotation> annotations = relation.getAnnotations();
        if (annotations == null) {
            continue;
        }
        for (SAnnotation annotation : annotations) {
            getOrCreateArray(jsonEdge, "annotation").put(annotation.getValue_STEXT());
        }
    }
    return result;
}
Also used : SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) SNode(org.corpus_tools.salt.core.SNode) JSONObject(org.json.JSONObject) SAnnotation(org.corpus_tools.salt.core.SAnnotation) JSONArray(org.json.JSONArray) JSONException(org.json.JSONException)

Example 17 with SNode

use of org.corpus_tools.salt.core.SNode in project ANNIS by korpling.

the class SaltAnnotateExtractorTest method testLayerNodes.

/**
 * Checks that every layer of the extracted single-text document contains
 * exactly the expected nodes (compared by name after sorting with
 * {@code NameComparator}) and that the "dep" layer contains no nodes.
 */
@Test
public void testLayerNodes() throws SQLException {
    SaltProject project = instance.extractData(resultSetProviderSingleText.getResultSet());
    assertNotNull(project);
    SDocumentGraph g = project.getCorpusGraphs().get(0).getDocuments().get(0).getDocumentGraph();
    // layer names and, at the same index, the sorted node names expected in that layer
    String[] layerNames = { "exmaralda", "mmax", "tiger", "default_ns", "rst" };
    String[][] expectedNodeNames = {
        { "Focus_newInfSeg_10", "Focus_newInfSeg_9", "Inf-StatSeg_29", "Inf-StatSeg_30", "NPSeg_29", "NPSeg_30", "PPSeg_7", "SentSeg_10", "SentSeg_9" },
        { "primmarkSeg_1000154", "primmarkSeg_60", "sentenceSeg_50010", "sentenceSeg_50011", "sentenceSeg_5009" },
        { "const_50", "const_52", "const_54", "const_55", "const_56", "const_57", "const_58", "const_59", "const_60", "const_61" },
        { "tok_150", "tok_151", "tok_152", "tok_153", "tok_154", "tok_155", "tok_156", "tok_157", "tok_158", "tok_159", "tok_160", "tok_161" },
        { "u0", "u10", "u11", "u12", "u20", "u23", "u24", "u27", "u28" }
    };
    for (int layer = 0; layer < layerNames.length; layer++) {
        List<SNode> nodes = new ArrayList<>(g.getLayerByName(layerNames[layer]).get(0).getNodes());
        Collections.sort(nodes, new NameComparator());
        String[] expected = expectedNodeNames[layer];
        assertEquals(expected.length, nodes.size());
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], nodes.get(i).getName());
        }
    }
    assertEquals(0, g.getLayerByName("dep").get(0).getNodes().size());
}
Also used : SNode(org.corpus_tools.salt.core.SNode) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) ArrayList(java.util.ArrayList) SaltProject(org.corpus_tools.salt.common.SaltProject) Test(org.junit.Test)

Example 18 with SNode

use of org.corpus_tools.salt.core.SNode in project ANNIS by korpling.

the class TextColumnExporter method outputText.

/**
 * Writes the specified record (if applicable, as multiple result lines) from query result set to the output file.
 *
 * <p>For the first record ({@code recordNumber == 0}) the exporter state {@code isFirstSpeakerWithMatch}
 * and {@code counterGlobal} is reset, and a warning notification is shown if some filter numbers
 * could not be represented or alignment was requested for data that is not alignable.
 * Afterwards the tokens of the graph are written in text order: each speaker that has matches
 * gets its own line consisting of match number, speaker name, the configured metadata columns
 * and the token text, where tabs separate context columns from match columns.
 *
 * @param graph the org.corpus_tools.salt.common.SDocumentGraph representation of a specified record
 * @param alignmc a boolean, which indicates, whether the data should be aligned by match numbers or not
 * @param recordNumber the number of the record within the record set
 * @param out the specified Writer
 *
 * @throws IOException if an I/O error occurs
 */
@Override
public void outputText(SDocumentGraph graph, boolean alignmc, int recordNumber, Writer out) throws IOException {
    String currSpeakerName = "";
    String prevSpeakerName = "";
    if (graph != null) {
        List<SToken> orderedToken = graph.getSortedTokenByText();
        if (orderedToken != null) {
            // iterate over token
            ListIterator<SToken> it = orderedToken.listIterator();
            // match number of the previously written token, or -1 if it was not part of a match
            long lastTokenWasMatched = -1;
            boolean noPreviousTokenInLine = false;
            // if match number == 0, reset global variables and output warning, if necessary
            if (recordNumber == 0) {
                isFirstSpeakerWithMatch = true;
                counterGlobal = 0;
                // create warning message
                String numbersString = "";
                String warnMessage = "";
                StringBuilder sb = new StringBuilder();
                // filter numbers requested by the user that do not occur among the global match numbers
                List<Integer> copyOfFilterNumbersSetByUser = new ArrayList<Integer>();
                for (Long filterNumber : filterNumbersSetByUser) {
                    copyOfFilterNumbersSetByUser.add(Integer.parseInt(String.valueOf(filterNumber)));
                }
                for (Integer matchNumberGlobal : matchNumbersGlobal) {
                    // Integer argument: removes the value, not the element at that index
                    copyOfFilterNumbersSetByUser.remove(matchNumberGlobal);
                }
                Collections.sort(copyOfFilterNumbersSetByUser);
                if (!copyOfFilterNumbersSetByUser.isEmpty()) {
                    for (Integer filterNumber : copyOfFilterNumbersSetByUser) {
                        sb.append(filterNumber).append(", ");
                    }
                    if (copyOfFilterNumbersSetByUser.size() == 1) {
                        numbersString = "number";
                    } else {
                        numbersString = "numbers";
                    }
                    // cut off the trailing ", "
                    warnMessage = "1. Filter " + numbersString + " " + sb.toString().substring(0, sb.lastIndexOf(",")) + " couldn't be represented.";
                }
                if (alignmc && !dataIsAlignable) {
                    if (!warnMessage.isEmpty()) {
                        warnMessage += (NEWLINE + NEWLINE + "2. ");
                    } else {
                        warnMessage += "1. ";
                    }
                    // the sentences are separated by a blank; the original text ran them together
                    warnMessage += "You have tried to align matches by node number via check box. " + "Unfortunately this option is not applicable for this data set, " + "so the data couldn't be aligned.";
                }
                if (!warnMessage.isEmpty()) {
                    String warnCaption = "Some export options couldn't be realized.";
                    Notification warn = new Notification(warnCaption, warnMessage, Notification.Type.WARNING_MESSAGE);
                    warn.setDelayMsec(20000);
                    warn.show(Page.getCurrent());
                }
            }
            // global variables reset; warning issued
            int matchesWrittenForSpeaker = 0;
            while (it.hasNext()) {
                SToken tok = it.next();
                counterGlobal++;
                // get current speaker name
                String name;
                if ((name = CommonHelper.getTextualDSForNode(tok, graph).getName()) == null) {
                    name = "";
                }
                currSpeakerName = (recordNumber + 1) + "_" + name;
                // if speaker has no matches, skip token
                // NOTE(review): a speaker key missing from speakerHasMatches would fail on unboxing here - confirm every speaker is registered
                if (speakerHasMatches.get(currSpeakerName) == false) {
                    prevSpeakerName = currSpeakerName;
                } else // if speaker has matches
                {
                    // if the current speaker is new, write header and append his name
                    if (!currSpeakerName.equals(prevSpeakerName)) {
                        // reset the counter of matches, which were written for this speaker
                        matchesWrittenForSpeaker = 0;
                        if (isFirstSpeakerWithMatch) {
                            out.append("match_number" + TAB_MARK);
                            out.append("speaker" + TAB_MARK);
                            // write header for meta data columns
                            if (!listOfMetakeys.isEmpty()) {
                                for (String metakey : listOfMetakeys) {
                                    out.append(metakey + TAB_MARK);
                                }
                            }
                            out.append("left_context" + TAB_MARK);
                            String prefixAlignmc = "match_";
                            String prefix = "match_column";
                            String middle_context = "middle_context_";
                            if (alignmc && dataIsAlignable) {
                                for (int i = 0; i < orderedMatchNumbersGlobal.size(); i++) {
                                    out.append(prefixAlignmc + orderedMatchNumbersGlobal.get(i) + TAB_MARK);
                                    if (i < orderedMatchNumbersGlobal.size() - 1) {
                                        out.append(middle_context + (i + 1) + TAB_MARK);
                                    }
                                }
                            } else {
                                for (int i = 0; i < maxMatchesPerLine; i++) {
                                    out.append(prefix + TAB_MARK);
                                    if (i < (maxMatchesPerLine - 1)) {
                                        out.append(middle_context + (i + 1) + TAB_MARK);
                                    }
                                }
                            }
                            out.append("right_context");
                            out.append(NEWLINE);
                            isFirstSpeakerWithMatch = false;
                        } else {
                            out.append(NEWLINE);
                        }
                        out.append(String.valueOf(recordNumber + 1) + TAB_MARK);
                        // strip the "<recordNumber + 1>_" prefix again; currSpeakerName always contains "_"
                        String trimmedName = currSpeakerName.substring(currSpeakerName.indexOf("_") + 1);
                        out.append(trimmedName + TAB_MARK);
                        // write meta data
                        if (!listOfMetakeys.isEmpty()) {
                            // get metadata
                            String docName = graph.getDocument().getName();
                            List<String> corpusPath = CommonHelper.getCorpusPath(graph.getDocument().getGraph(), graph.getDocument());
                            String corpusName = corpusPath.get(corpusPath.size() - 1);
                            corpusName = urlPathEscape.escape(corpusName);
                            List<Annotation> metadata = Helper.getMetaData(corpusName, docName);
                            Map<String, String> annosWithoutNamespace = new HashMap<String, String>();
                            Map<String, Map<String, String>> annosWithNamespace = new HashMap<String, Map<String, String>>();
                            // put metadata annotations into hash maps for better access
                            for (Annotation metaAnno : metadata) {
                                String ns;
                                Map<String, String> data = new HashMap<String, String>();
                                data.put(metaAnno.getName(), metaAnno.getValue());
                                // a namespace is present
                                if ((ns = metaAnno.getNamespace()) != null && !ns.isEmpty()) {
                                    Map<String, String> nsMetadata = new HashMap<String, String>();
                                    if (annosWithNamespace.get(ns) != null) {
                                        nsMetadata = annosWithNamespace.get(ns);
                                    }
                                    nsMetadata.putAll(data);
                                    annosWithNamespace.put(ns, nsMetadata);
                                } else {
                                    annosWithoutNamespace.putAll(data);
                                }
                            }
                            for (String metakey : listOfMetakeys) {
                                String metaValue = "";
                                // try to get meta value specific for current speaker
                                if (!trimmedName.isEmpty() && annosWithNamespace.containsKey(trimmedName)) {
                                    Map<String, String> speakerAnnos = annosWithNamespace.get(trimmedName);
                                    if (speakerAnnos.containsKey(metakey)) {
                                        metaValue = speakerAnnos.get(metakey).trim();
                                    }
                                }
                                // try to get meta value, if metaValue is not set
                                if (metaValue.isEmpty() && annosWithoutNamespace.containsKey(metakey)) {
                                    metaValue = annosWithoutNamespace.get(metakey).trim();
                                }
                                out.append(metaValue + TAB_MARK);
                            }
                        }
                        // metadata written
                        lastTokenWasMatched = -1;
                        noPreviousTokenInLine = true;
                    }
                    // header, speaker name and metadata ready
                    // default to space as separator
                    String separator = SPACE;
                    Long matchedNode;
                    // token matched
                    if ((matchedNode = tokenToMatchNumber.get(counterGlobal)) != null) {
                        // is dominated by a (new) matched node, thus use tab to separate the non-matches from the matches
                        if (lastTokenWasMatched < 0) {
                            if (alignmc && dataIsAlignable) {
                                int orderInList = orderedMatchNumbersGlobal.indexOf(matchedNode);
                                if (orderInList >= matchesWrittenForSpeaker) {
                                    // skip the columns of all matches this speaker does not have
                                    int diff = orderInList - matchesWrittenForSpeaker;
                                    matchesWrittenForSpeaker++;
                                    StringBuilder sb = new StringBuilder(TAB_MARK);
                                    for (int i = 0; i < diff; i++) {
                                        sb.append(TAB_MARK + TAB_MARK);
                                        matchesWrittenForSpeaker++;
                                    }
                                    separator = sb.toString();
                                }
                            } else {
                                separator = TAB_MARK;
                            }
                        } else if (lastTokenWasMatched != matchedNode) {
                            // always leave an empty column between two matches, even if there is no actual context
                            if (alignmc && dataIsAlignable) {
                                int orderInList = orderedMatchNumbersGlobal.indexOf(matchedNode);
                                if (orderInList >= matchesWrittenForSpeaker) {
                                    int diff = orderInList - matchesWrittenForSpeaker;
                                    matchesWrittenForSpeaker++;
                                    StringBuilder sb = new StringBuilder(TAB_MARK + TAB_MARK);
                                    for (int i = 0; i < diff; i++) {
                                        sb.append(TAB_MARK + TAB_MARK);
                                        matchesWrittenForSpeaker++;
                                    }
                                    separator = sb.toString();
                                }
                            } else {
                                separator = TAB_MARK + TAB_MARK;
                            }
                        }
                        lastTokenWasMatched = matchedNode;
                    } else // token not matched, but last token matched
                    if (lastTokenWasMatched >= 0) {
                        // handle crossing edges
                        if (!tokenToMatchNumber.containsKey(counterGlobal) && tokenToMatchNumber.containsKey(counterGlobal - 1) && tokenToMatchNumber.containsKey(counterGlobal + 1)) {
                            if (Objects.equals(tokenToMatchNumber.get(counterGlobal - 1), tokenToMatchNumber.get(counterGlobal + 1))) {
                                // unmatched token enclosed by tokens of the same match: keep it in the match column
                                separator = SPACE;
                                lastTokenWasMatched = tokenToMatchNumber.get(counterGlobal + 1);
                            } else {
                                separator = TAB_MARK;
                                lastTokenWasMatched = -1;
                            }
                        } else // mark the end of a match with the tab
                        {
                            separator = TAB_MARK;
                            lastTokenWasMatched = -1;
                        }
                    }
                    // if tok is the first token in the line and not matched, set separator to empty string
                    if (noPreviousTokenInLine && separator.equals(SPACE)) {
                        separator = "";
                    }
                    out.append(separator);
                    // append the current token
                    out.append(graph.getText(tok));
                    noPreviousTokenInLine = false;
                    prevSpeakerName = currSpeakerName;
                }
            }
        }
    }
}
Also used : SNode(org.corpus_tools.salt.core.SNode) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Notification(com.vaadin.ui.Notification) Annotation(annis.model.Annotation) LinkedList(java.util.LinkedList) SToken(org.corpus_tools.salt.common.SToken) HashMap(java.util.HashMap) Map(java.util.Map)

Example 19 with SNode

use of org.corpus_tools.salt.core.SNode in project ANNIS by korpling.

the class CSVMultiTokExporter method outputText.

/**
 * Takes a match and outputs a csv-line.
 *
 * <p>For the first match ({@code matchNumber == 0}) a tab-separated header line is written
 * first: for every matched node number an {@code _id} and a {@code _span} column plus one
 * {@code _anno_} column per annotation name, followed by one {@code meta_} column per meta key.
 * The data line contains the matched nodes ordered by node number and then exactly one
 * metadata value per meta key, in the same order as the header columns. Missing annotation or
 * metadata values are written as {@code 'NULL'}.
 *
 * @param graph the document graph containing the matched nodes
 * @param alignmc not used by this exporter
 * @param matchNumber the number of the match; {@code 0} triggers the header output
 * @param out the writer the line is appended to
 *
 * @throws java.io.IOException if writing to {@code out} fails
 */
@Override
public void outputText(SDocumentGraph graph, boolean alignmc, int matchNumber, Writer out) throws IOException, IllegalArgumentException {
    // first match
    if (matchNumber == 0) {
        // output header
        List<String> headerLine = new ArrayList<>();
        for (Map.Entry<Integer, TreeSet<String>> match : annotationsForMatchedNodes.entrySet()) {
            int node_id = match.getKey();
            headerLine.add(String.valueOf(node_id) + "_id");
            headerLine.add(String.valueOf(node_id) + "_span");
            for (String annoName : match.getValue()) {
                headerLine.add(String.valueOf(node_id) + "_anno_" + annoName);
            }
        }
        for (String key : metakeys) {
            headerLine.add("meta_" + key);
        }
        out.append(StringUtils.join(headerLine, "\t"));
        out.append("\n");
    }
    // output nodes in the order of the matches
    SortedMap<Integer, String> contentLine = new TreeMap<>();
    for (SNode node : this.getMatchedNodes(graph)) {
        List<String> nodeLine = new ArrayList<>();
        // export id
        RelannisNodeFeature feats = RelannisNodeFeature.extract(node);
        nodeLine.add(String.valueOf(feats.getInternalID()));
        // export spanned text (empty column if the node spans no text)
        String span = graph.getText(node);
        nodeLine.add(span != null ? span : "");
        // export annotations
        int node_id = node.getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_MATCHEDNODE).getValue_SNUMERIC().intValue();
        for (String annoName : annotationsForMatchedNodes.get(node_id)) {
            SAnnotation anno = node.getAnnotation(annoName);
            nodeLine.add(anno != null ? anno.getValue_STEXT() : "'NULL'");
        }
        // add everything to line
        contentLine.put(node_id, StringUtils.join(nodeLine, "\t"));
    }
    out.append(StringUtils.join(contentLine.values(), "\t"));
    // TODO cache the metadata
    if (!metakeys.isEmpty()) {
        // TODO is this the best way to get the corpus name?
        String corpus_name = CommonHelper.getCorpusPath(java.net.URI.create(graph.getDocument().getId())).get(0);
        List<Annotation> asList = Helper.getMetaData(corpus_name, graph.getDocument().getName());
        // index the requested metadata by name; the first value found for a name wins
        Map<String, String> metaValues = new TreeMap<>();
        for (Annotation anno : asList) {
            String name = anno.getName();
            if (name != null && metakeys.contains(name) && !metaValues.containsKey(name)) {
                metaValues.put(name, anno.getValue());
            }
        }
        // one column per meta key in header order, so the values stay under their
        // "meta_<key>" header even if a key is missing or returned in another order
        for (String key : metakeys) {
            String value = metaValues.get(key);
            out.append("\t" + (value != null ? value : "'NULL'"));
        }
    }
    out.append("\n");
}
Also used : RelannisNodeFeature(annis.model.RelannisNodeFeature) SNode(org.corpus_tools.salt.core.SNode) SAnnotation(org.corpus_tools.salt.core.SAnnotation) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) Annotation(annis.model.Annotation) SAnnotation(org.corpus_tools.salt.core.SAnnotation) TreeSet(java.util.TreeSet) TreeMap(java.util.TreeMap) Map(java.util.Map) SortedMap(java.util.SortedMap)

Example 20 with SNode

use of org.corpus_tools.salt.core.SNode in project ANNIS by korpling.

the class Helper method calculateMarkedAndCoveredIDs.

/**
 * Computes the match number for every matched or covered node of a document.
 *
 * <p>All nodes that carry the {@code FEAT_MATCHEDNODE} feature with a non-null
 * numeric value are handed to a {@code CoveredMatchesCalculator} together with
 * the document graph; its result is the base of the returned map. If a
 * segmentation name is given, every node of {@code segNodes} that is not yet
 * contained is added with the match number of a covered token whose token
 * index lies between the segmentation node's left and right token (inclusive).
 *
 * @param doc the document whose graph is inspected
 * @param segNodes the segmentation nodes; only read if {@code segmentationName} is not null
 * @param segmentationName the name of the segmentation, or {@code null} to skip the segmentation step
 * @return map from node ID to match number
 */
public static Map<String, Long> calculateMarkedAndCoveredIDs(SDocument doc, List<SNode> segNodes, String segmentationName) {
    // seed with all nodes that are marked as matched
    Map<String, Long> matchedById = new HashMap<>();
    for (SNode node : doc.getDocumentGraph().getNodes()) {
        SFeature matchFeature = node.getFeature(ANNIS_NS, FEAT_MATCHEDNODE);
        if (matchFeature == null) {
            continue;
        }
        Long matchNumber = matchFeature.getValue_SNUMERIC();
        if (matchNumber != null) {
            matchedById.put(node.getId(), matchNumber);
        }
    }
    // let the calculator extend the matched nodes by the covered ones
    CoveredMatchesCalculator calculator = new CoveredMatchesCalculator(doc.getDocumentGraph(), matchedById);
    Map<String, Long> covered = calculator.getMatchedAndCovered();
    if (segmentationName == null) {
        return covered;
    }
    // keep only the covered entries that are tokens
    Map<SToken, Long> coveredToken = new HashMap<>();
    for (Map.Entry<String, Long> coveredEntry : covered.entrySet()) {
        SNode candidate = doc.getDocumentGraph().getNode(coveredEntry.getKey());
        if (candidate instanceof SToken) {
            coveredToken.put((SToken) candidate, coveredEntry.getValue());
        }
    }
    for (SNode segNode : segNodes) {
        RelannisNodeFeature segFeature = (RelannisNodeFeature) segNode.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
        // already known segmentation nodes keep their match number
        if (covered.containsKey(segNode.getId())) {
            continue;
        }
        long leftTok = segFeature.getLeftToken();
        long rightTok = segFeature.getRightToken();
        // the first covered token inside the token range of the segment decides
        for (Map.Entry<SToken, Long> tokenEntry : coveredToken.entrySet()) {
            RelannisNodeFeature tokenFeature = (RelannisNodeFeature) tokenEntry.getKey().getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
            long tokenIndex = tokenFeature.getTokenIndex();
            boolean insideSegment = tokenIndex <= rightTok && tokenIndex >= leftTok;
            if (insideSegment) {
                covered.put(segNode.getId(), tokenEntry.getValue());
                break;
            }
        }
    }
    return covered;
}
Also used : RelannisNodeFeature(annis.model.RelannisNodeFeature) SNode(org.corpus_tools.salt.core.SNode) HashMap(java.util.HashMap) SToken(org.corpus_tools.salt.common.SToken) Map(java.util.Map) CorpusConfigMap(annis.service.objects.CorpusConfigMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) SFeature(org.corpus_tools.salt.core.SFeature)

Aggregations

SNode (org.corpus_tools.salt.core.SNode)37 SToken (org.corpus_tools.salt.common.SToken)15 SRelation (org.corpus_tools.salt.core.SRelation)14 ArrayList (java.util.ArrayList)13 LinkedList (java.util.LinkedList)12 SDocumentGraph (org.corpus_tools.salt.common.SDocumentGraph)12 SFeature (org.corpus_tools.salt.core.SFeature)12 RelannisNodeFeature (annis.model.RelannisNodeFeature)11 HashMap (java.util.HashMap)11 SAnnotation (org.corpus_tools.salt.core.SAnnotation)8 Map (java.util.Map)6 SDocument (org.corpus_tools.salt.common.SDocument)6 HashSet (java.util.HashSet)5 SCorpusGraph (org.corpus_tools.salt.common.SCorpusGraph)5 Annotation (annis.model.Annotation)4 TreeMap (java.util.TreeMap)4 SCorpus (org.corpus_tools.salt.common.SCorpus)4 SaltProject (org.corpus_tools.salt.common.SaltProject)4 SLayer (org.corpus_tools.salt.core.SLayer)4 JSONException (org.json.JSONException)4