Search in sources :

Example 6 with SToken

use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.

the class SaltAnnotateExtractor method createSinglePrimaryText.

/**
 * Creates one primary text node for the given text ID, adds it to the graph and connects every
 * token that references this text to it with a textual relation.
 *
 * <p>
 * The text content is assembled by concatenating the token texts in the iteration order of
 * {@code tokenTexts}. A single space is appended after a token of this text whenever
 * {@code tokenTexts} still has further entries, regardless of whether those entries belong to
 * the same text.
 *
 * @param graph the document graph receiving the new text node and the textual relations
 * @param textID the ID of the text; only tokens whose node feature references it are used
 * @param tokenTexts the covered text of each token, keyed by token index
 * @param tokenByIndex the token nodes, keyed by the same token index as {@code tokenTexts}
 */
private void createSinglePrimaryText(SDocumentGraph graph, long textID, TreeMap<Long, String> tokenTexts, TreeMap<Long, SToken> tokenByIndex) {
    STextualDS textDataSource = SaltFactory.createSTextualDS();
    textDataSource.setName("sText" + textID);
    graph.addNode(textDataSource);
    StringBuilder sbText = new StringBuilder();
    Iterator<Map.Entry<Long, String>> itToken = tokenTexts.entrySet().iterator();
    long index = 0;
    while (itToken.hasNext()) {
        Map.Entry<Long, String> e = itToken.next();
        SToken tok = tokenByIndex.get(e.getKey());
        if (tok == null) {
            // there is a text entry for this index but no token node: nothing to connect
            continue;
        }
        SFeature rawFeature = tok.getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE));
        if (rawFeature == null) {
            continue;
        }
        RelannisNodeFeature feat = (RelannisNodeFeature) rawFeature.getValue();
        if (feat == null || feat.getTextRef() != textID) {
            // token has no usable node feature or belongs to a different text
            continue;
        }
        STextualRelation textRel = SaltFactory.createSTextualRelation();
        textRel.setSource(tok);
        textRel.setTarget(textDataSource);
        // the token covers the range that its text is about to occupy in the buffer
        textRel.setStart(sbText.length());
        textRel.setEnd(sbText.length() + e.getValue().length());
        textRel.setName("sTextRel" + textID + "_" + (index++));
        graph.addRelation(textRel);
        sbText.append(e.getValue());
        if (itToken.hasNext()) {
            sbText.append(" ");
        }
    }
    textDataSource.setText(sbText.toString());
}
Also used : RelannisNodeFeature(annis.model.RelannisNodeFeature) STextualRelation(org.corpus_tools.salt.common.STextualRelation) SToken(org.corpus_tools.salt.common.SToken) STextualDS(org.corpus_tools.salt.common.STextualDS) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) SFeature(org.corpus_tools.salt.core.SFeature)

Example 7 with SToken

use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.

the class SaltAnnotateExtractor method createNewRelation.

/**
 * Creates a new relation of the kind selected by {@code type}, attaches the relANNIS edge
 * feature to it and adds it to the graph and the layer.
 *
 * <p>
 * Supported types are {@code "d"} (dominance relation), {@code "c"} (spanning/coverage relation)
 * and {@code "p"} (pointing relation). For dominance and coverage relations the source node may
 * be recreated with a matching node type, in which case {@code nodeByRankID} is updated.
 *
 * <p>
 * The returned relation is not necessarily part of the graph: a coverage relation whose target
 * is not a token is only reported with a warning, and a {@link SaltException} raised while
 * wiring the relation is logged and swallowed.
 *
 * @param graph the document graph the relation is added to
 * @param sourceNode the source node of the relation
 * @param targetNode the target node of the relation
 * @param relationName the name used as the type of the created relation, may be null or empty
 * @param type the relation kind: {@code "d"}, {@code "c"} or {@code "p"}; {@code null} creates nothing
 * @param componentID the component ID stored in the edge feature
 * @param layer the layer the relation is added to
 * @param pre the pre-order value stored in the edge feature
 * @param nodeByRankID map of nodes by rank ID, updated when the source node is recreated
 * @param numberOfRelations counter used to generate unique relation names
 * @return the created relation, or {@code null} if {@code type} is {@code null} or the relation
 *         was dropped by the artificial dominance relation handling
 * @throws IllegalArgumentException if {@code type} is not one of the supported values
 */
private SRelation createNewRelation(SDocumentGraph graph, SStructuredNode sourceNode, SNode targetNode, String relationName, String type, long componentID, SLayer layer, long pre, FastInverseMap<Long, SNode> nodeByRankID, AtomicInteger numberOfRelations) {
    SRelation rel = null;
    if (null != type) {
        // create new relation
        switch(type) {
            case "d":
                SDominanceRelation domrel = SaltFactory.createSDominanceRelation();
                // always set the name ourselves; per the original author's note
                // SDocumentGraph#basicAddRelation() is very slow otherwise
                domrel.setName("sDomRel" + numberOfRelations.incrementAndGet());
                rel = domrel;
                if (sourceNode != null && !(sourceNode instanceof SStructure)) {
                    log.debug("Mismatched source type: should be SStructure");
                    SNode oldNode = sourceNode;
                    sourceNode = recreateNode(SStructure.class, sourceNode);
                    updateMapAfterRecreatingNode(oldNode, sourceNode, nodeByRankID);
                }
                if (relationName == null || relationName.isEmpty()) {
                    // a dominance relation without a name may be an artificial one;
                    // the handler reports whether it has to be left out of the graph
                    if (handleArtificialDominanceRelation(graph, sourceNode, targetNode, rel, layer, componentID, pre)) {
                        // don't include this relation
                        rel = null;
                    }
                }
                break;
            case "c":
                SSpanningRelation spanrel = SaltFactory.createSSpanningRelation();
                // always set the name ourselves; per the original author's note
                // SDocumentGraph#basicAddRelation() is very slow otherwise
                spanrel.setName("sSpanRel" + numberOfRelations.incrementAndGet());
                rel = spanrel;
                sourceNode = testAndFixNonSpan(sourceNode, nodeByRankID);
                break;
            case "p":
                SPointingRelation pointingrel = SaltFactory.createSPointingRelation();
                pointingrel.setName("sPointingRel" + numberOfRelations.incrementAndGet());
                rel = pointingrel;
                break;
            default:
                throw new IllegalArgumentException("Invalid type " + type + " for new Relation");
        }
        try {
            if (rel != null) {
                rel.setType(relationName);
                RelannisEdgeFeature featRelation = new RelannisEdgeFeature();
                featRelation.setPre(pre);
                featRelation.setComponentID(componentID);
                SFeature sfeatRelation = SaltFactory.createSFeature();
                sfeatRelation.setNamespace(ANNIS_NS);
                sfeatRelation.setName(FEAT_RELANNIS_EDGE);
                sfeatRelation.setValue(featRelation);
                rel.addFeature(sfeatRelation);
                rel.setSource(sourceNode);
                if ("c".equals(type) && !(targetNode instanceof SToken)) {
                    // instanceof is also false for a null target, so both names are
                    // resolved null-safely before they are logged
                    String targetName = targetNode == null ? "null" : targetNode.getName();
                    String sourceName = sourceNode == null ? "null" : sourceNode.getName();
                    log.warn("invalid relation detected: target node ({}) " + "of a coverage relation (from: {}, internal id {}) was not a token", new Object[] { targetName, sourceName, "" + pre });
                } else {
                    rel.setTarget(targetNode);
                    graph.addRelation(rel);
                    layer.addRelation(rel);
                }
            }
        } catch (SaltException ex) {
            log.warn("invalid relation detected", ex);
        }
    }
    return rel;
}
Also used : SPointingRelation(org.corpus_tools.salt.common.SPointingRelation) SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) SNode(org.corpus_tools.salt.core.SNode) SaltException(org.corpus_tools.salt.exceptions.SaltException) SSpanningRelation(org.corpus_tools.salt.common.SSpanningRelation) RelannisEdgeFeature(annis.model.RelannisEdgeFeature) SDominanceRelation(org.corpus_tools.salt.common.SDominanceRelation) SStructure(org.corpus_tools.salt.common.SStructure) SFeature(org.corpus_tools.salt.core.SFeature)

Example 8 with SToken

use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.

the class TextColumnExporter method createAdjacencyMatrix.

/**
 * Implements the abstract method of the [SaltBasedExporter](\ref annis.gui.exporter.SaltBasedExporter).
 * This method creates and fills an adjacency matrix of dimension (nodeCount x nodeCount), which keeps the relative order
 * of match numbers to each other of each query result line. A result line is a part of a record, which belongs to a speaker.
 *
 * The adjacency matrix is a global two-dimensional array of integers, which allows to recognize the valid order of match numbers globally, after all query results are processed.
 *
 * When called with recordNumber 0 the state collected by previous searches is reset, the matrix is
 * re-created with every cell set to -1 and the user parameters are read from args. Filter numbers
 * are trimmed before parsing; entries that are not valid numbers are skipped.
 *
 * @param graph  an org.corpus_tools.salt.common.SDocumentGraph representation of a record, ignored if null
 * @param args a map containing parameters like 'filter' or 'metakeys', set by user
 * @param recordNumber the number of record within  the record set returned for the user query
 * @param nodeCount the count of distinct match numbers in the whole record set returned for the user query
 */
@Override
public void createAdjacencyMatrix(SDocumentGraph graph, Map<String, String> args, int recordNumber, int nodeCount) throws IOException {
    String currSpeakerName = "";
    String prevSpeakerName = "";
    List<Long> matchNumbersOrdered = new ArrayList<Long>();
    // if new search, reset adjacencyMatrix, extract parameters, set by user
    if (recordNumber == 0) {
        speakerHasMatches.clear();
        speakerName = "";
        tokenToMatchNumber.clear();
        filterNumbersSetByUser.clear();
        filterNumbersIsEmpty = true;
        listOfMetakeys.clear();
        adjacencyMatrix = new int[nodeCount][nodeCount];
        matrixIsFilled = false;
        singleMatchesGlobal.clear();
        orderedMatchNumbersGlobal.clear();
        matchNumbersGlobal.clear();
        dataIsAlignable = true;
        maxMatchesPerLine = 0;
        // initialize adjacency matrix, -1 marks "no order known"
        for (int i = 0; i < adjacencyMatrix.length; i++) {
            for (int j = 0; j < adjacencyMatrix[i].length; j++) {
                adjacencyMatrix[i][j] = -1;
            }
        }
        // extract filter numbers, if set
        String filterParameters = args.get(FILTER_PARAMETER_KEYWORD);
        if (filterParameters != null) {
            for (String rawNumber : filterParameters.split(PARAMETER_SEPARATOR)) {
                try {
                    // trim like the metakeys below, so that "1, 2" yields both numbers
                    Long number = Long.parseLong(rawNumber.trim());
                    filterNumbersSetByUser.add(number);
                } catch (NumberFormatException ignored) {
                    // entries which are not numbers are skipped on purpose
                }
            }
        }
        if (!filterNumbersSetByUser.isEmpty()) {
            filterNumbersIsEmpty = false;
        }
        // extract metakeys
        String metakeyParameters = args.get(METAKEYS_KEYWORD);
        if (metakeyParameters != null) {
            for (String rawMetakey : metakeyParameters.split(PARAMETER_SEPARATOR)) {
                listOfMetakeys.add(rawMetakey.trim());
            }
        }
    }
    if (graph != null) {
        List<SToken> orderedToken = graph.getSortedTokenByText();
        // iterate over all token
        if (orderedToken != null) {
            // reset counter over all the tokens
            if (recordNumber == 0) {
                counterGlobal = 0;
            }
            // iterate first time over tokens to figure out which speaker has matches and to recognize the hierarchical structure of matches as well
            for (SToken token : orderedToken) {
                counterGlobal++;
                // NOTE(review): assumes every token has a textual data source - confirm
                String name = CommonHelper.getTextualDSForNode(token, graph).getName();
                if (name == null) {
                    name = "";
                }
                speakerName = (recordNumber + 1) + "_" + name;
                currSpeakerName = speakerName;
                // reset data structures for new speaker
                if (!currSpeakerName.equals(prevSpeakerName)) {
                    matchNumbersOrdered.clear();
                }
                if (!speakerHasMatches.containsKey(currSpeakerName)) {
                    speakerHasMatches.put(currSpeakerName, false);
                }
                List<SNode> root = new LinkedList<>();
                root.add(token);
                IsDominatedByMatch traverserSpeakerSearch = new IsDominatedByMatch();
                // reset list, it is read again right after the traversal
                dominatedMatchCodes.clear();
                graph.traverse(root, GRAPH_TRAVERSE_TYPE.BOTTOM_UP_DEPTH_FIRST, TRAV_PREPROCESSING, traverserSpeakerSearch);
                if (!dominatedMatchCodes.isEmpty()) {
                    // if filter numbers not set by user, take the number of the highest match node
                    if (filterNumbersIsEmpty) {
                        Long highestMatchCode = dominatedMatchCodes.get(dominatedMatchCodes.size() - 1);
                        tokenToMatchNumber.put(counterGlobal, highestMatchCode);
                        // set filter number to the ordered list
                        if (!matchNumbersOrdered.contains(highestMatchCode)) {
                            matchNumbersOrdered.add(highestMatchCode);
                        }
                    } else {
                        // take the highest match code, which is present in filterNumbers
                        for (int i = dominatedMatchCodes.size() - 1; i >= 0; i--) {
                            Long matchCode = dominatedMatchCodes.get(i);
                            if (filterNumbersSetByUser.contains(matchCode)) {
                                tokenToMatchNumber.put(counterGlobal, matchCode);
                                if (!matchNumbersOrdered.contains(matchCode)) {
                                    matchNumbersOrdered.add(matchCode);
                                }
                                break;
                            }
                        }
                    }
                    // reset maxMatchesPerLine
                    if (maxMatchesPerLine < matchNumbersOrdered.size()) {
                        maxMatchesPerLine = matchNumbersOrdered.size();
                    }
                    // fill the adjacency matrix
                    if (matchNumbersOrdered.size() > 1) {
                        Iterator<Long> it = matchNumbersOrdered.iterator();
                        int prev = Integer.parseInt(String.valueOf((Long) it.next()));
                        matchNumbersGlobal.add(prev);
                        while (it.hasNext()) {
                            int curr = Integer.parseInt(String.valueOf((Long) it.next()));
                            matchNumbersGlobal.add(curr);
                            // match numbers are 1-based, matrix indices are 0-based
                            adjacencyMatrix[prev - 1][curr - 1] = 1;
                            matrixIsFilled = true;
                            prev = curr;
                        }
                    } else if (!matchNumbersOrdered.isEmpty()) {
                        // the list stays empty if a filter is set and none of the dominated
                        // match codes passes it; there is nothing to register in that case
                        matchNumbersGlobal.add(Integer.parseInt(String.valueOf(matchNumbersOrdered.get(0))));
                        singleMatchesGlobal.add(matchNumbersOrdered.get(0));
                    }
                }
                // set previous speaker name
                prevSpeakerName = currSpeakerName;
            }
        }
    }
}
Also used : SNode(org.corpus_tools.salt.core.SNode) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) SToken(org.corpus_tools.salt.common.SToken)

Example 9 with SToken

use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.

the class CommonHelper method getSortedSegmentationNodes.

/**
 * Calculates a {@link SOrderRelation} node chain of a {@link SDocumentGraph}.
 *
 * <p>
 * If no segmentation name is set, a list of sorted {@link SToken} will be
 * returned.
 *
 * <p>
 * Otherwise every node carrying the "first node of segmentation chain" feature
 * with a value equal to the segmentation name (ignoring case) starts a chain,
 * which is followed along the first outgoing {@link SOrderRelation} of each
 * node. Each node is added at most once, so following a chain stops as soon as
 * it reaches a node that is already part of the result (e.g. in a cycle).
 *
 * @param segName The segmentation name, for which the chain is computed.
 * @param graph The salt document graph, which is traversed for the
 * segmentation.
 *
 * @return Returns a List of {@link SNode}, which is sorted by the
 * {@link SOrderRelation}.
 */
public static List<SNode> getSortedSegmentationNodes(String segName, SDocumentGraph graph) {
    List<SNode> token = new ArrayList<SNode>();
    if (segName == null) {
        // if no segmentation is given just return the sorted token list
        List<SToken> sortedToken = graph.getSortedTokenByText();
        if (sortedToken != null) {
            token.addAll(sortedToken);
        }
    } else {
        // get the very first node of the order relation chain
        Set<SNode> startNodes = new LinkedHashSet<SNode>();
        for (SNode n : graph.getNodes()) {
            SFeature feat = n.getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_FIRST_NODE_SEGMENTATION_CHAIN);
            if (feat != null && segName.equalsIgnoreCase(feat.getValue_STEXT())) {
                startNodes.add(n);
            }
        }
        Set<String> alreadyAdded = new HashSet<String>();
        // add all nodes on the order relation chain beginning from the start node
        for (SNode s : startNodes) {
            SNode current = s;
            // Set#add returns false for an ID that was seen before; this also covers the
            // start node itself, so a cycle leading back to it does not add it twice
            while (current != null && alreadyAdded.add(current.getId())) {
                token.add(current);
                SNode next = null;
                List<SRelation<SNode, SNode>> out = graph.getOutRelations(current.getId());
                if (out != null) {
                    for (SRelation<? extends SNode, ? extends SNode> e : out) {
                        if (e instanceof SOrderRelation) {
                            // only the first outgoing order relation is followed
                            next = ((SOrderRelation) e).getTarget();
                            break;
                        }
                    }
                }
                current = next;
            }
        }
    }
    return token;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) SNode(org.corpus_tools.salt.core.SNode) ArrayList(java.util.ArrayList) SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) SOrderRelation(org.corpus_tools.salt.common.SOrderRelation) SFeature(org.corpus_tools.salt.core.SFeature) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 10 with SToken

use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.

the class TimelineReconstructor method convertSpanToToken.

/**
 * Replaces a span of the given order (segmentation) by a newly created token.
 *
 * <p>
 * The span is marked for deletion the first time it is seen; spans that are already marked are
 * skipped. The text of the order is created on demand. If the span covers at least one timeline
 * position and has a text value annotation, the annotation value is appended to the text of the
 * order, a token covering exactly that range is created, the non-"salt" features and annotations
 * of the span are attached to it, it is connected to the timeline and the relations of the span
 * are moved over.
 *
 * @param span the span to convert
 * @param orderName the name of the order the span belongs to, also used as name of the text
 */
private void convertSpanToToken(SStructuredNode span, String orderName) {
    final Set<String> allowedAnnoNames = new HashSet<>(order2spanAnnos.get(orderName));
    if (nodesToDelete.contains(span)) {
        // this span has been handled before
        return;
    }
    nodesToDelete.add(span);

    // lazily create the text and its content buffer for this order
    if (textsByName.get(orderName) == null) {
        STextualDS createdText = graph.createTextualDS("");
        createdText.setName(orderName);
        textsByName.put(orderName, createdText);
        textDataByName.put(orderName, new StringBuilder());
    }
    STextualDS targetText = textsByName.get(orderName);
    StringBuilder buffer = textDataByName.get(orderName);

    TreeSet<Integer> timelinePositions = new TreeSet<>(spans2TimelinePos.get(span));
    if (timelinePositions.isEmpty()) {
        return;
    }
    SAnnotation valueAnno = getTextValueAnno(orderName, span);
    if (valueAnno == null) {
        return;
    }

    // append the value to the text and let the new token cover exactly the appended range
    String tokenText = valueAnno.getValue_STEXT();
    int tokenStart = buffer.length();
    buffer.append(tokenText);
    int tokenEnd = buffer.length();
    SToken createdToken = graph.createToken(targetText, tokenStart, tokenEnd);

    // keep track of changed ids for matches
    if (this.matchIDs.contains(span.getId())) {
        this.oldID2newID.put(span.getId(), createdToken.getId());
    }

    // move all features to the new token
    if (span.getFeatures() != null) {
        for (SFeature feature : span.getFeatures()) {
            if ("salt".equals(feature.getNamespace())) {
                continue;
            }
            createdToken.addFeature(feature);
        }
    }

    // move all annotations to the new token, except the one holding the text value
    if (span.getAnnotations() != null) {
        for (SAnnotation annotation : span.getAnnotations()) {
            if ("salt".equals(annotation.getNamespace()) || orderName.equals(annotation.getName())) {
                continue;
            }
            createdToken.addAnnotation(annotation);
        }
    }

    // connect the token to the timeline range the span covered
    STimelineRelation timelineRelation = SaltFactory.createSTimelineRelation();
    timelineRelation.setSource(createdToken);
    timelineRelation.setTarget(graph.getTimeline());
    timelineRelation.setStart(timelinePositions.first());
    timelineRelation.setEnd(timelinePositions.last());
    graph.addRelation(timelineRelation);

    moveRelations(span, createdToken, allowedAnnoNames, orderName);
}
Also used : SAnnotation(org.corpus_tools.salt.core.SAnnotation) SToken(org.corpus_tools.salt.common.SToken) TreeSet(java.util.TreeSet) STextualDS(org.corpus_tools.salt.common.STextualDS) STimelineRelation(org.corpus_tools.salt.common.STimelineRelation) HashSet(java.util.HashSet) SFeature(org.corpus_tools.salt.core.SFeature)

Aggregations

SToken (org.corpus_tools.salt.common.SToken)30 SNode (org.corpus_tools.salt.core.SNode)16 SRelation (org.corpus_tools.salt.core.SRelation)12 HashMap (java.util.HashMap)9 SAnnotation (org.corpus_tools.salt.core.SAnnotation)9 ArrayList (java.util.ArrayList)8 LinkedList (java.util.LinkedList)8 SSpan (org.corpus_tools.salt.common.SSpan)8 SFeature (org.corpus_tools.salt.core.SFeature)8 RelannisNodeFeature (annis.model.RelannisNodeFeature)7 SSpanningRelation (org.corpus_tools.salt.common.SSpanningRelation)7 SDocumentGraph (org.corpus_tools.salt.common.SDocumentGraph)6 SPointingRelation (org.corpus_tools.salt.common.SPointingRelation)5 Map (java.util.Map)4 TreeMap (java.util.TreeMap)4 SDocument (org.corpus_tools.salt.common.SDocument)4 Annotation (annis.model.Annotation)3 HashSet (java.util.HashSet)3 SDominanceRelation (org.corpus_tools.salt.common.SDominanceRelation)3 STextualDS (org.corpus_tools.salt.common.STextualDS)3