Search in sources :

Example 1 with SSpan

use of org.corpus_tools.salt.common.SSpan in project ANNIS by korpling.

the class HTMLVis method createHTML.

/**
 * Renders the given document graph as a string by applying every
 * visualization definition to the tokens and spans of the graph.
 *
 * <p>Start and end tags are collected per position by the outputters of the
 * definitions and are concatenated in ascending position order at the end.
 * Definitions with a {@link PseudoRegionMatcher} are handled after the
 * tokens and spans, so that they can be positioned relative to the tags
 * collected so far.</p>
 *
 * @param graph the document graph whose tokens and spans are rendered
 * @param definitions the visualization definitions, in configuration order
 * @return the concatenated output of all start and end tags
 * @throws NullPointerException if a pseudo-region definition of type
 * META_NAME names a metadata entry that is not available
 */
public String createHTML(SDocumentGraph graph, VisualizationDefinition[] definitions) {
    HashMap<VisualizationDefinition, Integer> instruction_priorities = new HashMap<>();
    SortedMap<Long, List<OutputItem>> outputStartTags = new TreeMap<>();
    SortedMap<Long, List<OutputItem>> outputEndTags = new TreeMap<>();
    StringBuilder sb = new StringBuilder();
    List<SToken> token = graph.getSortedTokenByText();
    // Get metadata for visualizer if stylesheet requires it
    // First check the stylesheet
    boolean bolMetaTypeFound = false;
    HashMap<String, String> meta = new HashMap<>();
    int def_priority = 0;
    for (VisualizationDefinition vis : definitions) {
        if (vis.getOutputter().getType() == SpanHTMLOutputter.Type.META_NAME) {
            bolMetaTypeFound = true;
        } else {
            // not a meta-annotation, remember order in config file to set priority
            if (vis.getMatcher() instanceof AnnotationNameMatcher
                    || vis.getMatcher() instanceof AnnotationNameAndValueMatcher
                    || vis.getMatcher() instanceof TokenMatcher) {
                instruction_priorities.put(vis, def_priority);
            }
            def_priority--;
        }
        // every outputter shares the same map instance, which is filled below
        vis.getOutputter().setMeta(meta);
    }
    if (bolMetaTypeFound) {
        // Metadata is required, get corpus and document name
        String strDocName = graph.getDocument().getName();
        List<String> corpusPath = CommonHelper.getCorpusPath(graph.getDocument().getGraph(), graph.getDocument());
        String strCorpName = corpusPath.get(corpusPath.size() - 1);
        // Get metadata and put in hashmap
        List<Annotation> metaData = Helper.getMetaDataDoc(strCorpName, strDocName);
        for (Annotation metaDatum : metaData) {
            meta.put(metaDatum.getName(), metaDatum.getValue());
        }
    }
    for (SToken t : token) {
        tokenColor = "";
        if (mc.containsKey(t) && hitMark) {
            tokenColor = MatchedNodeColors.getHTMLColorByMatch(mc.get(t));
        }
        for (VisualizationDefinition vis : definitions) {
            String matched = vis.getMatcher().matchedAnnotation(t);
            if (matched != null) {
                vis.getOutputter().outputHTML(t, matched, outputStartTags, outputEndTags, tokenColor, Objects.firstNonNull(instruction_priorities.get(vis), 0));
            }
        }
    }
    List<SSpan> spans = graph.getSpans();
    for (VisualizationDefinition vis : definitions) {
        for (SSpan span : spans) {
            tokenColor = "";
            if (mc.containsKey(span) && hitMark) {
                tokenColor = MatchedNodeColors.getHTMLColorByMatch(mc.get(span));
            }
            String matched = vis.getMatcher().matchedAnnotation(span);
            if (matched != null) {
                vis.getOutputter().outputHTML(span, matched, outputStartTags, outputEndTags, tokenColor, Objects.firstNonNull(instruction_priorities.get(vis), 0));
            }
        }
    }
    // firstKey()/lastKey() throw NoSuchElementException on an empty map, which
    // happens when no token or span produced any output; fall back to 0 then
    int minStartTagPos = outputStartTags.isEmpty() ? 0 : outputStartTags.firstKey().intValue();
    int maxEndTagPos = outputEndTags.isEmpty() ? 0 : outputEndTags.lastKey().intValue();
    // Find BEGIN and END instructions if available
    for (VisualizationDefinition vis : definitions) {
        if (vis.getMatcher() instanceof PseudoRegionMatcher) {
            PseudoRegionMatcher.PseudoRegion psdRegionType = ((PseudoRegionMatcher) vis.getMatcher()).getPsdRegion();
            int positionStart = 0;
            int positionEnd = 0;
            if (!outputEndTags.isEmpty() && !outputStartTags.isEmpty() && psdRegionType != null) {
                switch(psdRegionType) {
                    case BEGIN:
                        positionStart = positionEnd = Integer.MIN_VALUE;
                        // def_priority is now lower than all normal annotation
                        instruction_priorities.put(vis, def_priority);
                        break;
                    case END:
                        positionStart = positionEnd = Integer.MAX_VALUE;
                        // def_priority is now lower than all normal annotation
                        instruction_priorities.put(vis, def_priority);
                        break;
                    case ALL:
                        // use same position as last and first key
                        positionStart = minStartTagPos;
                        positionEnd = maxEndTagPos;
                        // The ALL pseudo-range must enclose everything, thus it gets
                        // the priority which is one lower than the smallest non
                        // BEGIN/END priority.
                        instruction_priorities.put(vis, def_priority);
                        break;
                    default:
                        break;
                }
            }
            switch(vis.getOutputter().getType()) {
                case META_NAME:
                    String strMetaVal = meta.get(vis.getOutputter().getMetaname().trim());
                    if (strMetaVal == null) {
                        throw new NullPointerException("no such metadata name in document: '" + vis.getOutputter().getMetaname().trim() + "'");
                    } else {
                        vis.getOutputter().outputAny(positionStart, positionEnd, ((PseudoRegionMatcher) vis.getMatcher()).getAnnotationName(), strMetaVal, outputStartTags, outputEndTags, Objects.firstNonNull(instruction_priorities.get(vis), 0));
                    }
                    break;
                case CONSTANT:
                    vis.getOutputter().outputAny(positionStart, positionEnd, ((PseudoRegionMatcher) vis.getMatcher()).getAnnotationName(), vis.getOutputter().getConstant(), outputStartTags, outputEndTags, Objects.firstNonNull(instruction_priorities.get(vis), 0));
                    break;
                case EMPTY:
                    vis.getOutputter().outputAny(positionStart, positionEnd, ((PseudoRegionMatcher) vis.getMatcher()).getAnnotationName(), "", outputStartTags, outputEndTags, Objects.firstNonNull(instruction_priorities.get(vis), 0));
                    break;
                case ANNO_NAME:
                case VALUE:
                case ESCAPED_VALUE:
                    // this shouldn't happen, since the BEGIN/END instruction has no triggering annotation name or value
                    break;
                default:
                    break;
            }
        }
    }
    // get all used indexes
    Set<Long> indexes = new TreeSet<>();
    indexes.addAll(outputStartTags.keySet());
    indexes.addAll(outputEndTags.keySet());
    for (Long i : indexes) {
        // output all strings belonging to this token position
        // first the start tags for this position, sorted by the natural
        // ordering of the output items
        List<OutputItem> unsortedStart = outputStartTags.get(i);
        SortedSet<OutputItem> itemsStart = new TreeSet<>();
        if (unsortedStart != null) {
            itemsStart.addAll(unsortedStart);
        }
        // adjacent start tags are separated by "<!--\n" followed by "-->"
        boolean first = true;
        for (OutputItem s : itemsStart) {
            if (!first) {
                sb.append("<!--\n");
                sb.append("-->");
            }
            first = false;
            sb.append(s.getOutputString());
        }
        // then the end tags for this position, but inverse their order
        List<OutputItem> unsortedEnd = outputEndTags.get(i);
        TreeSet<OutputItem> itemsEnd = new TreeSet<>();
        if (unsortedEnd != null) {
            itemsEnd.addAll(unsortedEnd);
        }
        for (OutputItem s : itemsEnd.descendingSet()) {
            sb.append(s.getOutputString());
        }
    }
    return sb.toString();
}
Also used : SSpan(org.corpus_tools.salt.common.SSpan) HashMap(java.util.HashMap) SToken(org.corpus_tools.salt.common.SToken) TreeSet(java.util.TreeSet) Iterator(java.util.Iterator) List(java.util.List) LinkedList(java.util.LinkedList) TreeMap(java.util.TreeMap) Annotation(annis.model.Annotation)

Example 2 with SSpan

use of org.corpus_tools.salt.common.SSpan in project ANNIS by korpling.

the class PDFPageHelper method getAllSSpanWithPageNumber.

/**
 * Collects every span of the graph that carries the PDF page annotation and
 * stores it in {@code sspans}, keyed first by the left and then by the right
 * index of the span.
 *
 * <p>If two spans share the same left and right index, the later one
 * replaces the earlier one and a warning is logged.</p>
 *
 * @param graph the document graph to scan; if {@code null} an error is
 * logged and nothing is collected
 */
private void getAllSSpanWithPageNumber(SDocumentGraph graph) {
    if (graph == null) {
        log.error("could not get page annos from empty graph");
        return;
    }
    List<SSpan> sSpans = graph.getSpans();
    if (sSpans == null) {
        return;
    }
    for (SSpan s : sSpans) {
        Set<SAnnotation> sAnnotations = s.getAnnotations();
        if (sAnnotations == null) {
            continue;
        }
        for (SAnnotation anno : sAnnotations) {
            // TODO support mappings of resolver vis map
            if (!getPDFPageAnnotationName().equals(anno.getName())) {
                continue;
            }
            int leftIdx = getLeftIndexFromSNode(s);
            int rightIdx = getRightIndexFromSNode(s);
            TreeMap<Integer, SSpan> byRightIdx = sspans.get(leftIdx);
            if (byRightIdx == null) {
                byRightIdx = new TreeMap<Integer, SSpan>();
                sspans.put(leftIdx, byRightIdx);
            } else if (byRightIdx.containsKey(rightIdx)) {
                // supply one argument per placeholder so that the interval
                // actually shows up in the log message
                log.warn("an intervall {}-{} is overrided by: {}", leftIdx, rightIdx, s);
            }
            byRightIdx.put(rightIdx, s);
        }
    }
}
Also used : SSpan(org.corpus_tools.salt.common.SSpan) SAnnotation(org.corpus_tools.salt.core.SAnnotation)

Example 3 with SSpan

use of org.corpus_tools.salt.common.SSpan in project ANNIS by korpling.

the class PDFPageHelper method getMostLeftAndMostRightPageAnno.

/**
 * Creates a String (eg. <b>3-9</b> or <b>3</b>), based on the most left and
 * most right page annotation.
 *
 * <p>The most left span is the first entry of the collected page spans (the
 * smallest left index and, within it, the smallest right index). The most
 * right span is the one with the largest right index. The page of a span is
 * detected with {@code getPageFromAnnotation}.</p>
 *
 * @return A String which represents the start and the end page of a pdf,
 * separated by {@link #PAGE_NUMBER_SEPERATOR}. If the most left and the most
 * right page annotation are the same span (e.g. there is exactly one page
 * annotation), only a String with one number is returned. Returns
 * {@code null} if no page annotation was collected.
 */
public String getMostLeftAndMostRightPageAnno() {
    if (sspans == null || sspans.isEmpty()) {
        return null;
    }
    TreeMap<Integer, SSpan> rightTokIdxToSSpan = sspans.get(sspans.firstKey());
    SSpan leftSpan = rightTokIdxToSSpan.get(rightTokIdxToSSpan.firstKey());
    SSpan rightSpan = null;
    Integer rightIdx = null;
    for (Integer leftIdxKey : sspans.keySet()) {
        TreeMap<Integer, SSpan> byRightIdx = sspans.get(leftIdxKey);
        for (Integer rightIdxKey : byRightIdx.keySet()) {
            // "<=" lets a later span win when right indexes are equal
            if (rightIdx == null || rightIdx <= rightIdxKey) {
                rightIdx = rightIdxKey;
                rightSpan = byRightIdx.get(rightIdx);
            }
        }
    }
    // Without this check a single page annotation would be rendered as a
    // range with identical start and end page, contradicting the contract.
    if (rightSpan == null || rightSpan == leftSpan) {
        return getPageFromAnnotation(leftSpan);
    }
    return getPageFromAnnotation(leftSpan) + PAGE_NUMBER_SEPERATOR + getPageFromAnnotation(rightSpan);
}
Also used : SSpan(org.corpus_tools.salt.common.SSpan)

Example 4 with SSpan

use of org.corpus_tools.salt.common.SSpan in project ANNIS by korpling.

the class EventExtractor method addAnnotationsForNode.

/**
 * Creates one grid event (in its own row) for every annotation of the node
 * whose qualified or simple name is a key of {@code rowsByAnnotation}.
 *
 * @param node the span or token whose annotations are added
 * @param graph the document graph the node belongs to
 * @param startTokenIndex lower bound the node's left/right token index is
 * clipped to; event positions are relative to it
 * @param endTokenIndex upper bound the node's left/right token index is
 * clipped to
 * @param pdfController if non-null and at least one PDF viewer is
 * registered, the page of the node is attached to the event
 * @param pageNumberHelper used to look up the page of the node
 * @param eventCounter source of the running number used in the event id
 * @param rowsByAnnotation the rows per annotation name; new rows are
 * appended to the matching list
 * @param addMatch if true, the match number of the node is set on the event
 * @param mediaLayer if null or containing the qualified annotation name,
 * time information is attached to the event
 * @param replaceValueWithMediaIcon if true, the value of an event with time
 * information is replaced by a blank and the tooltip by a "play excerpt"
 * text
 */
private static void addAnnotationsForNode(SNode node, SDocumentGraph graph, long startTokenIndex, long endTokenIndex, PDFController pdfController, PDFPageHelper pageNumberHelper, AtomicInteger eventCounter, LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation, boolean addMatch, Set<String> mediaLayer, boolean replaceValueWithMediaIcon) {
    List<String> matchedAnnos = new ArrayList<>();
    SFeature featMatchedAnnos = graph.getFeature(ANNIS_NS, FEAT_MATCHEDANNOS);
    if (featMatchedAnnos != null) {
        matchedAnnos = Splitter.on(',').trimResults().splitToList(featMatchedAnnos.getValue_STEXT());
    }
    // check if the span is a matched node
    SFeature featMatched = node.getFeature(ANNIS_NS, FEAT_MATCHEDNODE);
    Long matchRaw = featMatched == null ? null : featMatched.getValue_SNUMERIC();
    String matchedQualifiedAnnoName = "";
    // match numbers are 1-based; the lower bound avoids a negative list index
    if (matchRaw != null && matchRaw >= 1 && matchRaw <= matchedAnnos.size()) {
        matchedQualifiedAnnoName = matchedAnnos.get((int) (matchRaw - 1));
    }
    // calculate the left and right values of a span
    // TODO: howto get these numbers with Salt?
    RelannisNodeFeature feat = (RelannisNodeFeature) node.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
    long leftLong = clip(feat.getLeftToken(), startTokenIndex, endTokenIndex);
    long rightLong = clip(feat.getRightToken(), startTokenIndex, endTokenIndex);
    int left = (int) (leftLong - startTokenIndex);
    int right = (int) (rightLong - startTokenIndex);
    for (SAnnotation anno : node.getAnnotations()) {
        ArrayList<Row> rows = rowsByAnnotation.get(anno.getQName());
        if (rows == null) {
            // try again with only the name
            rows = rowsByAnnotation.get(anno.getName());
        }
        if (rows == null) {
            // only do something if the annotation was defined before
            continue;
        }
        // 1. give each annotation of each span an own row
        Row r = new Row();
        String id = "event_" + eventCounter.incrementAndGet();
        GridEvent event = new GridEvent(id, left, right, anno.getValue_STEXT());
        event.setTooltip(Helper.getQualifiedName(anno));
        if (addMatch && matchRaw != null) {
            long match = matchRaw;
            // always set the match when there is no matched annotation at
            // all, otherwise only if this annotation is the matched one
            if (matchedQualifiedAnnoName.isEmpty() || matchedQualifiedAnnoName.equals(anno.getQName())) {
                event.setMatch(match);
            }
        }
        if (node instanceof SSpan) {
            // calculate overlapped SToken
            List<? extends SRelation<? extends SNode, ? extends SNode>> outEdges = graph.getOutRelations(node.getId());
            if (outEdges != null) {
                for (SRelation<? extends SNode, ? extends SNode> e : outEdges) {
                    if (e instanceof SSpanningRelation) {
                        SSpanningRelation spanRel = (SSpanningRelation) e;
                        SToken tok = spanRel.getTarget();
                        event.getCoveredIDs().add(tok.getId());
                        // get the STextualDS of this token and add it to the event
                        String textID = getTextID(tok, graph);
                        if (textID != null) {
                            event.setTextID(textID);
                        }
                    }
                }
            }
        } else if (node instanceof SToken) {
            event.getCoveredIDs().add(node.getId());
            // get the STextualDS of this token and add it to the event
            String textID = getTextID((SToken) node, graph);
            if (textID != null) {
                event.setTextID(textID);
            }
        }
        // try to get time annotations
        if (mediaLayer == null || mediaLayer.contains(anno.getQName())) {
            double[] startEndTime = TimeHelper.getOverlappedTime(node);
            if (startEndTime.length == 1) {
                // set the start time before the tooltip is built from it
                event.setStartTime(startEndTime[0]);
                if (replaceValueWithMediaIcon) {
                    event.setValue(" ");
                    event.setTooltip("play excerpt " + event.getStartTime());
                }
            } else if (startEndTime.length == 2) {
                event.setStartTime(startEndTime[0]);
                event.setEndTime(startEndTime[1]);
                if (replaceValueWithMediaIcon) {
                    event.setValue(" ");
                    event.setTooltip("play excerpt " + event.getStartTime() + "-" + event.getEndTime());
                }
            }
        }
        r.addEvent(event);
        rows.add(r);
        if (pdfController != null && pdfController.sizeOfRegisterdPDFViewer() > 0) {
            String page = pageNumberHelper.getPageFromAnnotation(node);
            if (page != null) {
                event.setPage(page);
            }
        }
    }
    // end for each annotation of span
}
Also used : RelannisNodeFeature(annis.model.RelannisNodeFeature) GridEvent(annis.gui.widgets.grid.GridEvent) SSpan(org.corpus_tools.salt.common.SSpan) SAnnotation(org.corpus_tools.salt.core.SAnnotation) ArrayList(java.util.ArrayList) SToken(org.corpus_tools.salt.common.SToken) SSpanningRelation(org.corpus_tools.salt.common.SSpanningRelation) Row(annis.gui.widgets.grid.Row) SFeature(org.corpus_tools.salt.core.SFeature)

Example 5 with SSpan

use of org.corpus_tools.salt.common.SSpan in project ANNIS by korpling.

the class EventExtractor method parseSalt.

/**
 * Builds the grid rows for a Salt document graph.
 *
 * <p>Only annotations named in {@code annotationNames} get rows. After the
 * events are collected, the rows of each annotation are merged where
 * possible, their events are sorted by token index, and events are split
 * on islands and on gaps.</p>
 *
 * @param input the visualizer input providing the document graph
 * @param showSpanAnnos if true, annotations of spans are included
 * @param showTokenAnnos if true, annotations of tokens are included
 * @param annotationNames the annotation names defined by the user; the
 * result has one entry per name, in this order
 * @param mediaLayer A set of all annotation layers which should be treated as special media layer.
 * @param replaceValueWithMediaIcon If true the actual value is removed and an icon for playing the media file is shown instead.
 * @param startTokenIndex token index of the first token in the match
 * @param endTokenIndex token index of the last token in the match
 * @param pdfController makes status of all pdfviewer available for the
 * events.
 * @param text If non-null only include annotations for nodes of the specified text.
 * @return the rows per annotation name
 */
public static LinkedHashMap<String, ArrayList<Row>> parseSalt(VisualizerInput input, boolean showSpanAnnos, boolean showTokenAnnos, List<String> annotationNames, Set<String> mediaLayer, boolean replaceValueWithMediaIcon, long startTokenIndex, long endTokenIndex, PDFController pdfController, STextualDS text) {
    SDocumentGraph docGraph = input.getDocument().getDocumentGraph();

    // restrict the result to the annotations the user asked for
    LinkedHashMap<String, ArrayList<Row>> result = new LinkedHashMap<>();
    for (String name : annotationNames) {
        result.put(name, new ArrayList<Row>());
    }

    AtomicInteger counter = new AtomicInteger();
    PDFPageHelper pageHelper = new PDFPageHelper(input);

    // 1. collect one row per annotation of every relevant span and token
    if (showSpanAnnos) {
        for (SSpan currentSpan : docGraph.getSpans()) {
            if (text != null && text != CommonHelper.getTextualDSForNode(currentSpan, docGraph)) {
                continue;
            }
            addAnnotationsForNode(currentSpan, docGraph, startTokenIndex, endTokenIndex, pdfController, pageHelper, counter, result, true, mediaLayer, replaceValueWithMediaIcon);
        }
    }
    if (showTokenAnnos) {
        for (SToken currentToken : docGraph.getTokens()) {
            if (text != null && text != CommonHelper.getTextualDSForNode(currentToken, docGraph)) {
                continue;
            }
            addAnnotationsForNode(currentToken, docGraph, startTokenIndex, endTokenIndex, pdfController, pageHelper, counter, result, false, mediaLayer, replaceValueWithMediaIcon);
        }
    }

    // 2. merge rows when possible
    for (ArrayList<Row> rowList : result.values()) {
        mergeAllRowsIfPossible(rowList);
    }

    // 3. sort events on one row by left token index
    for (ArrayList<Row> rowList : result.values()) {
        for (Row row : rowList) {
            sortEventsByTokenIndex(row);
        }
    }

    // 4. split up events if they cover islands
    for (ArrayList<Row> rowList : result.values()) {
        for (Row row : rowList) {
            splitRowsOnIslands(row, docGraph, text, startTokenIndex, endTokenIndex);
        }
    }

    // 5. split up events if they have gaps
    for (ArrayList<Row> rowList : result.values()) {
        for (Row row : rowList) {
            splitRowsOnGaps(row, docGraph, startTokenIndex, endTokenIndex);
        }
    }

    return result;
}
Also used : SSpan(org.corpus_tools.salt.common.SSpan) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) SToken(org.corpus_tools.salt.common.SToken) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) PDFPageHelper(annis.libgui.PDFPageHelper) Row(annis.gui.widgets.grid.Row) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap)

Aggregations

SSpan (org.corpus_tools.salt.common.SSpan)12 SToken (org.corpus_tools.salt.common.SToken)8 SSpanningRelation (org.corpus_tools.salt.common.SSpanningRelation)4 HashMap (java.util.HashMap)3 SAnnotation (org.corpus_tools.salt.core.SAnnotation)3 SNode (org.corpus_tools.salt.core.SNode)3 SRelation (org.corpus_tools.salt.core.SRelation)3 Row (annis.gui.widgets.grid.Row)2 RelannisNodeFeature (annis.model.RelannisNodeFeature)2 ArrayList (java.util.ArrayList)2 LinkedList (java.util.LinkedList)2 Map (java.util.Map)2 TreeMap (java.util.TreeMap)2 TreeSet (java.util.TreeSet)2 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)2 SDocumentGraph (org.corpus_tools.salt.common.SDocumentGraph)2 SDominanceRelation (org.corpus_tools.salt.common.SDominanceRelation)2 SOrderRelation (org.corpus_tools.salt.common.SOrderRelation)2 SPointingRelation (org.corpus_tools.salt.common.SPointingRelation)2 SFeature (org.corpus_tools.salt.core.SFeature)2