Examples with DataSourceSequence - org.corpus_tools.salt.util.DataSourceSequence

Example 1 with DataSourceSequence

use of org.corpus_tools.salt.util.DataSourceSequence in project ANNIS by korpling.

From the class CorefVisualizer, method writeOutput.

/**
 * Writes the HTML output of the CorefVisualizer.
 *
 * <p>The method builds an HTML document (head with font, jQuery/jQuery-UI and
 * coref resources; body with the rendered texts), resets the per-run state
 * fields of this visualizer, collects all relations accepted by
 * {@code includeEdge} into components/referents, renders every textual data
 * source of the document graph and finally writes the doctype and the HTML
 * tree to {@code w}.
 *
 * <p>If the document has no document graph, an error message is put into the
 * body and the document is still written, so that the message actually
 * reaches the user.
 *
 * <p>An {@link IOException} raised while writing is logged and not rethrown.
 *
 * @param input the visualizer input providing the font, the resource paths,
 *              the document, the namespace and the mappings
 * @param w     writer to write the generated HTML with
 */
@Override
public void writeOutput(VisualizerInput input, Writer w) {
    // root html element
    Html html = new Html();
    Head head = new Head();
    Body body = new Body();
    html.removeXmlns();
    html.appendChild(head);
    html.appendChild(body);
    try {
        // font-family fallback chain: the configured font (if any), then "serif"
        LinkedList<String> fonts = new LinkedList<String>();
        if (input.getFont() != null) {
            Link linkFont = new Link();
            linkFont.setHref(input.getFont().getUrl());
            head.appendChild(linkFont);
            fonts.add(input.getFont().getName());
        }
        fonts.add("serif");
        Link linkJQueryUI = new Link();
        linkJQueryUI.setHref(input.getResourcePath("coref/jquery-ui-1.11.4.custom/jquery-ui.min.css"));
        linkJQueryUI.setRel("stylesheet");
        linkJQueryUI.setType("text/css");
        head.appendChild(linkJQueryUI);
        Link linkJQueryUIStructure = new Link();
        linkJQueryUIStructure.setHref(input.getResourcePath("coref/jquery-ui-1.11.4.custom/jquery-ui.structure.min.css"));
        linkJQueryUIStructure.setRel("stylesheet");
        linkJQueryUIStructure.setType("text/css");
        head.appendChild(linkJQueryUIStructure);
        Script scriptJquery = new Script("text/javascript");
        scriptJquery.setSrc(input.getResourcePath("coref/jquery.js"));
        head.appendChild(scriptJquery);
        Script scriptUI = new Script("text/javascript");
        scriptUI.setSrc(input.getResourcePath("coref/jquery-ui-1.11.4.custom/jquery-ui.min.js"));
        head.appendChild(scriptUI);
        Link linkCoref = new Link();
        linkCoref.setHref(input.getResourcePath("coref/coref.css"));
        linkCoref.setRel("stylesheet");
        linkCoref.setType("text/css");
        head.appendChild(linkCoref);
        Script scriptCoref = new Script("text/javascript");
        scriptCoref.setSrc(input.getResourcePath("coref/CorefVisualizer.js"));
        head.appendChild(scriptCoref);
        body.setStyle("font-family: '" + StringUtils.join(fonts, "', '") + "';");
        // reset the per-run state before collecting information
        globalIndex = 0;
        tokensOfNode = new HashMap<String, List<String>>();
        referentList = new LinkedList<TReferent>();
        komponent = new LinkedList<TComponent>();
        referentOfToken = new HashMap<String, HashMap<Long, Integer>>();
        componentOfToken = new HashMap<String, List<Long>>();
        componenttype = new LinkedList<TComponenttype>();
        SDocument saltDoc = input.getDocument();
        SDocumentGraph saltGraph = saltDoc.getDocumentGraph();
        if (saltGraph == null) {
            body.setText("An Error occured: Could not get Graph of Result (Graph == null).");
            // the error message is only visible if the document is written
            writeHtmlDocument(html, w);
            return;
        }
        List<SRelation<SNode, SNode>> edgeList = saltGraph.getRelations();
        if (edgeList == null) {
            // NOTE(review): nothing is written in this case (unchanged legacy
            // behaviour) - confirm whether an empty document would be preferable.
            return;
        }
        for (SRelation rawRel : edgeList) {
            if (includeEdge(rawRel, input.getNamespace())) {
                SPointingRelation rel = (SPointingRelation) rawRel;
                String relType = componentNameForRelation(rel);
                visitedNodes = new LinkedList<String>();
                // is there already a component type of this relation type that
                // contains the source node?
                boolean gotIt = false;
                int componentnr;
                for (componentnr = 0; componentnr < componenttype.size(); componentnr++) {
                    // fetch once: componenttype is a linked list, get(i) is not free
                    TComponenttype candidate = componenttype.get(componentnr);
                    if (candidate != null && candidate.type != null && candidate.nodeList != null && candidate.type.equals(relType) && candidate.nodeList.contains(rel.getSource().getId())) {
                        gotIt = true;
                        break;
                    }
                }
                TComponent currentComponent;
                TComponenttype currentComponenttype;
                if (gotIt) {
                    // komponent and componenttype grow in lock-step, so they share the index
                    currentComponent = komponent.get(componentnr);
                    currentComponenttype = componenttype.get(componentnr);
                } else {
                    currentComponenttype = new TComponenttype();
                    currentComponenttype.type = relType;
                    componenttype.add(currentComponenttype);
                    componentnr = komponent.size();
                    currentComponent = new TComponent();
                    currentComponent.type = relType;
                    currentComponent.tokenList = new LinkedList<String>();
                    komponent.add(currentComponent);
                    currentComponenttype.nodeList.add(rel.getSource().getId());
                }
                TReferent ref = new TReferent();
                ref.annotations = new HashSet<SerializableAnnotation>();
                for (SAnnotation anno : rel.getAnnotations()) {
                    ref.annotations.add(new SerializableAnnotation(anno));
                }
                ref.component = componentnr;
                referentList.add(ref);
                List<String> currentTokens = getAllTokens(rel.getSource(), componentNameForRelation(rel), currentComponenttype, componentnr, input.getNamespace());
                // new: register the referent for the target of the relation
                setReferent(rel.getTarget(), globalIndex, 0);
                // new: register the referent for the source of the relation
                setReferent(rel.getSource(), globalIndex, 1);
                for (String s : currentTokens) {
                    if (!currentComponent.tokenList.contains(s)) {
                        currentComponent.tokenList.add(s);
                    }
                }
                globalIndex++;
            }
        }
        colorlist = new HashMap<Integer, Integer>();
        // A list containing all the generated HTML elements, one list entry
        // for each text.
        List<List<Node>> nodesPerText = new LinkedList<List<Node>>();
        // write output for each text separately
        List<STextualDS> texts = saltGraph.getTextualDSs();
        if (texts != null && !texts.isEmpty()) {
            for (STextualDS t : texts) {
                // sequence spanning the complete text (length 0 if there is no text)
                DataSourceSequence<Integer> sequence = new DataSourceSequence<>(t, 0, (t.getText() != null) ? t.getText().length() : 0);
                List<SToken> token = saltGraph.getSortedTokenByText(saltGraph.getTokensBySequence(sequence));
                if (token != null) {
                    boolean validText = true;
                    if (Boolean.parseBoolean(input.getMappings().getProperty("hide_empty", "false"))) {
                        validText = false;
                        // check if the text contains any matching annotations
                        for (SToken tok : token) {
                            /*
                             * The token is only added to this map if a valid edge
                             * (according to the resolver trigger) connected to
                             * this token was found.
                             */
                            if (referentOfToken.get(tok.getId()) != null && !referentOfToken.get(tok.getId()).isEmpty()) {
                                validText = true;
                                break;
                            }
                        }
                    }
                    if (validText) {
                        List<Node> nodes = outputSingleText(token, input);
                        nodesPerText.add(nodes);
                    }
                }
            }
            // end for each STextualDS
            /*
             * Append the generated output to the body, wrap in table if necessary.
             */
            // present all texts as columns side by side if using multiple texts
            Table tableTexts = new Table();
            Tr trTextRow = new Tr();
            trTextRow.setCSSClass("textRow");
            // only append wrapper table if we have multiple texts
            if (nodesPerText.size() > 1) {
                body.appendChild(tableTexts);
                tableTexts.appendChild(trTextRow);
            }
            for (List<Node> nodes : nodesPerText) {
                // multi-text mode?
                if (nodesPerText.size() > 1) {
                    Td tdSingleText = new Td();
                    trTextRow.appendChild(tdSingleText);
                    tdSingleText.setCSSClass("text");
                    tdSingleText.appendChild(nodes);
                } else {
                    body.appendChild(nodes);
                }
            }
        } else {
            Text errorTxt = new Text("Could not find any texts for the " + input.getNamespace() + " node namespace (layer).");
            body.appendChild(errorTxt);
        }
        writeHtmlDocument(html, w);
    } catch (IOException ex) {
        log.error(null, ex);
    }
}

/**
 * Writes the HTML4 transitional doctype followed by the serialized HTML tree.
 *
 * @param html the root element of the document to serialize
 * @param w    the writer to append to
 * @throws IOException if appending to the writer fails
 */
private static void writeHtmlDocument(Html html, Writer w) throws IOException {
    // write HTML4 transitional doctype
    w.append(new Doctype(DocumentType.HTMLTransitional).write());
    // append the html tree
    w.append(html.write());
}

Also used : HashMap(java.util.HashMap) SNode(org.corpus_tools.salt.core.SNode) SStructuredNode(org.corpus_tools.salt.common.SStructuredNode) Node(com.hp.gagawa.java.Node) SToken(org.corpus_tools.salt.common.SToken) Td(com.hp.gagawa.java.elements.Td) SRelation(org.corpus_tools.salt.core.SRelation) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) STextualDS(org.corpus_tools.salt.common.STextualDS) LinkedList(java.util.LinkedList) List(java.util.List) Body(com.hp.gagawa.java.elements.Body) Tr(com.hp.gagawa.java.elements.Tr) SPointingRelation(org.corpus_tools.salt.common.SPointingRelation) Script(com.hp.gagawa.java.elements.Script) Head(com.hp.gagawa.java.elements.Head) Table(com.hp.gagawa.java.elements.Table) SAnnotation(org.corpus_tools.salt.core.SAnnotation) Html(com.hp.gagawa.java.elements.Html) SDocument(org.corpus_tools.salt.common.SDocument) Doctype(com.hp.gagawa.java.elements.Doctype) Text(com.hp.gagawa.java.elements.Text) IOException(java.io.IOException) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence) LinkedList(java.util.LinkedList) Link(com.hp.gagawa.java.elements.Link)

Example 2 with DataSourceSequence

use of org.corpus_tools.salt.util.DataSourceSequence in project ANNIS by korpling.

From the class RSTImpl, method getText.

/**
 * Gets the text that is overlapped by the given token.
 *
 * <p>The overlapped data source sequences of the token are requested from its
 * document graph. Exactly one sequence is supported, and its data source has
 * to be an {@link STextualDS}; the covered substring of that text is returned.
 *
 * @param currNode the token whose covered text is requested
 * @return the covered text, or {@code null} if there is not exactly one
 *         overlapped sequence, if the data source is missing or is not an
 *         {@link STextualDS}, or if the text or its start/end offsets are
 *         missing (every such case is logged as an error)
 */
private String getText(SToken currNode) {
    List<DataSourceSequence> sSequences = ((SDocumentGraph) currNode.getGraph()).getOverlappedDataSourceSequence(currNode, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
    // only support one text for spans
    if (sSequences == null || sSequences.size() != 1) {
        log.error("rst supports only one text and only text level");
        return null;
    }
    log.debug("sSequences {}", sSequences);
    DataSourceSequence sequence = sSequences.get(0);
    Object dataSource = sequence.getDataSource();
    /*
     * Check if it is a text data structure. As described in the salt manual in
     * chapter "5.8 More specific nodes and relations" the start and end point
     * of a range of token is stored in superordinate node of type SSequentialDS
     */
    if (dataSource instanceof STextualDS) {
        String fullText = ((STextualDS) dataSource).getText();
        Number start = sequence.getStart();
        Number end = sequence.getEnd();
        if (fullText == null || start == null || end == null) {
            // honour the documented contract (null on failure) instead of
            // failing with a NullPointerException
            log.error("text or text range is missing for token {}", currNode.getId());
            return null;
        }
        return fullText.substring(start.intValue(), end.intValue());
    }
    // something fundamentally goes wrong; a null data source must not make
    // the error reporting itself fail
    String actualType = (dataSource == null) ? "null" : dataSource.getClass().getName();
    log.error("{} instead of {}", actualType, STextualDS.class.getName());
    return null;
}

Also used : SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) STextualDS(org.corpus_tools.salt.common.STextualDS) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence)

Example 3 with DataSourceSequence

use of org.corpus_tools.salt.util.DataSourceSequence in project ANNIS by korpling.

From the class LegacyGraphConverter, method convertToAnnotationGraph.

/**
 * Converts a Salt document graph into a legacy {@link AnnotationGraph}.
 *
 * <p>Every node that carries the ANNIS node feature is converted into an
 * {@link AnnisNode} (annotations, name, namespace of the first layer, token
 * information and the values stored in the feature). Afterwards all relations
 * that carry a {@link RelannisEdgeFeature} are added, plus an additional
 * relation for every dominance relation that has artificial dominance
 * component/pre values.
 *
 * @param docGraph       the document graph to convert
 * @param matchedNodeIDs the internal IDs of the matched nodes; the position
 *                       in this list (1-based) is stored as the matched node
 *                       number of the corresponding node
 * @return the converted annotation graph
 */
public static AnnotationGraph convertToAnnotationGraph(SDocumentGraph docGraph, List<Long> matchedNodeIDs) {
    Set<Long> matchSet = new HashSet<>(matchedNodeIDs);
    AnnotationGraph annoGraph = new AnnotationGraph();
    List<String> pathList = CommonHelper.getCorpusPath(docGraph.getDocument().getGraph(), docGraph.getDocument());
    annoGraph.setPath(pathList.toArray(new String[pathList.size()]));
    annoGraph.setDocumentName(docGraph.getDocument().getName());
    Map<SNode, AnnisNode> allNodes = new HashMap<>();
    for (SNode sNode : docGraph.getNodes()) {
        SFeature featNodeRaw = sNode.getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE));
        if (featNodeRaw != null) {
            // looked up once and reused for all values read from the feature below
            RelannisNodeFeature feat = (RelannisNodeFeature) featNodeRaw.getValue();
            long internalID = feat.getInternalID();
            AnnisNode aNode = new AnnisNode(internalID);
            for (SAnnotation sAnno : sNode.getAnnotations()) {
                aNode.addNodeAnnotation(new Annotation(sAnno.getNamespace(), sAnno.getName(), sAnno.getValue_STEXT()));
            }
            aNode.setName(sNode.getName());
            Set<SLayer> layers = sNode.getLayers();
            if (!layers.isEmpty()) {
                aNode.setNamespace(layers.iterator().next().getName());
            }
            if (sNode instanceof SToken) {
                List<DataSourceSequence> seqList = docGraph.getOverlappedDataSourceSequence(sNode, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
                // an empty list is treated like a missing one instead of
                // failing with an IndexOutOfBoundsException
                if (seqList != null && !seqList.isEmpty()) {
                    DataSourceSequence seq = seqList.get(0);
                    Preconditions.checkNotNull(seq, "DataSourceSequence is null for token %s", sNode.getId());
                    SSequentialDS seqDS = seq.getDataSource();
                    Preconditions.checkNotNull(seqDS, "SSequentalDS is null for token %s", sNode.getId());
                    Preconditions.checkNotNull(seqDS.getData(), "SSequentalDS data is null for token %s", sNode.getId());
                    String seqDSData = (String) seqDS.getData();
                    Preconditions.checkNotNull(seqDSData, "casted SSequentalDS data is null for token %s", sNode.getId());
                    Preconditions.checkNotNull(seq.getStart(), "SSequentalDS start is null for token %s", sNode.getId());
                    Preconditions.checkNotNull(seq.getEnd(), "SSequentalDS end is null for supposed token %s", sNode.getId());
                    int start = seq.getStart().intValue();
                    int end = seq.getEnd().intValue();
                    // one placeholder per argument: token id, start, end
                    Preconditions.checkState(start >= 0 && start <= end && end <= seqDSData.length(), "Illegal start or end of textual DS for token %s (start %s, end: %s)", sNode.getId(), start, end);
                    String spannedText = seqDSData.substring(start, end);
                    Preconditions.checkNotNull(spannedText, "spanned text is null for supposed token %s (start: %s, end: %s)", sNode.getId(), start, end);
                    aNode.setSpannedText(spannedText);
                    aNode.setToken(true);
                    aNode.setTokenIndex(feat.getTokenIndex());
                }
            } else {
                aNode.setToken(false);
                aNode.setTokenIndex(null);
            }
            aNode.setCorpus(feat.getCorpusRef());
            aNode.setTextId(feat.getTextRef());
            aNode.setLeft(feat.getLeft());
            aNode.setLeftToken(feat.getLeftToken());
            aNode.setRight(feat.getRight());
            aNode.setRightToken(feat.getRightToken());
            if (matchSet.contains(aNode.getId())) {
                // 1-based position of the node in the list of matched node IDs
                aNode.setMatchedNodeInQuery((long) matchedNodeIDs.indexOf(aNode.getId()) + 1);
                annoGraph.getMatchedNodeIds().add(aNode.getId());
            } else {
                aNode.setMatchedNodeInQuery(null);
            }
            annoGraph.addNode(aNode);
            allNodes.put(sNode, aNode);
        }
    }
    for (SRelation rel : docGraph.getRelations()) {
        RelannisEdgeFeature featRelation = RelannisEdgeFeature.extract(rel);
        if (featRelation != null) {
            addRelation(rel, featRelation.getPre(), featRelation.getComponentID(), allNodes, annoGraph);
        }
    }
    // add relations with empty relation name for every dominance relation
    List<SDominanceRelation> dominanceRelations = new LinkedList<>(docGraph.getDominanceRelations());
    for (SDominanceRelation rel : dominanceRelations) {
        RelannisEdgeFeature featEdge = RelannisEdgeFeature.extract(rel);
        if (featEdge != null && featEdge.getArtificialDominanceComponent() != null && featEdge.getArtificialDominancePre() != null) {
            addRelation(SDominanceRelation.class, null, rel.getAnnotations(), rel.getSource(), rel.getTarget(), rel.getLayers(), featEdge.getArtificialDominancePre(), featEdge.getArtificialDominanceComponent(), allNodes, annoGraph);
        }
    }
    return annoGraph;
}

Also used : SLayer(org.corpus_tools.salt.core.SLayer) SNode(org.corpus_tools.salt.core.SNode) HashMap(java.util.HashMap) SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) RelannisEdgeFeature(annis.model.RelannisEdgeFeature) SDominanceRelation(org.corpus_tools.salt.common.SDominanceRelation) HashSet(java.util.HashSet) RelannisNodeFeature(annis.model.RelannisNodeFeature) SAnnotation(org.corpus_tools.salt.core.SAnnotation) SSequentialDS(org.corpus_tools.salt.common.SSequentialDS) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence) Annotation(annis.model.Annotation) SAnnotation(org.corpus_tools.salt.core.SAnnotation) LinkedList(java.util.LinkedList) AnnotationGraph(annis.model.AnnotationGraph) AnnisNode(annis.model.AnnisNode) SFeature(org.corpus_tools.salt.core.SFeature)

Example 4 with DataSourceSequence

use of org.corpus_tools.salt.util.DataSourceSequence in project ANNIS by korpling.

From the class TimelineReconstructorTest, method testBematacDialog.

/**
 * Reconstructs the timeline of a sample dialog and verifies the result.
 * The dialog is this one: https://korpling.org/annis3/?id=44b60a56-31da-4469-b438-62fdb67f28f1
 *
 * The Salt document that was generated by ANNIS is loaded and its virtual
 * tokenization is removed. Afterwards the test verifies that
 * <ul>
 * <li>the newly created tokenization is correct</li>
 * <li>spans cover the correct token</li>
 * </ul>
 */
@Test
public void testBematacDialog() {
    SDocumentGraph graph = SaltUtil.loadDocumentGraph(URI.createURI(getClass().getResource("SampleDialog.salt").toString()));
    Map<String, String> orderByAnno = new HashMap<>();
    orderByAnno.put("default_ns::instructee_utt", "instructee_dipl");
    orderByAnno.put("default_ns::instructor_utt", "instructor_dipl");
    TimelineReconstructor.removeVirtualTokenization(graph, orderByAnno);

    // instructor_dipl, instructor_norm, instructee_dipl, instructee_norm, instructee_extra, break
    List<STextualDS> allTexts = graph.getTextualDSs();
    assertEquals(6, allTexts.size());

    STextualDS diplText = findTextualDSByName("instructor_dipl", allTexts);
    assertNotNull(diplText);
    assertEquals("in Richtung des Toasters gehst ja gehst", diplText.getText());

    // select all token covered by the complete instructor_dipl text
    DataSourceSequence<Integer> wholeText = new DataSourceSequence<>();
    wholeText.setDataSource(diplText);
    wholeText.setStart(diplText.getStart());
    wholeText.setEnd(diplText.getEnd());
    List<SToken> diplToken = graph.getTokensBySequence(wholeText);

    String[] expectedToken = {"in", "Richtung", "des", "Toasters", "gehst", "ja", "gehst"};
    assertEquals(expectedToken.length, diplToken.size());
    for (int i = 0; i < expectedToken.length; i++) {
        assertEquals(expectedToken[i], graph.getText(diplToken.get(i)));
    }

    // check that the other real spans are now connected with the token
    List<SNode> utterances = graph.getNodesByName("sSpan1294");
    assertNotNull(utterances);
    assertEquals(1, utterances.size());
    SAnnotation utteranceAnno = utterances.get(0).getAnnotation("default_ns::instructor_utt");
    assertNotNull(utteranceAnno);
    assertEquals("utt", utteranceAnno.getValue_STEXT());
    List<SRelation> outgoing = utterances.get(0).getOutRelations();
    assertNotNull(outgoing);
    assertEquals(5, outgoing.size());
    for (SRelation outRel : outgoing) {
        assertTrue(outRel instanceof SSpanningRelation);
        assertEquals(diplText, CommonHelper.getTextualDSForNode((SNode) outRel.getTarget(), graph));
    }

    // the remaining texts: { name of the textual data source, expected text }
    String[][] remainingTexts = {
        {"instructor_norm", "in Richtung des Toasters gehst ja gehst"},
        {"instructee_dipl", "mhm ich geh in Richtung des Toasters okay"},
        {"instructee_norm", "ich gehe in Richtung des Toasters okay"},
        {"instructee_extra", "zeichnet"},
        {"break", "0,7 0,5"}
    };
    for (String[] expected : remainingTexts) {
        STextualDS textualDS = findTextualDSByName(expected[0], allTexts);
        assertNotNull(textualDS);
        assertEquals(expected[1], textualDS.getText());
    }
}

Also used : SNode(org.corpus_tools.salt.core.SNode) HashMap(java.util.HashMap) SAnnotation(org.corpus_tools.salt.core.SAnnotation) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence) SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) SSpanningRelation(org.corpus_tools.salt.common.SSpanningRelation) STextualDS(org.corpus_tools.salt.common.STextualDS) Test(org.junit.Test)

Example 5 with DataSourceSequence

use of org.corpus_tools.salt.util.DataSourceSequence in project ANNIS by korpling.

From the class VakyarthaDependencyTree, method getText.

/**
 * Get the text which is overlapped by the SNode.
 *
 * <p>Only the first overlapped data source sequence is used.
 *
 * @param node  the node whose covered text is requested
 * @param input the visualizer input whose result document graph is queried
 * @return the covered text; an empty string if there are no token overlapped
 *         by the node, if the data source of the first sequence is not an
 *         {@link STextualDS}, or if that data source has no text
 */
private String getText(SNode node, VisualizerInput input) {
    SDocumentGraph sDocumentGraph = input.getSResult().getDocumentGraph();
    List<DataSourceSequence> sequences = sDocumentGraph.getOverlappedDataSourceSequence(node, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
    if (sequences == null || sequences.isEmpty()) {
        return "";
    }
    DataSourceSequence firstSequence = sequences.get(0);
    Object dataSource = firstSequence.getDataSource();
    // guard the cast: fall back to the documented empty result instead of
    // failing with a ClassCastException or NullPointerException
    if (!(dataSource instanceof STextualDS)) {
        return "";
    }
    String text = ((STextualDS) dataSource).getText();
    if (text == null) {
        return "";
    }
    return text.substring(firstSequence.getStart().intValue(), firstSequence.getEnd().intValue());
}

Also used : SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence)

Aggregations

DataSourceSequence (org.corpus_tools.salt.util.DataSourceSequence)5 SDocumentGraph (org.corpus_tools.salt.common.SDocumentGraph)4 HashMap (java.util.HashMap)3 STextualDS (org.corpus_tools.salt.common.STextualDS)3 SToken (org.corpus_tools.salt.common.SToken)3 SAnnotation (org.corpus_tools.salt.core.SAnnotation)3 SNode (org.corpus_tools.salt.core.SNode)3 SRelation (org.corpus_tools.salt.core.SRelation)3 LinkedList (java.util.LinkedList)2 AnnisNode (annis.model.AnnisNode)1 Annotation (annis.model.Annotation)1 AnnotationGraph (annis.model.AnnotationGraph)1 RelannisEdgeFeature (annis.model.RelannisEdgeFeature)1 RelannisNodeFeature (annis.model.RelannisNodeFeature)1 Node (com.hp.gagawa.java.Node)1 Body (com.hp.gagawa.java.elements.Body)1 Doctype (com.hp.gagawa.java.elements.Doctype)1 Head (com.hp.gagawa.java.elements.Head)1 Html (com.hp.gagawa.java.elements.Html)1 Link (com.hp.gagawa.java.elements.Link)1