Search in sources :

Example 11 with SDocumentGraph

use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.

the class TimelineReconstructorTest method testBematacDialog.

/**
 * Verifies the timeline reconstruction on a sample dialog.
 * The dialog is this one: https://korpling.org/annis3/?id=44b60a56-31da-4469-b438-62fdb67f28f1
 *
 * The Salt generated by ANNIS is loaded from "SampleDialog.salt" and its
 * virtual tokenization is removed. Afterwards the test checks that
 * <ul>
 * <li>the six expected texts exist and have the expected content</li>
 * <li>the newly created tokens of the "instructor_dipl" text are correct</li>
 * <li>the utterance span is connected to token of the "instructor_dipl" text</li>
 * </ul>
 */
@Test
public void testBematacDialog() {
    String saltLocation = getClass().getResource("SampleDialog.salt").toString();
    SDocumentGraph docGraph = SaltUtil.loadDocumentGraph(URI.createURI(saltLocation));

    // maps the utterance annotations to the name of the text they are ordered by
    Map<String, String> anno2order = new HashMap<>();
    anno2order.put("default_ns::instructee_utt", "instructee_dipl");
    anno2order.put("default_ns::instructor_utt", "instructor_dipl");
    TimelineReconstructor.removeVirtualTokenization(docGraph, anno2order);

    // instructor_dipl, instructor_norm, instructee_dipl, instructee_norm, instructee_extra, break
    List<STextualDS> texts = docGraph.getTextualDSs();
    assertEquals(6, texts.size());

    STextualDS instructorDipl = findTextualDSByName("instructor_dipl", texts);
    assertNotNull(instructorDipl);
    assertEquals("in Richtung des Toasters gehst ja gehst", instructorDipl.getText());

    // fetch all token that cover the complete "instructor_dipl" text
    DataSourceSequence<Integer> wholeText = new DataSourceSequence<>();
    wholeText.setDataSource(instructorDipl);
    wholeText.setStart(instructorDipl.getStart());
    wholeText.setEnd(instructorDipl.getEnd());
    List<SToken> instructorDiplToken = docGraph.getTokensBySequence(wholeText);
    String[] expectedToken = { "in", "Richtung", "des", "Toasters", "gehst", "ja", "gehst" };
    assertEquals(7, instructorDiplToken.size());
    for (int i = 0; i < expectedToken.length; i++) {
        assertEquals(expectedToken[i], docGraph.getText(instructorDiplToken.get(i)));
    }

    // check that the other real spans are now connected with the token
    List<SNode> uttNode = docGraph.getNodesByName("sSpan1294");
    assertNotNull(uttNode);
    assertEquals(1, uttNode.size());
    SNode utterance = uttNode.get(0);
    SAnnotation uttAnno = utterance.getAnnotation("default_ns::instructor_utt");
    assertNotNull(uttAnno);
    assertEquals("utt", uttAnno.getValue_STEXT());
    List<SRelation> uttOutRelations = utterance.getOutRelations();
    assertNotNull(uttOutRelations);
    assertEquals(5, uttOutRelations.size());
    for (SRelation outgoing : uttOutRelations) {
        assertTrue(outgoing instanceof SSpanningRelation);
        assertEquals(instructorDipl, CommonHelper.getTextualDSForNode((SNode) outgoing.getTarget(), docGraph));
    }

    // the remaining texts only need to exist and to have the expected content
    String[][] expectedTexts = {
        { "instructor_norm", "in Richtung des Toasters gehst ja gehst" },
        { "instructee_dipl", "mhm ich geh in Richtung des Toasters okay" },
        { "instructee_norm", "ich gehe in Richtung des Toasters okay" },
        { "instructee_extra", "zeichnet" },
        { "break", "0,7 0,5" }
    };
    for (String[] expected : expectedTexts) {
        STextualDS ds = findTextualDSByName(expected[0], texts);
        assertNotNull(ds);
        assertEquals(expected[1], ds.getText());
    }
}
Also used : SNode(org.corpus_tools.salt.core.SNode) HashMap(java.util.HashMap) SAnnotation(org.corpus_tools.salt.core.SAnnotation) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence) SToken(org.corpus_tools.salt.common.SToken) SRelation(org.corpus_tools.salt.core.SRelation) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) SSpanningRelation(org.corpus_tools.salt.common.SSpanningRelation) STextualDS(org.corpus_tools.salt.common.STextualDS) Test(org.junit.Test)

Example 12 with SDocumentGraph

use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.

the class EventExtractor method parseSalt.

/**
 * Converts the annotations of a Salt document graph into grid rows.
 *
 * For every requested annotation name a (possibly empty) list of rows is
 * created. The rows are filled from the spans and/or token of the graph and
 * are then merged, sorted by token index and split on islands and gaps.
 *
 * @param input The visualizer input; its document graph is the one that is converted.
 * @param showSpanAnnos If true the annotations of the spans of the graph are added.
 * @param showTokenAnnos If true the annotations of the token of the graph are added.
 * @param annotationNames The annotation names for which rows are created; they
 * become the keys of the result in the given order.
 * @param mediaLayer A set of all annotation layers which should be treated as special media layer.
 * @param replaceValueWithMediaIcon If true the actual value is removed and an icon for playing the media file is shown instead.
 * @param startTokenIndex token index of the first token in the match
 * @param endTokenIndex token index of the last token in the match
 * @param pdfController makes status of all pdfviewer available for the
 * events.
 * @param text If non-null only include annotations for nodes of the specified text.
 * @return The rows for each annotation name, in the order of {@code annotationNames}.
 */
public static LinkedHashMap<String, ArrayList<Row>> parseSalt(VisualizerInput input, boolean showSpanAnnos, boolean showTokenAnnos, List<String> annotationNames, Set<String> mediaLayer, boolean replaceValueWithMediaIcon, long startTokenIndex, long endTokenIndex, PDFController pdfController, STextualDS text) {
    final SDocumentGraph docGraph = input.getDocument().getDocumentGraph();

    // 1. collect the events, but only for annotations which were defined by the user
    final LinkedHashMap<String, ArrayList<Row>> result = new LinkedHashMap<>();
    for (String name : annotationNames) {
        result.put(name, new ArrayList<Row>());
    }
    final AtomicInteger eventCounter = new AtomicInteger();
    final PDFPageHelper pageNumberHelper = new PDFPageHelper(input);

    if (showSpanAnnos) {
        for (SSpan span : docGraph.getSpans()) {
            // skip spans that belong to another text when a text filter is given
            if (text != null && text != CommonHelper.getTextualDSForNode(span, docGraph)) {
                continue;
            }
            addAnnotationsForNode(span, docGraph, startTokenIndex, endTokenIndex, pdfController, pageNumberHelper, eventCounter, result, true, mediaLayer, replaceValueWithMediaIcon);
        }
    }
    if (showTokenAnnos) {
        for (SToken token : docGraph.getTokens()) {
            // skip token that belong to another text when a text filter is given
            if (text != null && text != CommonHelper.getTextualDSForNode(token, docGraph)) {
                continue;
            }
            addAnnotationsForNode(token, docGraph, startTokenIndex, endTokenIndex, pdfController, pageNumberHelper, eventCounter, result, false, mediaLayer, replaceValueWithMediaIcon);
        }
    }

    // 2. merge rows when possible
    for (ArrayList<Row> rows : result.values()) {
        mergeAllRowsIfPossible(rows);
    }
    // 3. sort events on one row by left token index
    for (ArrayList<Row> rows : result.values()) {
        for (Row row : rows) {
            sortEventsByTokenIndex(row);
        }
    }
    // 4. split up events if they cover islands
    for (ArrayList<Row> rows : result.values()) {
        for (Row row : rows) {
            splitRowsOnIslands(row, docGraph, text, startTokenIndex, endTokenIndex);
        }
    }
    // 5. split up events if they have gaps
    for (ArrayList<Row> rows : result.values()) {
        for (Row row : rows) {
            splitRowsOnGaps(row, docGraph, startTokenIndex, endTokenIndex);
        }
    }
    return result;
}
Also used : SSpan(org.corpus_tools.salt.common.SSpan) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) SToken(org.corpus_tools.salt.common.SToken) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) PDFPageHelper(annis.libgui.PDFPageHelper) Row(annis.gui.widgets.grid.Row) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap)

Example 13 with SDocumentGraph

use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.

the class EventExtractor method computeDisplayedNamespace.

/**
 * Returns the annotations which should be displayed together with their namespace.
 *
 * The decision is based on the {@code GridComponent.MAPPING_SHOW_NAMESPACE}
 * mapping (the "show_ns" parameter) of the input:
 * <ul>
 * <li>not set: no annotation is returned</li>
 * <li>"true" (case-insensitive): all annotations found on nodes of the given types are returned</li>
 * <li>"false" (case-insensitive): no annotation is returned</li>
 * <li>anything else: the value is a comma separated list of definitions. A
 * definition enclosed in slashes is a regular expression and selects every
 * not yet selected annotation that it matches completely; any other
 * definition is added as it is.</li>
 * </ul>
 *
 * @param input The input for the visualizer. If null an empty set is returned.
 * @param types Which types of nodes to include
 * @return The annotation names to display with a namespace, never null.
 */
public static Set<String> computeDisplayedNamespace(VisualizerInput input, List<Class<? extends SNode>> types) {
    if (input == null) {
        return new HashSet<>();
    }
    String showNamespaceConfig = input.getMappings().getProperty(GridComponent.MAPPING_SHOW_NAMESPACE);
    if (showNamespaceConfig == null) {
        return new LinkedHashSet<>();
    }

    // collect all annotations that exist on nodes of the requested types
    SDocumentGraph docGraph = input.getDocument().getDocumentGraph();
    Set<String> candidates = new LinkedHashSet<>();
    for (Class<? extends SNode> type : types) {
        if (SToken.class.isAssignableFrom(type)) {
            // token annotations are not restricted to the namespace of the input
            candidates.addAll(getAnnotationLevelSet(docGraph, null, type));
        } else {
            candidates.addAll(getAnnotationLevelSet(docGraph, input.getNamespace(), type));
        }
    }

    if ("true".equalsIgnoreCase(showNamespaceConfig)) {
        // all annotations should be displayed with a namespace
        return candidates;
    }
    if ("false".equalsIgnoreCase(showNamespaceConfig)) {
        return new LinkedHashSet<>();
    }

    Set<String> selected = new LinkedHashSet<>();
    List<String> definitions = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(showNamespaceConfig);
    for (String definition : definitions) {
        boolean isRegex = definition.startsWith("/") && definition.endsWith("/");
        if (!isRegex) {
            // a plain name is taken over literally
            selected.add(definition);
            candidates.remove(definition);
            continue;
        }
        // go over all remaining candidates and select the ones matching the
        // regular expression; selected ones are no longer candidates
        Pattern regex = Pattern.compile(StringUtils.strip(definition, "/"));
        LinkedList<String> matches = new LinkedList<>();
        for (String candidate : candidates) {
            if (regex.matcher(candidate).matches()) {
                matches.add(candidate);
            }
        }
        selected.addAll(matches);
        candidates.removeAll(matches);
    }
    return selected;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Pattern(java.util.regex.Pattern) Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) BitSet(java.util.BitSet) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) LinkedList(java.util.LinkedList) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 14 with SDocumentGraph

use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.

the class GridComponent method createAnnotationGrid.

/**
 * Creates the annotation grid for the current input and adds it to the layout.
 *
 * The annotation rows are computed for the range between the first and the
 * last node of the configured segmentation, the grid templates of the
 * mappings are applied to the values of these rows and finally the token row
 * is added either as first or as last row.
 *
 * @throws IllegalArgumentException if there are no nodes for the segmentation
 */
private void createAnnotationGrid() {
    String resultID = input.getId();
    grid = new AnnotationGrid(mediaController, pdfController, resultID);
    grid.addStyleName(getMainStyle());
    grid.addStyleName(Helper.CORPUS_FONT_FORCE);
    grid.setEscapeHTML(Boolean.parseBoolean(input.getMappings().getProperty(MAPPING_ESCAPE_HTML, "true")));
    LinkedList<Class<? extends SNode>> types = new LinkedList<>();
    if (isShowingSpanAnnotations()) {
        types.add(SSpan.class);
    }
    if (isShowingTokenAnnotations()) {
        types.add(SToken.class);
    }
    grid.setAnnosWithNamespace(EventExtractor.computeDisplayedNamespace(input, types));
    layout.addComponent(grid);
    SDocumentGraph graph = input.getDocument().getDocumentGraph();
    List<SNode> tokens = CommonHelper.getSortedSegmentationNodes(segmentationName, graph);
    Preconditions.checkArgument(!tokens.isEmpty(), "Token list must be non-empty");
    // the token indexes of the first and the last node define the displayed range
    RelannisNodeFeature featTokStart = (RelannisNodeFeature) tokens.get(0).getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_RELANNIS_NODE).getValue();
    long startIndex = featTokStart.getTokenIndex();
    RelannisNodeFeature featTokEnd = (RelannisNodeFeature) tokens.get(tokens.size() - 1).getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_RELANNIS_NODE).getValue();
    long endIndex = featTokEnd.getTokenIndex();
    LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation = computeAnnotationRows(startIndex, endIndex);
    // rewrite the event values according to the configured grid templates
    applyGridTemplates(rowsByAnnotation, input.getMappings().getProperty(MAPPING_GRID_TEMPLATES, ""));
    // add tokens as row
    AtomicInteger tokenOffsetForText = new AtomicInteger(-1);
    Row tokenRow = computeTokenRow(tokens, graph, rowsByAnnotation, startIndex, tokenOffsetForText);
    if (isHidingToken()) {
        tokenRow.setStyle("invisible_token");
    }
    if (isTokenFirst()) {
        // copy original list but add token row at the beginning
        LinkedHashMap<String, ArrayList<Row>> newList = new LinkedHashMap<>();
        newList.put("tok", Lists.newArrayList(tokenRow));
        newList.putAll(rowsByAnnotation);
        rowsByAnnotation = newList;
    } else {
        // just add the token row to the end of the list
        rowsByAnnotation.put("tok", Lists.newArrayList(tokenRow));
    }
    EventExtractor.removeEmptySpace(rowsByAnnotation, tokenRow);
    // check if the token row only contains empty values
    boolean tokenRowIsEmpty = true;
    for (GridEvent tokenEvent : tokenRow.getEvents()) {
        if (tokenEvent.getValue() != null && !tokenEvent.getValue().trim().isEmpty()) {
            tokenRowIsEmpty = false;
            break;
        }
    }
    if (!isHidingToken() && canShowEmptyTokenWarning()) {
        lblEmptyToken.setVisible(tokenRowIsEmpty);
    }
    grid.setRowsByAnnotation(rowsByAnnotation);
    grid.setTokenIndexOffset(tokenOffsetForText.get());
}

/**
 * Replaces event values according to the grid templates.
 *
 * The templates are separated by "||" and each template has the form
 * {@code condition==>replacement}, e.g. {@code entity="person"==>:)} or
 * {@code infstat==><b>%%value%%</b>}:
 * <ul>
 * <li>A condition without "=" is a row name. Every value of such a row is
 * replaced and "%%value%%" inside the replacement stands for the original
 * value.</li>
 * <li>A condition of the form {@code row=value} (double quotes around the
 * value are removed) only replaces the events of that row having exactly
 * this value.</li>
 * </ul>
 * The row name is the part of the row key after the "::" separator, or the
 * whole key if it contains no separator. Templates without a replacement or
 * with an incomplete condition are ignored.
 *
 * @param rowsByAnnotation the rows whose events are changed in place
 * @param gridTemplates the template definition, may be null or empty
 */
private void applyGridTemplates(Map<String, ArrayList<Row>> rowsByAnnotation, String gridTemplates) {
    if (gridTemplates == null || gridTemplates.isEmpty()) {
        return;
    }
    for (String template : gridTemplates.split("\\|\\|")) {
        // example of template: entity="person"==>:), or infstat==><b>%%value%%</b>
        String[] unitSplit = template.split("==>");
        if (unitSplit.length < 2) {
            // no replacement given, nothing can be applied
            continue;
        }
        String condition = unitSplit[0];
        String replacement = unitSplit[1];
        String targetRow;
        // null means that every value of the row is replaced
        String targetValue;
        if (condition.indexOf('=') < 0) {
            // the condition is a single instruction, e.g., infstat
            targetRow = condition;
            targetValue = null;
        } else {
            // it is an instruction like entity="person": break it into entity and person
            String[] conditionSplit = condition.split("=");
            if (conditionSplit.length < 2) {
                continue;
            }
            targetRow = conditionSplit[0];
            targetValue = conditionSplit[1].replace("\"", "");
        }
        for (Map.Entry<String, ArrayList<Row>> entry : rowsByAnnotation.entrySet()) {
            String rowKey = entry.getKey();
            String[] keyParts = rowKey.split("::");
            String rowName = keyParts.length > 1 ? keyParts[1] : rowKey;
            if (!rowName.equals(targetRow)) {
                continue;
            }
            for (Row row : entry.getValue()) {
                for (GridEvent ev : row.getEvents()) {
                    String origValue = ev.getValue();
                    if (targetValue == null) {
                        // literal replacement: the value must not be interpreted as a
                        // regex replacement string ("$" and "\" are ordinary characters)
                        ev.setValue(replacement.replace("%%value%%", origValue == null ? "" : origValue));
                    } else if (targetValue.equals(origValue)) {
                        ev.setValue(replacement);
                    }
                }
            }
        }
    }
}
Also used : SNode(org.corpus_tools.salt.core.SNode) HashSet(java.util.HashSet) Set(java.util.Set) GridEvent(annis.gui.widgets.grid.GridEvent) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) AnnotationGrid(annis.gui.widgets.grid.AnnotationGrid) SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) Iterator(java.util.Iterator) RelannisNodeFeature(annis.model.RelannisNodeFeature) LinkedList(java.util.LinkedList) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Row(annis.gui.widgets.grid.Row) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 15 with SDocumentGraph

use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.

the class VakyarthaDependencyTree method getText.

/**
 * Returns the part of the text that is overlapped by the given node.
 *
 * Only the first overlapped sequence reported by the document graph is used.
 *
 * @param node the node whose overlapped text is requested
 * @param input the visualizer input providing the document graph
 * @return The overlapped text or an empty string, if there are no token
 * overlapped by the node.
 */
private String getText(SNode node, VisualizerInput input) {
    SDocumentGraph docGraph = input.getSResult().getDocumentGraph();
    List<DataSourceSequence> overlapped = docGraph.getOverlappedDataSourceSequence(node, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
    if (overlapped == null || overlapped.isEmpty()) {
        return "";
    }
    DataSourceSequence first = overlapped.get(0);
    String completeText = ((STextualDS) first.getDataSource()).getText();
    int start = first.getStart().intValue();
    int end = first.getEnd().intValue();
    return completeText.substring(start, end);
}
Also used : SDocumentGraph(org.corpus_tools.salt.common.SDocumentGraph) DataSourceSequence(org.corpus_tools.salt.util.DataSourceSequence)

Aggregations

SDocumentGraph (org.corpus_tools.salt.common.SDocumentGraph)24 SNode (org.corpus_tools.salt.core.SNode)12 ArrayList (java.util.ArrayList)9 SDocument (org.corpus_tools.salt.common.SDocument)8 SaltProject (org.corpus_tools.salt.common.SaltProject)8 SCorpusGraph (org.corpus_tools.salt.common.SCorpusGraph)7 SToken (org.corpus_tools.salt.common.SToken)7 LinkedList (java.util.LinkedList)6 Test (org.junit.Test)6 HashMap (java.util.HashMap)5 SRelation (org.corpus_tools.salt.core.SRelation)5 RelannisNodeFeature (annis.model.RelannisNodeFeature)4 DataSourceSequence (org.corpus_tools.salt.util.DataSourceSequence)4 TreeSet (java.util.TreeSet)3 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)3 Pattern (java.util.regex.Pattern)3 SCorpus (org.corpus_tools.salt.common.SCorpus)3 SFeature (org.corpus_tools.salt.core.SFeature)3 Row (annis.gui.widgets.grid.Row)2 IOException (java.io.IOException)2