Use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
Class TimelineReconstructorTest, method testBematacDialog.
/**
 * Tests a sample dialog reconstruction.
 * The dialog is this one: https://korpling.org/annis3/?id=44b60a56-31da-4469-b438-62fdb67f28f1
 *
 * The Salt which was generated by ANNIS is loaded and the virtual tokenization is removed.
 * It is checked if
 * <ul>
 * <li>the newly created tokenization is correct</li>
 * <li>spans cover the correct token</li>
 * </ul>
 */
@Test
public void testBematacDialog() {
    // load the ANNIS-generated Salt graph and strip the virtual tokenization
    SDocumentGraph docGraph = SaltUtil.loadDocumentGraph(
        URI.createURI(getClass().getResource("SampleDialog.salt").toString()));
    Map<String, String> spanAnno2order = new HashMap<>();
    spanAnno2order.put("default_ns::instructee_utt", "instructee_dipl");
    spanAnno2order.put("default_ns::instructor_utt", "instructor_dipl");
    TimelineReconstructor.removeVirtualTokenization(docGraph, spanAnno2order);

    // expected texts: instructor_dipl, instructor_norm, instructee_dipl,
    // instructee_norm, instructee_extra, break
    List<STextualDS> allTexts = docGraph.getTextualDSs();
    assertEquals(6, allTexts.size());

    STextualDS instructorDipl = findTextualDSByName("instructor_dipl", allTexts);
    assertNotNull(instructorDipl);
    assertEquals("in Richtung des Toasters gehst ja gehst", instructorDipl.getText());

    // fetch every token of the instructor_dipl text and compare it
    // position by position with the expected tokenization
    DataSourceSequence<Integer> wholeText = new DataSourceSequence<>();
    wholeText.setDataSource(instructorDipl);
    wholeText.setStart(instructorDipl.getStart());
    wholeText.setEnd(instructorDipl.getEnd());
    List<SToken> diplTokens = docGraph.getTokensBySequence(wholeText);

    String[] expectedTokens = {"in", "Richtung", "des", "Toasters", "gehst", "ja", "gehst"};
    assertEquals(expectedTokens.length, diplTokens.size());
    for (int i = 0; i < expectedTokens.length; i++) {
        assertEquals(expectedTokens[i], docGraph.getText(diplTokens.get(i)));
    }

    // check that the other real spans are now connected with the token
    List<SNode> uttNodes = docGraph.getNodesByName("sSpan1294");
    assertNotNull(uttNodes);
    assertEquals(1, uttNodes.size());
    SAnnotation uttAnno = uttNodes.get(0).getAnnotation("default_ns::instructor_utt");
    assertNotNull(uttAnno);
    assertEquals("utt", uttAnno.getValue_STEXT());

    List<SRelation> uttOutRelations = uttNodes.get(0).getOutRelations();
    assertNotNull(uttOutRelations);
    assertEquals(5, uttOutRelations.size());
    for (SRelation rel : uttOutRelations) {
        // every outgoing relation must be a spanning relation whose target
        // token belongs to the instructor_dipl text
        assertTrue(rel instanceof SSpanningRelation);
        assertEquals(instructorDipl, CommonHelper.getTextualDSForNode((SNode) rel.getTarget(), docGraph));
    }

    // the remaining textual data sources and their reconstructed content
    String[][] expectedTexts = {
        {"instructor_norm", "in Richtung des Toasters gehst ja gehst"},
        {"instructee_dipl", "mhm ich geh in Richtung des Toasters okay"},
        {"instructee_norm", "ich gehe in Richtung des Toasters okay"},
        {"instructee_extra", "zeichnet"},
        {"break", "0,7 0,5"}
    };
    for (String[] nameAndText : expectedTexts) {
        STextualDS ds = findTextualDSByName(nameAndText[0], allTexts);
        assertNotNull(ds);
        assertEquals(nameAndText[1], ds.getText());
    }
}
Use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
Class EventExtractor, method parseSalt.
/**
 * Converts a Salt document graph to rows.
 *
 * @param input The visualizer input holding the document to convert.
 * @param showSpanAnnos If true, span annotations are included.
 * @param showTokenAnnos If true, token annotations are included.
 * @param annotationNames The (user-selected) annotation names to extract rows for.
 * @param mediaLayer A set of all annotation layers which should be treated as special media layer.
 * @param replaceValueWithMediaIcon If true the actual value is removed and an icon for playing the media file is shown instead.
 * @param startTokenIndex token index of the first token in the match
 * @param endTokenIndex token index of the last token in the match
 * @param pdfController makes status of all pdfviewer available for the
 * events.
 * @param text If non-null only include annotations for nodes of the specified text.
 * @return A map from annotation name to the rows extracted for that annotation.
 */
public static LinkedHashMap<String, ArrayList<Row>> parseSalt(VisualizerInput input, boolean showSpanAnnos, boolean showTokenAnnos, List<String> annotationNames, Set<String> mediaLayer, boolean replaceValueWithMediaIcon, long startTokenIndex, long endTokenIndex, PDFController pdfController, STextualDS text) {
    SDocumentGraph graph = input.getDocument().getDocumentGraph();

    // restrict the result to the annotations the user selected: pre-create
    // one (initially empty) row list per annotation name
    LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation = new LinkedHashMap<>();
    for (String annoName : annotationNames) {
        rowsByAnnotation.put(annoName, new ArrayList<Row>());
    }

    AtomicInteger eventCounter = new AtomicInteger();
    PDFPageHelper pageNumberHelper = new PDFPageHelper(input);

    // 1. collect the events from spans and/or tokens
    if (showSpanAnnos) {
        for (SSpan span : graph.getSpans()) {
            boolean belongsToText = text == null || text == CommonHelper.getTextualDSForNode(span, graph);
            if (belongsToText) {
                addAnnotationsForNode(span, graph, startTokenIndex, endTokenIndex, pdfController, pageNumberHelper, eventCounter, rowsByAnnotation, true, mediaLayer, replaceValueWithMediaIcon);
            }
        }
    }
    if (showTokenAnnos) {
        for (SToken tok : graph.getTokens()) {
            boolean belongsToText = text == null || text == CommonHelper.getTextualDSForNode(tok, graph);
            if (belongsToText) {
                addAnnotationsForNode(tok, graph, startTokenIndex, endTokenIndex, pdfController, pageNumberHelper, eventCounter, rowsByAnnotation, false, mediaLayer, replaceValueWithMediaIcon);
            }
        }
    }

    // 2. merge rows when possible
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        mergeAllRowsIfPossible(rows);
    }

    // 3. sort events on one row by left token index
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        for (Row r : rows) {
            sortEventsByTokenIndex(r);
        }
    }

    // 4. split up events if they cover islands
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        for (Row r : rows) {
            splitRowsOnIslands(r, graph, text, startTokenIndex, endTokenIndex);
        }
    }

    // 5. split up events if they have gaps
    for (ArrayList<Row> rows : rowsByAnnotation.values()) {
        for (Row r : rows) {
            splitRowsOnGaps(r, graph, startTokenIndex, endTokenIndex);
        }
    }

    return rowsByAnnotation;
}
Use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
Class EventExtractor, method computeDisplayedNamespace.
/**
 * Returns the annotations which should be displayed together with their namespace.
 *
 * This will check the "show_ns" parameter to determine the annotations to
 * display. It also iterates over all nodes of the graph matching the type.
 *
 * @param input The input for the visualizer.
 * @param types Which types of nodes to include
 * @return The set of (qualified) annotation names to display with their
 *         namespace; empty if none should be.
 */
public static Set<String> computeDisplayedNamespace(VisualizerInput input, List<Class<? extends SNode>> types) {
    if (input == null) {
        return new HashSet<>();
    }
    String showNamespaceConfig = input.getMappings().getProperty(GridComponent.MAPPING_SHOW_NAMESPACE);
    if (showNamespaceConfig == null) {
        // mapping not configured: nothing is displayed with a namespace
        return new LinkedHashSet<>();
    }

    SDocumentGraph graph = input.getDocument().getDocumentGraph();

    // collect all candidate annotation names for the requested node types;
    // token annotations are collected without a namespace restriction
    Set<String> annoPool = new LinkedHashSet<>();
    for (Class<? extends SNode> t : types) {
        annoPool.addAll(SToken.class.isAssignableFrom(t)
            ? getAnnotationLevelSet(graph, null, t)
            : getAnnotationLevelSet(graph, input.getNamespace(), t));
    }

    if ("true".equalsIgnoreCase(showNamespaceConfig)) {
        // all annotations should be displayed with a namespace
        return annoPool;
    } else if ("false".equalsIgnoreCase(showNamespaceConfig)) {
        return new LinkedHashSet<>();
    }

    // otherwise the property is a comma-separated list of plain names and/or
    // /regular expressions/ selecting annotations from the pool
    Set<String> annos = new LinkedHashSet<>();
    List<String> defs = Splitter.on(',').omitEmptyStrings().trimResults().splitToList(showNamespaceConfig);
    for (String s : defs) {
        // is regular expression?
        if (s.startsWith("/") && s.endsWith("/")) {
            // go over all remaining items in our pool of all annotations and
            // check if they match
            Pattern regex = Pattern.compile(StringUtils.strip(s, "/"));
            List<String> matchingAnnos = new ArrayList<>();
            for (String a : annoPool) {
                if (regex.matcher(a).matches()) {
                    matchingAnnos.add(a);
                }
            }
            annos.addAll(matchingAnnos);
            // remove matched entries so later patterns cannot select them again
            annoPool.removeAll(matchingAnnos);
        } else {
            // literal annotation name
            annos.add(s);
            annoPool.remove(s);
        }
    }
    return annos;
}
Use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
Class GridComponent, method createAnnotationGrid.
/**
 * Builds the {@link AnnotationGrid} widget for the current visualizer input:
 * configures it, computes the annotation rows (including optional grid
 * templates from the mappings), appends the token row and attaches the grid
 * to the layout.
 */
private void createAnnotationGrid() {
    String resultID = input.getId();
    grid = new AnnotationGrid(mediaController, pdfController, resultID);
    grid.addStyleName(getMainStyle());
    grid.addStyleName(Helper.CORPUS_FONT_FORCE);
    grid.setEscapeHTML(Boolean.parseBoolean(input.getMappings().getProperty(MAPPING_ESCAPE_HTML, "true")));

    // which node types contribute annotations, depending on the configuration
    LinkedList<Class<? extends SNode>> types = new LinkedList<>();
    if (isShowingSpanAnnotations()) {
        types.add(SSpan.class);
    }
    if (isShowingTokenAnnotations()) {
        types.add(SToken.class);
    }
    grid.setAnnosWithNamespace(EventExtractor.computeDisplayedNamespace(input, types));
    layout.addComponent(grid);

    SDocumentGraph graph = input.getDocument().getDocumentGraph();
    List<SNode> tokens = CommonHelper.getSortedSegmentationNodes(segmentationName, graph);
    Preconditions.checkArgument(!tokens.isEmpty(), "Token list must be non-empty");

    // derive the covered token index range from the first and last token
    RelannisNodeFeature featTokStart = (RelannisNodeFeature) tokens.get(0).getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_RELANNIS_NODE).getValue();
    long startIndex = featTokStart.getTokenIndex();
    RelannisNodeFeature featTokEnd = (RelannisNodeFeature) tokens.get(tokens.size() - 1).getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_RELANNIS_NODE).getValue();
    long endIndex = featTokEnd.getTokenIndex();

    LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation = computeAnnotationRows(startIndex, endIndex);

    // apply the (optional) grid template mappings to the computed rows
    String gridTemplates = input.getMappings().getProperty(MAPPING_GRID_TEMPLATES, "");
    if (!gridTemplates.isEmpty()) {
        applyGridTemplates(gridTemplates, rowsByAnnotation);
    }

    // add tokens as row
    AtomicInteger tokenOffsetForText = new AtomicInteger(-1);
    Row tokenRow = computeTokenRow(tokens, graph, rowsByAnnotation, startIndex, tokenOffsetForText);
    if (isHidingToken()) {
        tokenRow.setStyle("invisible_token");
    }
    if (isTokenFirst()) {
        // copy original list but add token row at the beginning
        LinkedHashMap<String, ArrayList<Row>> newList = new LinkedHashMap<>();
        newList.put("tok", Lists.newArrayList(tokenRow));
        newList.putAll(rowsByAnnotation);
        rowsByAnnotation = newList;
    } else {
        // just add the token row to the end of the list
        rowsByAnnotation.put("tok", Lists.newArrayList(tokenRow));
    }
    EventExtractor.removeEmptySpace(rowsByAnnotation, tokenRow);

    // check if the token row only contains empty values
    boolean tokenRowIsEmpty = true;
    for (GridEvent tokenEvent : tokenRow.getEvents()) {
        if (tokenEvent.getValue() != null && !tokenEvent.getValue().trim().isEmpty()) {
            tokenRowIsEmpty = false;
            break;
        }
    }
    if (!isHidingToken() && canShowEmptyTokenWarning()) {
        lblEmptyToken.setVisible(tokenRowIsEmpty);
    }

    grid.setRowsByAnnotation(rowsByAnnotation);
    grid.setTokenIndexOffset(tokenOffsetForText.get());
}

/**
 * Applies the "grid_templates" mapping to the event values of the given rows.
 *
 * Each template has the form {@code <selector>==><replacement>}, multiple
 * templates are separated by {@code ||}. Two selector forms are supported:
 * a plain annotation name (e.g. {@code infstat==><b>%%value%%</b>}) which
 * rewrites every event value of the matching row, and a name/value condition
 * (e.g. {@code entity="person"==>:)}) which only rewrites events whose value
 * equals the given one.
 *
 * @param gridTemplates the raw, non-empty template string from the mappings
 * @param rowsByAnnotation the rows to rewrite in place, keyed by qualified annotation name
 */
private static void applyGridTemplates(String gridTemplates, LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation) {
    for (String template : gridTemplates.split("\\|\\|")) {
        // example of template: entity="person"==>:), or infstat==><b>%%value%%</b>
        String[] unitSplit = template.split("==>");
        for (Map.Entry<String, ArrayList<Row>> entry : rowsByAnnotation.entrySet()) {
            // NOTE(review): assumes every row key is qualified as "ns::name";
            // a key without "::" would throw ArrayIndexOutOfBoundsException — confirm
            String rowName = entry.getKey().split("::")[1];
            for (Row row : entry.getValue()) {
                if (unitSplit[0].indexOf('=') < 0) {
                    // plain annotation name: rewrite every value of the matching row
                    if (rowName.equals(unitSplit[0])) {
                        for (GridEvent ev : row.getEvents()) {
                            // NOTE(review): replaceAll treats the value as a regex
                            // replacement string ('$' and '\' are special) — confirm intended
                            String newValue = unitSplit[1].replaceAll("%%value%%", ev.getValue());
                            ev.setValue(newValue);
                        }
                    }
                } else {
                    // conditional form like entity="person": only replace events
                    // whose current value equals the target value
                    String targetRow = unitSplit[0].split("=")[0];
                    String targetValue = unitSplit[0].split("=")[1].replaceAll("\"", "");
                    if (rowName.equals(targetRow)) {
                        for (GridEvent ev : row.getEvents()) {
                            if (ev.getValue().equals(targetValue)) {
                                ev.setValue(unitSplit[1]);
                            }
                        }
                    }
                }
            }
        }
    }
}
Use of org.corpus_tools.salt.common.SDocumentGraph in project ANNIS by korpling.
Class VakyarthaDependencyTree, method getText.
/**
 * Get the text which is overlapped by the SNode.
 *
 * @param node The node whose overlapped text should be extracted.
 * @param input The visualizer input (currently unused; kept for signature
 *              compatibility with callers).
 * @return Empty string, if there are no token overlapped by the node.
 */
private String getText(SNode node, VisualizerInput input) {
    SDocumentGraph sDocumentGraph = input.getSResult().getDocumentGraph();
    List<DataSourceSequence> sequences = sDocumentGraph.getOverlappedDataSourceSequence(node, SALT_TYPE.STEXT_OVERLAPPING_RELATION);
    if (sequences != null && !sequences.isEmpty()) {
        // only the first overlapped sequence is used; extract its covered substring
        DataSourceSequence first = sequences.get(0);
        String wholeText = ((STextualDS) first.getDataSource()).getText();
        return wholeText.substring(first.getStart().intValue(), first.getEnd().intValue());
    }
    return "";
}
Aggregations