use of org.corpus_tools.salt.common.SSpan in project ANNIS by korpling.
the class HTMLVis method createHTML.
/**
 * Renders the given document graph as an HTML string according to the
 * visualization definitions.
 *
 * <p>The method first collects start and end tags per position for all
 * tokens and spans that are matched by a definition, then adds the output of
 * pseudo-region definitions (BEGIN, END, ALL) and finally concatenates all
 * tags in position order.</p>
 *
 * @param graph the document graph whose tokens and spans are rendered
 * @param definitions the visualization definitions in configuration order;
 * the order is used to derive the priority of each instruction
 * @return the concatenated output strings of all collected tags
 * @throws NullPointerException if a pseudo-region definition with a metadata
 * outputter refers to a metadata name that was not found
 */
public String createHTML(SDocumentGraph graph, VisualizationDefinition[] definitions) {
    HashMap<VisualizationDefinition, Integer> instruction_priorities = new HashMap<>();
    SortedMap<Long, List<OutputItem>> outputStartTags = new TreeMap<>();
    SortedMap<Long, List<OutputItem>> outputEndTags = new TreeMap<>();
    StringBuilder sb = new StringBuilder();
    List<SToken> token = graph.getSortedTokenByText();

    // Get metadata for visualizer if stylesheet requires it
    // First check the stylesheet
    boolean bolMetaTypeFound = false;
    HashMap<String, String> meta = new HashMap<>();
    int def_priority = 0;
    for (VisualizationDefinition vis : definitions) {
        if (vis.getOutputter().getType() == SpanHTMLOutputter.Type.META_NAME) {
            bolMetaTypeFound = true;
        } else {
            // not a meta-annotation, remember order in config file to set priority
            if (vis.getMatcher() instanceof AnnotationNameMatcher
                    || vis.getMatcher() instanceof AnnotationNameAndValueMatcher
                    || vis.getMatcher() instanceof TokenMatcher) {
                instruction_priorities.put(vis, def_priority);
            }
            def_priority--;
        }
        // every outputter shares the same map instance, which is filled below
        vis.getOutputter().setMeta(meta);
    }

    if (bolMetaTypeFound) {
        // Metadata is required, get corpus and document name
        String strDocName = graph.getDocument().getName();
        List<String> corpusPath = CommonHelper.getCorpusPath(graph.getDocument().getGraph(), graph.getDocument());
        String strCorpName = corpusPath.get(corpusPath.size() - 1);
        // Get metadata and put in hashmap
        List<Annotation> metaData = Helper.getMetaDataDoc(strCorpName, strDocName);
        for (Annotation metaDatum : metaData) {
            meta.put(metaDatum.getName(), metaDatum.getValue());
        }
    }

    // collect the tags triggered by tokens
    for (SToken t : token) {
        tokenColor = "";
        if (mc.containsKey(t) && hitMark) {
            tokenColor = MatchedNodeColors.getHTMLColorByMatch(mc.get(t));
        }
        for (VisualizationDefinition vis : definitions) {
            String matched = vis.getMatcher().matchedAnnotation(t);
            if (matched != null) {
                vis.getOutputter().outputHTML(t, matched, outputStartTags, outputEndTags, tokenColor, Objects.firstNonNull(instruction_priorities.get(vis), 0));
            }
        }
    }

    // collect the tags triggered by spans
    List<SSpan> spans = graph.getSpans();
    for (VisualizationDefinition vis : definitions) {
        for (SSpan span : spans) {
            tokenColor = "";
            if (mc.containsKey(span) && hitMark) {
                tokenColor = MatchedNodeColors.getHTMLColorByMatch(mc.get(span));
            }
            String matched = vis.getMatcher().matchedAnnotation(span);
            if (matched != null) {
                vis.getOutputter().outputHTML(span, matched, outputStartTags, outputEndTags, tokenColor, Objects.firstNonNull(instruction_priorities.get(vis), 0));
            }
        }
    }

    // firstKey()/lastKey() throw NoSuchElementException on an empty map, which
    // happens when nothing was matched. The values are only read in the ALL
    // case below, which is guarded by the same emptiness check.
    int minStartTagPos = outputStartTags.isEmpty() ? 0 : outputStartTags.firstKey().intValue();
    int maxEndTagPos = outputEndTags.isEmpty() ? 0 : outputEndTags.lastKey().intValue();

    // Find BEGIN and END instructions if available
    for (VisualizationDefinition vis : definitions) {
        if (vis.getMatcher() instanceof PseudoRegionMatcher) {
            PseudoRegionMatcher.PseudoRegion psdRegionType = ((PseudoRegionMatcher) vis.getMatcher()).getPsdRegion();
            int positionStart = 0;
            int positionEnd = 0;
            if (!outputEndTags.isEmpty() && !outputStartTags.isEmpty() && psdRegionType != null) {
                switch (psdRegionType) {
                    case BEGIN:
                        positionStart = positionEnd = Integer.MIN_VALUE;
                        // def_priority is now lower than all normal annotation
                        instruction_priorities.put(vis, def_priority);
                        break;
                    case END:
                        positionStart = positionEnd = Integer.MAX_VALUE;
                        // def_priority is now lower than all normal annotation
                        instruction_priorities.put(vis, def_priority);
                        break;
                    case ALL:
                        // use same position as last and first key
                        positionStart = minStartTagPos;
                        positionEnd = maxEndTagPos;
                        // The ALL pseudo-range must enclose everything, thus it gets
                        // the priority which is one lower than the smallest non
                        // BEGIN/END priority.
                        instruction_priorities.put(vis, def_priority);
                        break;
                    default:
                        break;
                }
            }
            switch (vis.getOutputter().getType()) {
                case META_NAME:
                    String strMetaName = vis.getOutputter().getMetaname().trim();
                    String strMetaVal = meta.get(strMetaName);
                    if (strMetaVal == null) {
                        throw new NullPointerException("no such metadata name in document: '" + strMetaName + "'");
                    }
                    vis.getOutputter().outputAny(positionStart, positionEnd, ((PseudoRegionMatcher) vis.getMatcher()).getAnnotationName(), strMetaVal, outputStartTags, outputEndTags, Objects.firstNonNull(instruction_priorities.get(vis), 0));
                    break;
                case CONSTANT:
                    vis.getOutputter().outputAny(positionStart, positionEnd, ((PseudoRegionMatcher) vis.getMatcher()).getAnnotationName(), vis.getOutputter().getConstant(), outputStartTags, outputEndTags, Objects.firstNonNull(instruction_priorities.get(vis), 0));
                    break;
                case EMPTY:
                    vis.getOutputter().outputAny(positionStart, positionEnd, ((PseudoRegionMatcher) vis.getMatcher()).getAnnotationName(), "", outputStartTags, outputEndTags, Objects.firstNonNull(instruction_priorities.get(vis), 0));
                    break;
                case ANNO_NAME:
                case VALUE:
                case ESCAPED_VALUE:
                    // this shouldn't happen, since the BEGIN/END instruction has no
                    // triggering annotation name or value
                    break;
                default:
                    break;
            }
        }
    }

    // get all used indexes
    Set<Long> indexes = new TreeSet<>();
    indexes.addAll(outputStartTags.keySet());
    indexes.addAll(outputEndTags.keySet());

    for (Long i : indexes) {
        // output all strings belonging to this token position
        // first the start tags for this position, sorted by the natural order
        // of the output items
        List<OutputItem> unsortedStart = outputStartTags.get(i);
        SortedSet<OutputItem> itemsStart = new TreeSet<>();
        if (unsortedStart != null) {
            itemsStart.addAll(unsortedStart);
        }
        // consecutive start tags are joined by an HTML comment that contains
        // the line break
        Iterator<OutputItem> it = itemsStart.iterator();
        boolean first = true;
        while (it.hasNext()) {
            OutputItem s = it.next();
            if (!first) {
                sb.append("-->");
            }
            first = false;
            sb.append(s.getOutputString());
            if (it.hasNext()) {
                sb.append("<!--\n");
            }
        }

        // then the end tags for this position, but inverse their order
        List<OutputItem> unsortedEnd = outputEndTags.get(i);
        SortedSet<OutputItem> itemsEnd = new TreeSet<>();
        if (unsortedEnd != null) {
            itemsEnd.addAll(unsortedEnd);
        }
        List<OutputItem> itemsEndReverse = new LinkedList<>(itemsEnd);
        Collections.reverse(itemsEndReverse);
        for (OutputItem s : itemsEndReverse) {
            sb.append(s.getOutputString());
        }
    }
    return sb.toString();
}
use of org.corpus_tools.salt.common.SSpan in project ANNIS by korpling.
the class PDFPageHelper method getAllSSpanWithPageNumber.
/**
 * Collects all spans of the graph that carry the PDF page annotation and
 * stores them in {@code sspans}, indexed first by their left and then by
 * their right index.
 *
 * <p>If two spans share the same left and right index, the later one replaces
 * the earlier one and a warning is logged.</p>
 *
 * @param graph the document graph to scan; if {@code null} an error is logged
 * and nothing is collected
 */
private void getAllSSpanWithPageNumber(SDocumentGraph graph) {
    if (graph == null) {
        log.error("could not get page annos from empty graph");
        return;
    }
    List<SSpan> sSpans = graph.getSpans();
    if (sSpans == null) {
        return;
    }
    for (SSpan s : sSpans) {
        Set<SAnnotation> sAnnotations = s.getAnnotations();
        if (sAnnotations == null) {
            continue;
        }
        for (SAnnotation anno : sAnnotations) {
            // TODO support mappings of resolver vis map
            if (!getPDFPageAnnotationName().equals(anno.getName())) {
                continue;
            }
            int leftIdx = getLeftIndexFromSNode(s);
            int rightIdx = getRightIndexFromSNode(s);
            TreeMap<Integer, SSpan> spansByRightIdx = sspans.get(leftIdx);
            if (spansByRightIdx == null) {
                spansByRightIdx = new TreeMap<Integer, SSpan>();
                sspans.put(leftIdx, spansByRightIdx);
            } else if (spansByRightIdx.containsKey(rightIdx)) {
                // pass one argument per placeholder, otherwise the indexes are
                // never written to the log
                log.warn("an intervall {}-{} is overrided by: {}", leftIdx, rightIdx, s);
            }
            spansByRightIdx.put(rightIdx, s);
        }
    }
}
use of org.corpus_tools.salt.common.SSpan in project ANNIS by korpling.
the class PDFPageHelper method getMostLeftAndMostRightPageAnno.
/**
 * Creates a String (eg. <b>3-9</b> or <b>3</b>), based on the most left and
 * most right page annotation.
 *
 * <p>The page annotation is detected with {@link #getPageFromAnnotation}.</p>
 *
 * <p>The most left span is the entry with the smallest left index (and, for
 * that left index, the smallest right index). The most right span is the
 * entry with the largest right index; on ties the entry with the larger left
 * index wins.</p>
 *
 * @return A String which represents the start and the end page of a pdf,
 * separated by {@link #PAGE_NUMBER_SEPERATOR}. If there is no end page, or
 * exactly one page annotation, only a String with one number is returned.
 * Returns {@code null} if no page annotations were collected.
 */
public String getMostLeftAndMostRightPageAnno() {
    if (sspans == null || sspans.isEmpty()) {
        return null;
    }

    TreeMap<Integer, SSpan> rightTokIdxToSSpan = sspans.get(sspans.firstKey());
    SSpan leftSpan = rightTokIdxToSSpan.get(rightTokIdxToSSpan.firstKey());

    SSpan rightSpan = null;
    Integer rightIdx = null;
    for (TreeMap<Integer, SSpan> spansByRightIdx : sspans.values()) {
        for (Integer rightIdxKey : spansByRightIdx.keySet()) {
            if (rightIdx == null || rightIdx <= rightIdxKey) {
                rightIdx = rightIdxKey;
                rightSpan = spansByRightIdx.get(rightIdxKey);
            }
        }
    }

    // With exactly one page annotation the most left and the most right span
    // are the same object; return a single page number in that case, as
    // documented, instead of a range from the page to itself.
    if (rightSpan == null || rightSpan == leftSpan) {
        return getPageFromAnnotation(leftSpan);
    }
    return getPageFromAnnotation(leftSpan) + PAGE_NUMBER_SEPERATOR + getPageFromAnnotation(rightSpan);
}
use of org.corpus_tools.salt.common.SSpan in project ANNIS by korpling.
the class EventExtractor method addAnnotationsForNode.
/**
 * Creates one grid event, each in a row of its own, for every annotation of
 * the node whose qualified name (or, failing that, simple name) is a key of
 * {@code rowsByAnnotation}.
 *
 * @param node the span or token whose annotations are converted
 * @param graph the document graph the node belongs to
 * @param startTokenIndex lower bound used to clip the node's left/right token
 * index; event positions are relative to this index
 * @param endTokenIndex upper bound used to clip the node's left/right token
 * index
 * @param pdfController if non-null and at least one PDF viewer is registered,
 * the page of the node is attached to the event
 * @param pageNumberHelper used to look up the page of the node
 * @param eventCounter source of the running number used in the event id
 * @param rowsByAnnotation the rows per annotation name; new rows are appended
 * to the matching list
 * @param addMatch if true, the match number of the node is set on the event
 * @param mediaLayer annotation names for which time information is attached;
 * if {@code null}, time information is attached for all annotations
 * @param replaceValueWithMediaIcon if true, the value of an event with time
 * information is replaced by a blank and the tooltip describes the excerpt
 */
private static void addAnnotationsForNode(SNode node, SDocumentGraph graph, long startTokenIndex, long endTokenIndex, PDFController pdfController, PDFPageHelper pageNumberHelper, AtomicInteger eventCounter, LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation, boolean addMatch, Set<String> mediaLayer, boolean replaceValueWithMediaIcon) {
    List<String> matchedAnnos = new ArrayList<>();
    SFeature featMatchedAnnos = graph.getFeature(ANNIS_NS, FEAT_MATCHEDANNOS);
    if (featMatchedAnnos != null) {
        matchedAnnos = Splitter.on(',').trimResults().splitToList(featMatchedAnnos.getValue_STEXT());
    }

    // check if the span is a matched node
    SFeature featMatched = node.getFeature(ANNIS_NS, FEAT_MATCHEDNODE);
    Long matchRaw = featMatched == null ? null : featMatched.getValue_SNUMERIC();
    String matchedQualifiedAnnoName = "";
    // the match number is used as a 1-based index, so both bounds must be
    // checked before accessing the list
    if (matchRaw != null && matchRaw >= 1 && matchRaw <= matchedAnnos.size()) {
        matchedQualifiedAnnoName = matchedAnnos.get((int) (matchRaw - 1));
    }

    // calculate the left and right values of a span
    // TODO: howto get these numbers with Salt?
    RelannisNodeFeature feat = (RelannisNodeFeature) node.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
    long leftLong = clip(feat.getLeftToken(), startTokenIndex, endTokenIndex);
    long rightLong = clip(feat.getRightToken(), startTokenIndex, endTokenIndex);
    int left = (int) (leftLong - startTokenIndex);
    int right = (int) (rightLong - startTokenIndex);

    for (SAnnotation anno : node.getAnnotations()) {
        ArrayList<Row> rows = rowsByAnnotation.get(anno.getQName());
        if (rows == null) {
            // try again with only the name
            rows = rowsByAnnotation.get(anno.getName());
        }
        if (rows == null) {
            // only do something if the annotation was defined before
            continue;
        }

        // 1. give each annotation of each span an own row
        Row r = new Row();
        String id = "event_" + eventCounter.incrementAndGet();
        GridEvent event = new GridEvent(id, left, right, anno.getValue_STEXT());
        event.setTooltip(Helper.getQualifiedName(anno));

        if (addMatch && matchRaw != null) {
            long match = matchRaw;
            // always set the match when there is no matched annotation at all,
            // otherwise only if the annotation also matches
            if (matchedQualifiedAnnoName.isEmpty() || matchedQualifiedAnnoName.equals(anno.getQName())) {
                event.setMatch(match);
            }
        }

        if (node instanceof SSpan) {
            // calculate overlapped SToken
            List<? extends SRelation<? extends SNode, ? extends SNode>> outEdges = graph.getOutRelations(node.getId());
            if (outEdges != null) {
                for (SRelation<? extends SNode, ? extends SNode> e : outEdges) {
                    if (e instanceof SSpanningRelation) {
                        SSpanningRelation spanRel = (SSpanningRelation) e;
                        SToken tok = spanRel.getTarget();
                        event.getCoveredIDs().add(tok.getId());
                        // get the STextualDS of this token and add it to the event
                        String textID = getTextID(tok, graph);
                        if (textID != null) {
                            event.setTextID(textID);
                        }
                    }
                }
            }
            // end if span has out edges
        } else if (node instanceof SToken) {
            event.getCoveredIDs().add(node.getId());
            // get the STextualDS of this token and add it to the event
            String textID = getTextID((SToken) node, graph);
            if (textID != null) {
                event.setTextID(textID);
            }
        }

        // try to get time annotations
        if (mediaLayer == null || mediaLayer.contains(anno.getQName())) {
            double[] startEndTime = TimeHelper.getOverlappedTime(node);
            if (startEndTime.length == 1) {
                // set the time before building the tooltip, which reads it back
                // from the event
                event.setStartTime(startEndTime[0]);
                if (replaceValueWithMediaIcon) {
                    event.setValue(" ");
                    event.setTooltip("play excerpt " + event.getStartTime());
                }
            } else if (startEndTime.length == 2) {
                event.setStartTime(startEndTime[0]);
                event.setEndTime(startEndTime[1]);
                if (replaceValueWithMediaIcon) {
                    event.setValue(" ");
                    event.setTooltip("play excerpt " + event.getStartTime() + "-" + event.getEndTime());
                }
            }
        }

        r.addEvent(event);
        rows.add(r);

        if (pdfController != null && pdfController.sizeOfRegisterdPDFViewer() > 0) {
            String page = pageNumberHelper.getPageFromAnnotation(node);
            if (page != null) {
                event.setPage(page);
            }
        }
    }
    // end for each annotation of span
}
use of org.corpus_tools.salt.common.SSpan in project ANNIS by korpling.
the class EventExtractor method parseSalt.
/**
 * Converts a Salt document graph into grid rows, grouped by annotation name.
 *
 * <p>Only annotations listed in {@code annotationNames} are considered. After
 * the events have been created, the rows of each annotation are merged where
 * possible, the events of each row are sorted by token index, and events are
 * split on islands and on gaps.</p>
 *
 * @param input the visualizer input providing the document graph
 * @param showSpanAnnos if true, annotations of spans are included
 * @param showTokenAnnos if true, annotations of tokens are included
 * @param annotationNames the annotation names for which rows are created
 * @param mediaLayer A set of all annotation layers which should be treated as special media layer.
 * @param replaceValueWithMediaIcon If true the actual value is removed and an icon for playing the media file is shown instead.
 * @param startTokenIndex token index of the first token in the match
 * @param endTokenIndex token index of the last token in the match
 * @param pdfController makes status of all pdfviewer available for the
 * events.
 * @param text If non-null only include annotations for nodes of the specified text.
 * @return the rows per annotation name, in the iteration order of
 * {@code annotationNames}
 */
public static LinkedHashMap<String, ArrayList<Row>> parseSalt(VisualizerInput input, boolean showSpanAnnos, boolean showTokenAnnos, List<String> annotationNames, Set<String> mediaLayer, boolean replaceValueWithMediaIcon, long startTokenIndex, long endTokenIndex, PDFController pdfController, STextualDS text) {
    SDocumentGraph docGraph = input.getDocument().getDocumentGraph();

    // only look at annotations which were defined by the user
    LinkedHashMap<String, ArrayList<Row>> result = new LinkedHashMap<>();
    for (String name : annotationNames) {
        result.put(name, new ArrayList<Row>());
    }

    AtomicInteger counter = new AtomicInteger();
    PDFPageHelper pageHelper = new PDFPageHelper(input);

    if (showSpanAnnos) {
        for (SSpan currentSpan : docGraph.getSpans()) {
            // skip nodes that belong to a different text than the requested one
            if (text != null && text != CommonHelper.getTextualDSForNode(currentSpan, docGraph)) {
                continue;
            }
            addAnnotationsForNode(currentSpan, docGraph, startTokenIndex, endTokenIndex, pdfController, pageHelper, counter, result, true, mediaLayer, replaceValueWithMediaIcon);
        }
    }

    if (showTokenAnnos) {
        for (SToken currentToken : docGraph.getTokens()) {
            if (text != null && text != CommonHelper.getTextualDSForNode(currentToken, docGraph)) {
                continue;
            }
            addAnnotationsForNode(currentToken, docGraph, startTokenIndex, endTokenIndex, pdfController, pageHelper, counter, result, false, mediaLayer, replaceValueWithMediaIcon);
        }
    }

    // 2. merge rows when possible
    for (ArrayList<Row> rowList : result.values()) {
        mergeAllRowsIfPossible(rowList);
    }

    // 3. sort events on one row by left token index
    for (ArrayList<Row> rowList : result.values()) {
        for (Row row : rowList) {
            sortEventsByTokenIndex(row);
        }
    }

    // 4. split up events if they cover islands
    for (ArrayList<Row> rowList : result.values()) {
        for (Row row : rowList) {
            splitRowsOnIslands(row, docGraph, text, startTokenIndex, endTokenIndex);
        }
    }

    // 5. split up events if they have gaps
    for (ArrayList<Row> rowList : result.values()) {
        for (Row row : rowList) {
            splitRowsOnGaps(row, docGraph, startTokenIndex, endTokenIndex);
        }
    }

    return result;
}
Aggregations