Use of org.corpus_tools.salt.common.SToken in the ANNIS project by korpling.
Class EventExtractor, method splitRowsOnIslands.
/**
 * Splits events of a row if they overlap an island. Islands are areas between
 * the token which are included in the result.
 *
 * @param row the row whose events are split in place
 * @param graph the document graph providing the sorted token list
 * @param text the textual data source the token must belong to; if
 *             {@code null} token from all texts are considered
 * @param startTokenIndex token index of the first token in the match
 * @param endTokenIndex token index of the last token in the match
 */
private static void splitRowsOnIslands(Row row, final SDocumentGraph graph, STextualDS text, long startTokenIndex, long endTokenIndex) {
    // bit i is set iff a token with (clipped) index startTokenIndex + i exists
    BitSet tokenCoverage = new BitSet();
    // get the sorted token
    List<SToken> sortedTokenList = graph.getSortedTokenByText();
    // add all token belonging to the right text to the bit set
    ListIterator<SToken> itToken = sortedTokenList.listIterator();
    while (itToken.hasNext()) {
        SToken t = itToken.next();
        if (text == null || text == CommonHelper.getTextualDSForNode(t, graph)) {
            RelannisNodeFeature feat = (RelannisNodeFeature) t.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
            long tokenIndexRaw = feat.getTokenIndex();
            // clip to the match range so out-of-range token map onto the borders
            tokenIndexRaw = clip(tokenIndexRaw, startTokenIndex, endTokenIndex);
            int tokenIndex = (int) (tokenIndexRaw - startTokenIndex);
            tokenCoverage.set(tokenIndex);
        }
    }
    ListIterator<GridEvent> itEvents = row.getEvents().listIterator();
    while (itEvents.hasNext()) {
        GridEvent event = itEvents.next();
        // mark the columns the event currently spans (right border is inclusive,
        // BitSet.set's upper bound is exclusive, hence the +1)
        BitSet eventBitSet = new BitSet();
        eventBitSet.set(event.getLeft(), event.getRight() + 1);
        // restrict event bitset on the locations where token are present
        eventBitSet.and(tokenCoverage);
        // if there is a gap (clear bit) inside the event's original span
        // and we need to split it
        if (eventBitSet.nextClearBit(event.getLeft()) <= event.getRight()) {
            // remove the original event
            row.removeEvent(itEvents);
            // The event bitset now marks all the locations which the event should
            // cover.
            // Make a list of new events for each connected range in the bitset
            int subElement = 0;
            int offset = eventBitSet.nextSetBit(0);
            while (offset >= 0) {
                // last set bit of the current contiguous run (inclusive)
                int end = eventBitSet.nextClearBit(offset) - 1;
                // NOTE(review): runs of width 1 (offset == end) are skipped here,
                // i.e. single-column fragments are dropped — confirm intended
                if (offset < end) {
                    GridEvent newEvent = new GridEvent(event);
                    // derive a unique id for each split-off fragment
                    newEvent.setId(event.getId() + "_islandsplit_" + subElement++);
                    newEvent.setLeft(offset);
                    newEvent.setRight(end);
                    row.addEvent(itEvents, newEvent);
                }
                offset = eventBitSet.nextSetBit(end + 1);
            }
        }
        // end if we need to split
    }
}
Use of org.corpus_tools.salt.common.SToken in the ANNIS project by korpling.
Class EventExtractor, method addAnnotationsForNode.
/**
 * Creates one grid event (in its own {@link Row}) for every annotation of the
 * given node and registers it in {@code rowsByAnnotation}.
 *
 * <p>Fix: in the single-value media-time branch the tooltip was built from
 * {@code event.getStartTime()} <em>before</em> the start time was set, so it
 * reported a stale/default value; the start time is now set first (matching
 * the order already used in the two-value branch).</p>
 *
 * @param node the node whose annotations are turned into grid events
 * @param graph the document graph the node belongs to
 * @param startTokenIndex token index of the first token in the match
 * @param endTokenIndex token index of the last token in the match
 * @param pdfController optional controller for PDF viewers; may be {@code null}
 * @param pageNumberHelper resolves the PDF page for a node
 * @param eventCounter used to generate unique event ids
 * @param rowsByAnnotation maps qualified (or plain) annotation names to the
 *        rows that should receive events; annotations without an entry are ignored
 * @param addMatch whether the match number should be attached to the event
 * @param mediaLayer qualified annotation names that carry media (time) values;
 *        {@code null} means every annotation is checked for time values
 * @param replaceValueWithMediaIcon whether the event value is replaced by a
 *        placeholder so the UI can render a media icon instead
 */
private static void addAnnotationsForNode(SNode node, SDocumentGraph graph, long startTokenIndex, long endTokenIndex, PDFController pdfController, PDFPageHelper pageNumberHelper, AtomicInteger eventCounter, LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation, boolean addMatch, Set<String> mediaLayer, boolean replaceValueWithMediaIcon) {
    List<String> matchedAnnos = new ArrayList<>();
    SFeature featMatchedAnnos = graph.getFeature(ANNIS_NS, FEAT_MATCHEDANNOS);
    if (featMatchedAnnos != null) {
        matchedAnnos = Splitter.on(',').trimResults().splitToList(featMatchedAnnos.getValue_STEXT());
    }
    // check if the span is a matched node
    SFeature featMatched = node.getFeature(ANNIS_NS, FEAT_MATCHEDNODE);
    Long matchRaw = featMatched == null ? null : featMatched.getValue_SNUMERIC();
    String matchedQualifiedAnnoName = "";
    // match numbers are 1-based; also guard against values < 1 which would
    // otherwise cause an IndexOutOfBoundsException below
    if (matchRaw != null && matchRaw >= 1 && matchRaw <= matchedAnnos.size()) {
        matchedQualifiedAnnoName = matchedAnnos.get((int) ((long) matchRaw) - 1);
    }
    // calculate the left and right values of a span
    // TODO: howto get these numbers with Salt?
    RelannisNodeFeature feat = (RelannisNodeFeature) node.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
    long leftLong = feat.getLeftToken();
    long rightLong = feat.getRightToken();
    // clip to the match range and rebase to 0 so the values fit grid columns
    leftLong = clip(leftLong, startTokenIndex, endTokenIndex);
    rightLong = clip(rightLong, startTokenIndex, endTokenIndex);
    int left = (int) (leftLong - startTokenIndex);
    int right = (int) (rightLong - startTokenIndex);
    for (SAnnotation anno : node.getAnnotations()) {
        ArrayList<Row> rows = rowsByAnnotation.get(anno.getQName());
        if (rows == null) {
            // try again with only the name
            rows = rowsByAnnotation.get(anno.getName());
        }
        if (rows != null) {
            // only do something if the annotation was defined before
            // 1. give each annotation of each span an own row
            Row r = new Row();
            String id = "event_" + eventCounter.incrementAndGet();
            GridEvent event = new GridEvent(id, left, right, anno.getValue_STEXT());
            event.setTooltip(Helper.getQualifiedName(anno));
            if (addMatch && matchRaw != null) {
                long match = matchRaw;
                if (matchedQualifiedAnnoName.isEmpty()) {
                    // always set the match when there is no matched annotation at all
                    event.setMatch(match);
                } else if (matchedQualifiedAnnoName.equals(anno.getQName())) {
                    // check if the annotation also matches
                    event.setMatch(match);
                }
            }
            if (node instanceof SSpan) {
                // calculate overlapped SToken
                List<? extends SRelation<? extends SNode, ? extends SNode>> outEdges = graph.getOutRelations(node.getId());
                if (outEdges != null) {
                    for (SRelation<? extends SNode, ? extends SNode> e : outEdges) {
                        if (e instanceof SSpanningRelation) {
                            SSpanningRelation spanRel = (SSpanningRelation) e;
                            SToken tok = spanRel.getTarget();
                            event.getCoveredIDs().add(tok.getId());
                            // get the STextualDS of this token and add it to the event
                            String textID = getTextID(tok, graph);
                            if (textID != null) {
                                event.setTextID(textID);
                            }
                        }
                    }
                }
                // end if span has out edges
            } else if (node instanceof SToken) {
                event.getCoveredIDs().add(node.getId());
                // get the STextualDS of this token and add it to the event
                String textID = getTextID((SToken) node, graph);
                if (textID != null) {
                    event.setTextID(textID);
                }
            }
            // try to get time annotations
            if (mediaLayer == null || mediaLayer.contains(anno.getQName())) {
                double[] startEndTime = TimeHelper.getOverlappedTime(node);
                if (startEndTime.length == 1) {
                    // set the start time BEFORE building the tooltip, otherwise the
                    // tooltip would report the event's previous/default start time
                    event.setStartTime(startEndTime[0]);
                    if (replaceValueWithMediaIcon) {
                        event.setValue(" ");
                        event.setTooltip("play excerpt " + event.getStartTime());
                    }
                } else if (startEndTime.length == 2) {
                    event.setStartTime(startEndTime[0]);
                    event.setEndTime(startEndTime[1]);
                    if (replaceValueWithMediaIcon) {
                        event.setValue(" ");
                        event.setTooltip("play excerpt " + event.getStartTime() + "-" + event.getEndTime());
                    }
                }
            }
            r.addEvent(event);
            rows.add(r);
            // attach the PDF page number if any PDF viewer is registered
            if (pdfController != null && pdfController.sizeOfRegisterdPDFViewer() > 0) {
                String page = pageNumberHelper.getPageFromAnnotation(node);
                if (page != null) {
                    event.setPage(page);
                }
            }
        }
    }
    // end for each annotation of span
}
Use of org.corpus_tools.salt.common.SToken in the ANNIS project by korpling.
Class RSTImpl, method getOutGoingEdgeTypeAnnotation.
/**
 * Collects type and annotations of every outgoing edge of the given node as
 * JSON, inverting the direction of RST edges so they render correctly.
 *
 * <p>Cleanup: the original tested {@code edge instanceof SRelation} on a
 * variable already declared as {@code SRelation<SNode, SNode>} (only ever
 * false for {@code null} entries) and used redundant raw-type casts. Both are
 * replaced by direct typed access; the null-edge skip and the exception on a
 * null target are preserved.</p>
 *
 * @param node the source node whose outgoing edges are inspected
 * @return a JSON array with one object per edge (keys: sType, from, to,
 *         annotation); empty if the node has no outgoing relations
 * @throws JSONException if an edge has no target node or JSON building fails
 */
private JSONArray getOutGoingEdgeTypeAnnotation(SNode node) throws JSONException {
    List<SRelation<SNode, SNode>> out = node.getGraph().getOutRelations(node.getId());
    JSONArray edgeData = new JSONArray();
    // check if there is a pointing relation
    if (out == null) {
        return edgeData;
    }
    for (SRelation<SNode, SNode> edge : out) {
        // skip null entries and edges that lead directly to a token
        if (edge == null || edge.getTarget() instanceof SToken) {
            continue;
        }
        String type = edge.getType();
        String sTypeAsString = "edge";
        if (type != null && !type.isEmpty()) {
            sTypeAsString = type;
        }
        JSONObject jsonEdge = new JSONObject();
        edgeData.put(jsonEdge);
        jsonEdge.put("sType", sTypeAsString);
        SNode target = edge.getTarget();
        if (target != null) {
            /**
             * Invert the direction of the RST-edge.
             */
            if (getRSTType().equals(sTypeAsString)) {
                jsonEdge.put("to", getUniStrId(node));
                jsonEdge.put("from", getUniStrId(target));
            } else {
                jsonEdge.put("from", getUniStrId(node));
                jsonEdge.put("to", getUniStrId(target));
            }
        } else {
            throw new JSONException("could not cast to SNode");
        }
        Set<SAnnotation> annos = edge.getAnnotations();
        if (annos != null) {
            for (SAnnotation anno : annos) {
                getOrCreateArray(jsonEdge, "annotation").put(anno.getValue_STEXT());
            }
        }
    }
    return edgeData;
}
Use of org.corpus_tools.salt.common.SToken in the ANNIS project by korpling.
Class TextColumnExporter, method outputText.
/**
 * Writes the specified record (if applicable, as multiple result lines) from query result set to the output file.
 *
 * @param graph the org.corpus_tools.salt.common.SDocumentGraph representation of a specified record
 * @param alignmc a boolean, which indicates, whether the data should be aligned by match numbers or not
 * @param recordNumber the number of record within the record set
 * @param out the specified Writer
 *
 * @throws IOException if an I/O error occurs
 */
@Override
public void outputText(SDocumentGraph graph, boolean alignmc, int recordNumber, Writer out) throws IOException {
    String currSpeakerName = "";
    String prevSpeakerName = "";
    if (graph != null) {
        List<SToken> orderedToken = graph.getSortedTokenByText();
        if (orderedToken != null) {
            // iterate over token
            ListIterator<SToken> it = orderedToken.listIterator();
            // match number of the previously written token; -1 means "not matched"
            long lastTokenWasMatched = -1;
            boolean noPreviousTokenInLine = false;
            // if match number == 0, reset global variables and output warning, if necessary
            if (recordNumber == 0) {
                isFirstSpeakerWithMatch = true;
                counterGlobal = 0;
                // create warning message
                String numbersString = "";
                String warnMessage = "";
                StringBuilder sb = new StringBuilder();
                // copy the user-defined filter numbers (Long) into an Integer list
                // so they can be removed by equality with the global match numbers
                List<Integer> copyOfFilterNumbersSetByUser = new ArrayList<Integer>();
                for (Long filterNumber : filterNumbersSetByUser) {
                    copyOfFilterNumbersSetByUser.add(Integer.parseInt(String.valueOf(filterNumber)));
                }
                // every filter number that actually occurred in the data is removed;
                // whatever remains could not be represented and is warned about
                for (Integer matchNumberGlobal : matchNumbersGlobal) {
                    copyOfFilterNumbersSetByUser.remove(matchNumberGlobal);
                }
                Collections.sort(copyOfFilterNumbersSetByUser);
                if (!copyOfFilterNumbersSetByUser.isEmpty()) {
                    for (Integer filterNumber : copyOfFilterNumbersSetByUser) {
                        sb.append(filterNumber + ", ");
                    }
                    if (copyOfFilterNumbersSetByUser.size() == 1) {
                        numbersString = "number";
                    } else {
                        numbersString = "numbers";
                    }
                    // substring(...) strips the trailing ", " of the joined list
                    warnMessage = "1. Filter " + numbersString + " " + sb.toString().substring(0, sb.lastIndexOf(",")) + " couldn't be represented.";
                }
                // additionally warn if alignment was requested but is not possible
                if (alignmc && !dataIsAlignable) {
                    if (!warnMessage.isEmpty()) {
                        warnMessage += (NEWLINE + NEWLINE + "2. ");
                    } else {
                        warnMessage += "1. ";
                    }
                    warnMessage += "You have tried to align matches by node number via check box." + "Unfortunately this option is not applicable for this data set, " + "so the data couldn't be aligned.";
                }
                // show the collected warnings in the UI (Vaadin notification)
                if (!warnMessage.isEmpty()) {
                    String warnCaption = "Some export options couldn't be realized.";
                    Notification warn = new Notification(warnCaption, warnMessage, Notification.Type.WARNING_MESSAGE);
                    warn.setDelayMsec(20000);
                    warn.show(Page.getCurrent());
                }
            }
            // global variables reset; warning issued
            int matchesWrittenForSpeaker = 0;
            while (it.hasNext()) {
                SToken tok = it.next();
                counterGlobal++;
                // get current speaker name
                String name;
                if ((name = CommonHelper.getTextualDSForNode(tok, graph).getName()) == null) {
                    name = "";
                }
                // speaker key is "<1-based record number>_<text name>"
                currSpeakerName = (recordNumber + 1) + "_" + name;
                // if speaker has no matches, skip token
                // NOTE(review): unboxing would NPE if currSpeakerName is absent from
                // speakerHasMatches — presumably the map is pre-populated elsewhere; confirm
                if (speakerHasMatches.get(currSpeakerName) == false) {
                    prevSpeakerName = currSpeakerName;
                    // continue;
                } else // if speaker has matches
                {
                    // if the current speaker is new, write header and append his name
                    if (!currSpeakerName.equals(prevSpeakerName)) {
                        // reset the counter of matches, which were written for this speaker
                        matchesWrittenForSpeaker = 0;
                        if (isFirstSpeakerWithMatch) {
                            // very first speaker with a match: emit the column header line
                            out.append("match_number" + TAB_MARK);
                            out.append("speaker" + TAB_MARK);
                            // write header for meta data columns
                            if (!listOfMetakeys.isEmpty()) {
                                for (String metakey : listOfMetakeys) {
                                    out.append(metakey + TAB_MARK);
                                }
                            }
                            out.append("left_context" + TAB_MARK);
                            String prefixAlignmc = "match_";
                            String prefix = "match_column";
                            String middle_context = "middle_context_";
                            if (alignmc && dataIsAlignable) {
                                // aligned output: one column per global match number
                                for (int i = 0; i < orderedMatchNumbersGlobal.size(); i++) {
                                    out.append(prefixAlignmc + orderedMatchNumbersGlobal.get(i) + TAB_MARK);
                                    if (i < orderedMatchNumbersGlobal.size() - 1) {
                                        out.append(middle_context + (i + 1) + TAB_MARK);
                                    }
                                }
                            } else {
                                // unaligned output: as many generic match columns as the widest line needs
                                for (int i = 0; i < maxMatchesPerLine; i++) {
                                    out.append(prefix + TAB_MARK);
                                    if (i < (maxMatchesPerLine - 1)) {
                                        out.append(middle_context + (i + 1) + TAB_MARK);
                                    }
                                }
                            }
                            out.append("right_context");
                            out.append(NEWLINE);
                            isFirstSpeakerWithMatch = false;
                        } else {
                            out.append(NEWLINE);
                        }
                        out.append(String.valueOf(recordNumber + 1) + TAB_MARK);
                        // strip the "<recordNumber>_" prefix to recover the plain speaker name
                        String trimmedName = "";
                        if (currSpeakerName.indexOf("_") < currSpeakerName.length()) {
                            trimmedName = currSpeakerName.substring(currSpeakerName.indexOf("_") + 1);
                        }
                        out.append(trimmedName + TAB_MARK);
                        // write meta data
                        if (!listOfMetakeys.isEmpty()) {
                            // get metadata
                            String docName = graph.getDocument().getName();
                            List<String> corpusPath = CommonHelper.getCorpusPath(graph.getDocument().getGraph(), graph.getDocument());
                            String corpusName = corpusPath.get(corpusPath.size() - 1);
                            corpusName = urlPathEscape.escape(corpusName);
                            List<Annotation> metadata = Helper.getMetaData(corpusName, docName);
                            Map<String, String> annosWithoutNamespace = new HashMap<String, String>();
                            Map<String, Map<String, String>> annosWithNamespace = new HashMap<String, Map<String, String>>();
                            // put metadata annotations into hash maps for better access
                            for (Annotation metaAnno : metadata) {
                                String ns;
                                Map<String, String> data = new HashMap<String, String>();
                                data.put(metaAnno.getName(), metaAnno.getValue());
                                // a namespace is present
                                if ((ns = metaAnno.getNamespace()) != null && !ns.isEmpty()) {
                                    Map<String, String> nsMetadata = new HashMap<String, String>();
                                    if (annosWithNamespace.get(ns) != null) {
                                        nsMetadata = annosWithNamespace.get(ns);
                                    }
                                    nsMetadata.putAll(data);
                                    annosWithNamespace.put(ns, nsMetadata);
                                } else {
                                    annosWithoutNamespace.putAll(data);
                                }
                            }
                            for (String metakey : listOfMetakeys) {
                                String metaValue = "";
                                // try to get meta value specific for current speaker
                                if (!trimmedName.isEmpty() && annosWithNamespace.containsKey(trimmedName)) {
                                    Map<String, String> speakerAnnos = annosWithNamespace.get(trimmedName);
                                    if (speakerAnnos.containsKey(metakey)) {
                                        metaValue = speakerAnnos.get(metakey).trim();
                                    }
                                }
                                // try to get meta value, if metaValue is not set
                                if (metaValue.isEmpty() && annosWithoutNamespace.containsKey(metakey)) {
                                    metaValue = annosWithoutNamespace.get(metakey).trim();
                                }
                                out.append(metaValue + TAB_MARK);
                            }
                        }
                        // metadata written
                        lastTokenWasMatched = -1;
                        noPreviousTokenInLine = true;
                    }
                    // header, speaker name and metadata ready
                    // default to space as separator
                    String separator = SPACE;
                    List<SNode> root = new LinkedList<>();
                    root.add(tok);
                    Long matchedNode;
                    // token matched
                    if ((matchedNode = tokenToMatchNumber.get(counterGlobal)) != null) {
                        // is dominated by a (new) matched node, thus use tab to separate the non-matches from the matches
                        if (lastTokenWasMatched < 0) {
                            if (alignmc && dataIsAlignable) {
                                // pad with empty columns until the column of this match number is reached
                                int orderInList = orderedMatchNumbersGlobal.indexOf(matchedNode);
                                if (orderInList >= matchesWrittenForSpeaker) {
                                    int diff = orderInList - matchesWrittenForSpeaker;
                                    matchesWrittenForSpeaker++;
                                    StringBuilder sb = new StringBuilder(TAB_MARK);
                                    for (int i = 0; i < diff; i++) {
                                        sb.append(TAB_MARK + TAB_MARK);
                                        matchesWrittenForSpeaker++;
                                    }
                                    separator = sb.toString();
                                }
                            } else {
                                separator = TAB_MARK;
                            }
                        } else if (lastTokenWasMatched != matchedNode) {
                            // always leave an empty column between two matches, even if there is no actual context
                            if (alignmc && dataIsAlignable) {
                                int orderInList = orderedMatchNumbersGlobal.indexOf(matchedNode);
                                if (orderInList >= matchesWrittenForSpeaker) {
                                    int diff = orderInList - matchesWrittenForSpeaker;
                                    matchesWrittenForSpeaker++;
                                    StringBuilder sb = new StringBuilder(TAB_MARK + TAB_MARK);
                                    for (int i = 0; i < diff; i++) {
                                        sb.append(TAB_MARK + TAB_MARK);
                                        matchesWrittenForSpeaker++;
                                    }
                                    separator = sb.toString();
                                }
                            } else {
                                separator = TAB_MARK + TAB_MARK;
                            }
                        }
                        lastTokenWasMatched = matchedNode;
                    } else // token not matched, but last token matched
                    if (lastTokenWasMatched >= 0) {
                        // handle crossing edges
                        if (!tokenToMatchNumber.containsKey(counterGlobal) && tokenToMatchNumber.containsKey(counterGlobal - 1) && tokenToMatchNumber.containsKey(counterGlobal + 1)) {
                            // an unmatched token between two token of the SAME match stays inside the match
                            if (Objects.equals(tokenToMatchNumber.get(counterGlobal - 1), tokenToMatchNumber.get(counterGlobal + 1))) {
                                separator = SPACE;
                                lastTokenWasMatched = tokenToMatchNumber.get(counterGlobal + 1);
                            } else {
                                separator = TAB_MARK;
                                lastTokenWasMatched = -1;
                            }
                        } else // mark the end of a match with the tab
                        {
                            separator = TAB_MARK;
                            lastTokenWasMatched = -1;
                        }
                    }
                    // if tok is the first token in the line and not matched, set separator to empty string
                    if (noPreviousTokenInLine && separator.equals(SPACE)) {
                        separator = "";
                    }
                    out.append(separator);
                    // append the current token
                    out.append(graph.getText(tok));
                    noPreviousTokenInLine = false;
                    prevSpeakerName = currSpeakerName;
                }
            }
        }
    }
}
Use of org.corpus_tools.salt.common.SToken in the ANNIS project by korpling.
Class Helper, method calculateMarkedAndCoveredIDs.
/**
 * Determines which node IDs are matched or covered by a match.
 *
 * <p>First every node carrying an explicit match feature is collected, then
 * the {@link CoveredMatchesCalculator} expands that seed set to all covered
 * nodes. If a segmentation is selected, each segmentation node that spans at
 * least one covered token (by token index) is added to the result as well.</p>
 *
 * @param doc the document whose graph is inspected
 * @param segNodes the nodes of the selected segmentation layer
 * @param segmentationName name of the selected segmentation, or {@code null}
 *        if the plain token layer is used
 * @return map from node ID to its match number
 */
public static Map<String, Long> calculateMarkedAndCoveredIDs(SDocument doc, List<SNode> segNodes, String segmentationName) {
    // seed: every node with an explicit match feature
    Map<String, Long> directMatches = new HashMap<>();
    for (SNode node : doc.getDocumentGraph().getNodes()) {
        SFeature matchFeature = node.getFeature(ANNIS_NS, FEAT_MATCHEDNODE);
        Long matchNr = matchFeature == null ? null : matchFeature.getValue_SNUMERIC();
        if (matchNr != null) {
            directMatches.put(node.getId(), matchNr);
        }
    }
    // expand the seed set to all nodes covered by a match
    CoveredMatchesCalculator calculator = new CoveredMatchesCalculator(doc.getDocumentGraph(), directMatches);
    Map<String, Long> result = calculator.getMatchedAndCovered();
    if (segmentationName == null) {
        return result;
    }
    // restrict the covered nodes to token for the index comparison below
    Map<SToken, Long> coveredTokenByMatch = new HashMap<>();
    for (Map.Entry<String, Long> entry : result.entrySet()) {
        SNode candidate = doc.getDocumentGraph().getNode(entry.getKey());
        if (candidate instanceof SToken) {
            coveredTokenByMatch.put((SToken) candidate, entry.getValue());
        }
    }
    for (SNode segNode : segNodes) {
        RelannisNodeFeature segFeat = (RelannisNodeFeature) segNode.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
        // skip segmentation nodes that are already part of the result
        if (result.containsKey(segNode.getId())) {
            continue;
        }
        long leftTok = segFeat.getLeftToken();
        long rightTok = segFeat.getRightToken();
        // the segment inherits the match number of the first covered token it spans
        for (Map.Entry<SToken, Long> entry : coveredTokenByMatch.entrySet()) {
            RelannisNodeFeature tokFeat = (RelannisNodeFeature) entry.getKey().getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
            long tokenIndex = tokFeat.getTokenIndex();
            if (tokenIndex >= leftTok && tokenIndex <= rightTok) {
                result.put(segNode.getId(), entry.getValue());
                break;
            }
        }
    }
    return result;
}
Aggregations