Uses of annis.model.RelannisNodeFeature in the ANNIS project by korpling.
Class SaltAnnotateExtractor, method setFeaturesForNode.
/**
 * Fills a new {@link RelannisNodeFeature} with the values of the "node" columns
 * (id, corpus_ref, text_ref, left, left_token, right, right_token, token_index,
 * seg_index, seg_name) read through the {@code longValue}/{@code stringValue} helpers.
 *
 * NOTE(review): this excerpt ends after the last setter; how {@code feat} and
 * {@code val} are attached to {@code node} is not visible here.
 *
 * @param node the node the feature is created for (not referenced in the visible lines)
 * @param internalID not referenced in the visible lines; the internal ID is taken
 *                   from the "id" column of the result set instead
 * @param resultSet the result set the column values are read from
 * @throws SQLException declared by the signature; presumably raised when a column
 *                      cannot be read (the helpers are not visible here)
 */
private void setFeaturesForNode(SStructuredNode node, long internalID, ResultSet resultSet) throws SQLException {
// container feature; its name/value assignment is not part of this excerpt
SFeature feat = SaltFactory.createSFeature();
RelannisNodeFeature val = new RelannisNodeFeature();
// identity and location of the node; "node" is presumably the table/alias
// prefix and the last argument the column name (TODO confirm against the helpers)
val.setInternalID(longValue(resultSet, "node", "id"));
val.setCorpusRef(longValue(resultSet, "node", "corpus_ref"));
val.setTextRef(longValue(resultSet, "node", "text_ref"));
// left/right values and the corresponding token values
val.setLeft(longValue(resultSet, "node", "left"));
val.setLeftToken(longValue(resultSet, "node", "left_token"));
val.setRight(longValue(resultSet, "node", "right"));
val.setRightToken(longValue(resultSet, "node", "right_token"));
// token index and segmentation information
val.setTokenIndex(longValue(resultSet, "node", "token_index"));
val.setSegIndex(longValue(resultSet, "node", "seg_index"));
val.setSegName(stringValue(resultSet, "node", "seg_name"));
Class EventExtractor, method splitRowsOnIslands.
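* Splits events of a row if they overlap an island. Islands are areas between
* the tokens which are included in the result.
* @param row the row whose events are checked and, if necessary, split
* @param graph the document graph that provides the sorted token list
* @param text if not null, only tokens belonging to this text are considered
* @param startTokenIndex token index of the first token in the match
* @param endTokenIndex token index of the last token in the match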
private static void splitRowsOnIslands(Row row, final SDocumentGraph graph, STextualDS text, long startTokenIndex, long endTokenIndex) {
// Replaces events of the row that span a gap by one new event per connected
// range. NOTE(review): this excerpt omits closing braces and several
// statements, so block nesting has to be inferred from the comments.
// Bit set for the token positions; no statement that sets bits in it is
// visible in this excerpt (presumably one bit per token, relative to
// startTokenIndex - TODO confirm).
BitSet tokenCoverage = new BitSet();
// get the sorted token
List<SToken> sortedTokenList = graph.getSortedTokenByText();
// add all token belonging to the right text to the bit set
ListIterator<SToken> itToken = sortedTokenList.listIterator();
while (itToken.hasNext()) {
// NOTE(review): the right-hand side is missing in this excerpt
// (presumably itToken.next()).
SToken t =;
// a null text means "no restriction"; otherwise the text of the token is
// compared by reference (==), not with equals()
if (text == null || text == CommonHelper.getTextualDSForNode(t, graph)) {
RelannisNodeFeature feat = (RelannisNodeFeature) t.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
long tokenIndexRaw = feat.getTokenIndex();
// presumably limits the index to [startTokenIndex, endTokenIndex];
// clip() is not visible here
tokenIndexRaw = clip(tokenIndexRaw, startTokenIndex, endTokenIndex);
// make the index relative to the first token of the match
int tokenIndex = (int) (tokenIndexRaw - startTokenIndex);
ListIterator<GridEvent> itEvents = row.getEvents().listIterator();
while (itEvents.hasNext()) {
// NOTE(review): the right-hand side is missing in this excerpt
// (presumably itEvents.next()).
GridEvent event =;
BitSet eventBitSet = new BitSet();
// BitSet.set(from, to) excludes "to", hence the + 1 to mark the positions
// left..right inclusively
eventBitSet.set(event.getLeft(), event.getRight() + 1);
// restrict event bitset on the locations where token are present
// NOTE(review): the restricting statement itself is not visible in this
// excerpt (presumably an intersection with tokenCoverage).
// and we need to split it
// a clear bit inside [left, right] means the event spans a gap
if (eventBitSet.nextClearBit(event.getLeft()) <= event.getRight()) {
// remove the original event
// NOTE(review): the removing statement is not visible in this excerpt.
// The event bitset now marks all the locations which the event should
// cover.
// Make a list of new events for each connected range in the bitset
// running number used to build unique IDs for the new events
int subElement = 0;
int offset = eventBitSet.nextSetBit(0);
// nextSetBit returns -1 when no further set bit exists, which ends the loop
while (offset >= 0) {
// last set bit of the connected range starting at offset
int end = eventBitSet.nextClearBit(offset) - 1;
// NOTE(review): ranges consisting of a single position (offset == end)
// do not get a new event - confirm this is intended.
if (offset < end) {
// copy of the original event with a new ID; statements adjusting the
// left/right borders of the copy are not visible in this excerpt
GridEvent newEvent = new GridEvent(event);
newEvent.setId(event.getId() + "_islandsplit_" + subElement++);
row.addEvent(itEvents, newEvent);
// continue with the next connected range after the current one
offset = eventBitSet.nextSetBit(end + 1);
// end if we need to split
Class EventExtractor, method addAnnotationsForNode.
/**
 * Creates a {@link GridEvent} (and a new {@link Row}) for every annotation of
 * {@code node} whose qualified name or plain name is a key of
 * {@code rowsByAnnotation}. The visible code additionally looks up match
 * information, the text ID of covered token, time information and a PDF page
 * for the event.
 *
 * NOTE(review): this excerpt omits closing braces and many statements (e.g.
 * the bodies of most of the innermost conditions), so only the visible
 * lookups are documented here.
 *
 * @param node the node whose annotations are turned into events
 * @param graph the document graph; provides the matched-annotations feature
 *              and the outgoing relations of the node
 * @param startTokenIndex token index used as lower clipping border and as
 *                        offset for the left/right positions of the event
 * @param endTokenIndex token index used as upper clipping border
 * @param pdfController may be null; the page is only looked up if it is not
 *                      null and reports at least one registered PDF viewer
 * @param pageNumberHelper used to get the page for the node
 * @param eventCounter incremented once per created event to build the event ID
 * @param rowsByAnnotation rows by annotation name; annotations without an
 *                         entry (neither by qualified nor by plain name) are ignored
 * @param addMatch if true and the node has a match number, match information
 *                 is evaluated for the event
 * @param mediaLayer if null, time information is looked up for every
 *                   annotation, otherwise only for annotations whose
 *                   qualified name is contained in the set
 * @param replaceValueWithMediaIcon if true, the value of an event with time
 *                                  information is overwritten
 */
private static void addAnnotationsForNode(SNode node, SDocumentGraph graph, long startTokenIndex, long endTokenIndex, PDFController pdfController, PDFPageHelper pageNumberHelper, AtomicInteger eventCounter, LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation, boolean addMatch, Set<String> mediaLayer, boolean replaceValueWithMediaIcon) {
// names of the matched annotations, read as a comma-separated list from a
// feature of the graph; stays empty if the feature does not exist
List<String> matchedAnnos = new ArrayList<>();
SFeature featMatchedAnnos = graph.getFeature(ANNIS_NS, FEAT_MATCHEDANNOS);
if (featMatchedAnnos != null) {
matchedAnnos = Splitter.on(',').trimResults().splitToList(featMatchedAnnos.getValue_STEXT());
// check if the span is a matched node
SFeature featMatched = node.getFeature(ANNIS_NS, FEAT_MATCHEDNODE);
// null if the node carries no match feature
Long matchRaw = featMatched == null ? null : featMatched.getValue_SNUMERIC();
String matchedQualifiedAnnoName = "";
// the match number is used as 1-based index into matchedAnnos
// NOTE(review): only the upper bound is checked; a value below 1 would
// yield a negative index - confirm match numbers always start at 1.
if (matchRaw != null && matchRaw <= matchedAnnos.size()) {
matchedQualifiedAnnoName = matchedAnnos.get((int) ((long) matchRaw) - 1);
// calculate the left and right values of a span
// TODO: howto get these numbers with Salt?
RelannisNodeFeature feat = (RelannisNodeFeature) node.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
long leftLong = feat.getLeftToken();
long rightLong = feat.getRightToken();
// presumably limits both values to [startTokenIndex, endTokenIndex];
// clip() is not visible here
leftLong = clip(leftLong, startTokenIndex, endTokenIndex);
rightLong = clip(rightLong, startTokenIndex, endTokenIndex);
// positions relative to the first token index
int left = (int) (leftLong - startTokenIndex);
int right = (int) (rightLong - startTokenIndex);
for (SAnnotation anno : node.getAnnotations()) {
// look up the rows by qualified name first
ArrayList<Row> rows = rowsByAnnotation.get(anno.getQName());
if (rows == null) {
// try again with only the name
rows = rowsByAnnotation.get(anno.getName());
if (rows != null) {
// only do something if the annotation was defined before
// 1. give each annotation of each span an own row
Row r = new Row();
String id = "event_" + eventCounter.incrementAndGet();
GridEvent event = new GridEvent(id, left, right, anno.getValue_STEXT());
if (addMatch && matchRaw != null) {
long match = matchRaw;
if (matchedQualifiedAnnoName.isEmpty()) {
// always set the match when there is no matched annotation at all
// NOTE(review): the statement setting the match is not visible in this excerpt.
} else // check if the annotation also matches
if (matchedQualifiedAnnoName.equals(anno.getQName())) {
if (node instanceof SSpan) {
// calculate overlapped SToken
List<? extends SRelation<? extends SNode, ? extends SNode>> outEdges = graph.getOutRelations(node.getId());
if (outEdges != null) {
for (SRelation<? extends SNode, ? extends SNode> e : outEdges) {
// only spanning relations lead to the token covered by the span
if (e instanceof SSpanningRelation) {
SSpanningRelation spanRel = (SSpanningRelation) e;
SToken tok = spanRel.getTarget();
// get the STextualDS of this token and add it to the event
String textID = getTextID(tok, graph);
if (textID != null) {
// end if span has out edges
} else if (node instanceof SToken) {
// get the STextualDS of this token and add it to the event
String textID = getTextID((SToken) node, graph);
if (textID != null) {
// try to get time annotations
if (mediaLayer == null || mediaLayer.contains(anno.getQName())) {
double[] startEndTime = TimeHelper.getOverlappedTime(node);
// one element: presumably only a start time is known (the statements
// storing the times in the event are not visible in this excerpt)
if (startEndTime.length == 1) {
if (replaceValueWithMediaIcon) {
event.setValue(" ");
event.setTooltip("play excerpt " + event.getStartTime());
// two elements: presumably start and end time are known
} else if (startEndTime.length == 2) {
if (replaceValueWithMediaIcon) {
event.setValue(" ");
event.setTooltip("play excerpt " + event.getStartTime() + "-" + event.getEndTime());
// the page is only looked up if at least one PDF viewer is registered
if (pdfController != null && pdfController.sizeOfRegisterdPDFViewer() > 0) {
String page = pageNumberHelper.getPageFromAnnotation(node);
if (page != null) {
// end for each annotation of span
Class CSVMultiTokExporter, method outputText.
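* Takes a match and outputs a CSV line (the columns are separated by tabs).
* @param graph the document graph containing the matched nodes
* @param alignmc
* @param matchNumber number of the match; the header line is output when it is 0
* @param out the writer the line is appended to
* @throws IOException if writing to the given writer fails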
public void outputText(SDocumentGraph graph, boolean alignmc, int matchNumber, Writer out) throws IOException, IllegalArgumentException {
// Writes the columns of one match to "out", preceded by a header for the
// first match. NOTE(review): this excerpt omits closing braces and several
// statements (e.g. those filling nodeLine); alignmc is not referenced in the
// visible lines.
// first match
if (matchNumber == 0) {
// output header
// header columns per matched node number n: "n_id", "n_span" and one
// "n_anno_<name>" per annotation name, followed by one "meta_<key>" column
// per metadata key
List<String> headerLine = new ArrayList<>();
for (Map.Entry<Integer, TreeSet<String>> match : annotationsForMatchedNodes.entrySet()) {
int node_id = match.getKey();
headerLine.add(String.valueOf(node_id) + "_id");
headerLine.add(String.valueOf(node_id) + "_span");
for (String annoName : match.getValue()) {
headerLine.add(String.valueOf(node_id) + "_anno_" + annoName);
for (String key : metakeys) {
headerLine.add("meta_" + key);
out.append(StringUtils.join(headerLine, "\t"));
// output nodes in the order of the matches
// TreeMap: the values are iterated in ascending order of the match number
SortedMap<Integer, String> contentLine = new TreeMap<>();
for (SNode node : this.getMatchedNodes(graph)) {
List<String> nodeLine = new ArrayList<>();
// export id
// NOTE(review): the statement adding the ID to nodeLine is not visible in this excerpt.
RelannisNodeFeature feats = RelannisNodeFeature.extract(node);
// export spanned text
String span = graph.getText(node);
// NOTE(review): the statement guarded by this condition is not visible in this excerpt.
if (span != null)
// export annotations
// match number of the node, used as key for the annotation names and for
// the position in contentLine
// NOTE(review): assumes every node returned by getMatchedNodes carries the
// FEAT_MATCHEDNODE feature; otherwise this line fails - confirm.
int node_id = node.getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_MATCHEDNODE).getValue_SNUMERIC().intValue();
for (String annoName : annotationsForMatchedNodes.get(node_id)) {
SAnnotation anno = node.getAnnotation(annoName);
// NOTE(review): both branches are empty in this excerpt.
if (anno != null) {
} else
// add everything to line
contentLine.put(node_id, StringUtils.join(nodeLine, "\t"));
out.append(StringUtils.join(contentLine.values(), "\t"));
// TODO cache the metadata
if (!metakeys.isEmpty()) {
// TODO is this the best way to get the corpus name?
// NOTE(review): the argument list of this call is truncated in this excerpt.
String corpus_name = CommonHelper.getCorpusPath(;
List<Annotation> asList = Helper.getMetaData(corpus_name, graph.getDocument().getName());
// NOTE(review): the values are appended in the order of asList and only for
// keys that are present, while the header uses the order of metakeys -
// confirm that the columns cannot get misaligned.
for (Annotation anno : asList) {
if (metakeys.contains(anno.getName()))
out.append("\t" + anno.getValue());
Class Helper, method calculateMarkedAndCoveredIDs.
/**
 * Collects the match number of every node of the document that carries the
 * matched-node feature, extends this map with the help of
 * {@link CoveredMatchesCalculator} and adds segmentation nodes whose token
 * range contains the token index of an already covered token.
 *
 * NOTE(review): this excerpt omits closing braces, so it cannot be verified
 * here which statements are guarded by the {@code segmentationName} check.
 *
 * @param doc the document whose graph is inspected
 * @param segNodes the segmentation nodes that are checked against the covered token
 * @param segmentationName only tested against null in the visible code
 * @return map from node ID to match number, as returned by the
 *         {@link CoveredMatchesCalculator} plus the added segmentation nodes
 */
public static Map<String, Long> calculateMarkedAndCoveredIDs(SDocument doc, List<SNode> segNodes, String segmentationName) {
// node ID -> match number for all nodes that are matched directly
Map<String, Long> initialCovered = new HashMap<>();
// add all covered nodes
for (SNode n : doc.getDocumentGraph().getNodes()) {
SFeature featMatched = n.getFeature(ANNIS_NS, FEAT_MATCHEDNODE);
// null if the node has no matched-node feature
Long match = featMatched == null ? null : featMatched.getValue_SNUMERIC();
if (match != null) {
initialCovered.put(n.getId(), match);
// calculate covered nodes
CoveredMatchesCalculator cmc = new CoveredMatchesCalculator(doc.getDocumentGraph(), initialCovered);
Map<String, Long> covered = cmc.getMatchedAndCovered();
if (segmentationName != null) {
// filter token
// keeps only the entries of "covered" whose node is a token
Map<SToken, Long> coveredToken = new HashMap<>();
for (Map.Entry<String, Long> e : covered.entrySet()) {
SNode n = doc.getDocumentGraph().getNode(e.getKey());
if (n instanceof SToken) {
coveredToken.put((SToken) n, e.getValue());
for (SNode segNode : segNodes) {
RelannisNodeFeature featSegNode = (RelannisNodeFeature) segNode.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
// segmentation nodes that are already covered keep their match number
if (!covered.containsKey(segNode.getId())) {
long leftTok = featSegNode.getLeftToken();
long rightTok = featSegNode.getRightToken();
// check for each covered token if this segment is covering it
for (Map.Entry<SToken, Long> e : coveredToken.entrySet()) {
RelannisNodeFeature featTok = (RelannisNodeFeature) e.getKey().getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
long entryTokenIndex = featTok.getTokenIndex();
// the token lies within the inclusive range [leftTok, rightTok]
if (entryTokenIndex <= rightTok && entryTokenIndex >= leftTok) {
// add this segmentation node to the covered set
// (it gets the match number of the covered token)
covered.put(segNode.getId(), e.getValue());
// end for each covered token
// end if not already contained
// end for each segmentation node
return covered;