Use of annis.model.RelannisNodeFeature in project ANNIS by korpling.
Class SaltAnnotateExtractor, method setFeaturesForNode.
/**
 * Copies the relANNIS bookkeeping columns of the current result row into a
 * {@link RelannisNodeFeature} and attaches it to the given node as a feature
 * named {@code FEAT_RELANNIS_NODE} in the {@code ANNIS_NS} namespace.
 *
 * @param node       the node that receives the feature
 * @param internalID not read by this method; the internal ID that is stored
 *                   is taken from the "id" column of the result row instead
 * @param resultSet  result set positioned on the row describing the node
 * @throws SQLException declared for the column accessors used to read the row
 */
private void setFeaturesForNode(SStructuredNode node, long internalID, ResultSet resultSet) throws SQLException {
    // every value is read from the same table
    final String nodeTable = "node";

    SFeature relannisFeature = SaltFactory.createSFeature();
    relannisFeature.setNamespace(ANNIS_NS);
    relannisFeature.setName(FEAT_RELANNIS_NODE);

    RelannisNodeFeature nodeInfo = new RelannisNodeFeature();

    // identity and location of the node
    nodeInfo.setInternalID(longValue(resultSet, nodeTable, "id"));
    nodeInfo.setCorpusRef(longValue(resultSet, nodeTable, "corpus_ref"));
    nodeInfo.setTextRef(longValue(resultSet, nodeTable, "text_ref"));

    // covered range, both as left/right and as token positions
    nodeInfo.setLeft(longValue(resultSet, nodeTable, "left"));
    nodeInfo.setLeftToken(longValue(resultSet, nodeTable, "left_token"));
    nodeInfo.setRight(longValue(resultSet, nodeTable, "right"));
    nodeInfo.setRightToken(longValue(resultSet, nodeTable, "right_token"));

    // token and segmentation information
    nodeInfo.setTokenIndex(longValue(resultSet, nodeTable, "token_index"));
    nodeInfo.setSegIndex(longValue(resultSet, nodeTable, "seg_index"));
    nodeInfo.setSegName(stringValue(resultSet, nodeTable, "seg_name"));

    relannisFeature.setValue(nodeInfo);
    node.addFeature(relannisFeature);
}
Use of annis.model.RelannisNodeFeature in project ANNIS by korpling.
Class EventExtractor, method splitRowsOnIslands.
/**
 * Splits the events of a row wherever they would stretch over a position that
 * holds no token. Islands are areas between the tokens which are included in
 * the result.
 *
 * <p>Positions are counted relative to {@code startTokenIndex}. An event that
 * covers a position without a token is removed from the row and replaced by
 * one new event per connected run of token positions; the new events get the
 * ID of the original event followed by {@code "_islandsplit_"} and a running
 * number. A run is only turned into a new event if it spans more than one
 * position.
 *
 * <p>NOTE(review): runs consisting of exactly one position are dropped by the
 * {@code from < to} check below - confirm that this is intended.
 *
 * @param row             the row whose events are split in place
 * @param graph           document graph providing the sorted tokens
 * @param text            if not {@code null}, only tokens belonging to this
 *                        text are considered; {@code null} accepts all tokens
 * @param startTokenIndex token index of the first token in the match
 * @param endTokenIndex   token index of the last token in the match
 */
private static void splitRowsOnIslands(Row row, final SDocumentGraph graph, STextualDS text, long startTokenIndex, long endTokenIndex) {
    // Step 1: mark every position (relative to the start) that holds a token
    BitSet positionsWithToken = new BitSet();
    for (SToken token : graph.getSortedTokenByText()) {
        // skip token of other texts when a specific text was requested
        if (text != null && text != CommonHelper.getTextualDSForNode(token, graph)) {
            continue;
        }
        RelannisNodeFeature tokenInfo = (RelannisNodeFeature) token.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
        long clippedIndex = clip(tokenInfo.getTokenIndex(), startTokenIndex, endTokenIndex);
        positionsWithToken.set((int) (clippedIndex - startTokenIndex));
    }

    // Step 2: check each event against the token positions
    for (ListIterator<GridEvent> eventIt = row.getEvents().listIterator(); eventIt.hasNext();) {
        GridEvent original = eventIt.next();

        // positions of the event that actually hold a token
        BitSet remaining = new BitSet();
        remaining.set(original.getLeft(), original.getRight() + 1);
        remaining.and(positionsWithToken);

        boolean hasGap = remaining.nextClearBit(original.getLeft()) <= original.getRight();
        if (!hasGap) {
            // the event is fully backed by token, nothing to split
            continue;
        }

        // the iterator is handed to the row so the replacement happens at the
        // current iteration position
        row.removeEvent(eventIt);

        // create one replacement event per connected run of set bits
        int partNumber = 0;
        int from = remaining.nextSetBit(0);
        while (from >= 0) {
            int firstClearAfterRun = remaining.nextClearBit(from);
            int to = firstClearAfterRun - 1;
            if (from < to) {
                GridEvent part = new GridEvent(original);
                part.setId(original.getId() + "_islandsplit_" + partNumber++);
                part.setLeft(from);
                part.setRight(to);
                row.addEvent(eventIt, part);
            }
            from = remaining.nextSetBit(firstClearAfterRun);
        }
    }
}
Use of annis.model.RelannisNodeFeature in project ANNIS by korpling.
Class EventExtractor, method addAnnotationsForNode.
/**
 * Creates one grid event, placed in a row of its own, for every annotation of
 * the node whose qualified name (or, as fallback, plain name) is a key of
 * {@code rowsByAnnotation}. Annotations without such an entry are ignored.
 *
 * <p>The horizontal position of the event is derived from the left and right
 * token values stored in the node's {@link RelannisNodeFeature}: both values
 * are passed through {@code clip(...)} with the start/end token index and are
 * then made relative to {@code startTokenIndex}.
 *
 * @param node                      the span or token whose annotations are added
 * @param graph                     the graph the node belongs to
 * @param startTokenIndex           token index of the first token in the match
 * @param endTokenIndex             token index of the last token in the match
 * @param pdfController             if not {@code null} and at least one PDF viewer
 *                                  is registered, a page is attached to the event
 * @param pageNumberHelper          used to look up the page for the node
 * @param eventCounter              source of the running number used in event IDs
 * @param rowsByAnnotation          rows per annotation name; new rows are appended
 *                                  to the matching list
 * @param addMatch                  whether the match number of the node should be
 *                                  set on the created events
 * @param mediaLayer                annotation names for which time information is
 *                                  looked up; {@code null} means all annotations
 * @param replaceValueWithMediaIcon if {@code true}, events with time information
 *                                  get a blank value and a "play excerpt" tooltip
 */
private static void addAnnotationsForNode(SNode node, SDocumentGraph graph, long startTokenIndex, long endTokenIndex, PDFController pdfController, PDFPageHelper pageNumberHelper, AtomicInteger eventCounter, LinkedHashMap<String, ArrayList<Row>> rowsByAnnotation, boolean addMatch, Set<String> mediaLayer, boolean replaceValueWithMediaIcon) {
    List<String> matchedAnnos = new ArrayList<>();
    SFeature featMatchedAnnos = graph.getFeature(ANNIS_NS, FEAT_MATCHEDANNOS);
    if (featMatchedAnnos != null) {
        matchedAnnos = Splitter.on(',').trimResults().splitToList(featMatchedAnnos.getValue_STEXT());
    }

    // check if the span is a matched node
    SFeature featMatched = node.getFeature(ANNIS_NS, FEAT_MATCHEDNODE);
    Long matchRaw = featMatched == null ? null : featMatched.getValue_SNUMERIC();
    String matchedQualifiedAnnoName = "";
    // The match number is used as a 1-based position in the list, so it must
    // lie in [1, size]; a smaller value would otherwise result in an
    // IndexOutOfBoundsException.
    if (matchRaw != null && matchRaw >= 1 && matchRaw <= matchedAnnos.size()) {
        matchedQualifiedAnnoName = matchedAnnos.get(matchRaw.intValue() - 1);
    }

    // calculate the left and right values of a span
    // TODO: howto get these numbers with Salt?
    RelannisNodeFeature feat = (RelannisNodeFeature) node.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
    long leftLong = clip(feat.getLeftToken(), startTokenIndex, endTokenIndex);
    long rightLong = clip(feat.getRightToken(), startTokenIndex, endTokenIndex);
    int left = (int) (leftLong - startTokenIndex);
    int right = (int) (rightLong - startTokenIndex);

    for (SAnnotation anno : node.getAnnotations()) {
        ArrayList<Row> rows = rowsByAnnotation.get(anno.getQName());
        if (rows == null) {
            // try again with only the name
            rows = rowsByAnnotation.get(anno.getName());
        }
        if (rows == null) {
            // only do something if the annotation was defined before
            continue;
        }

        // 1. give each annotation of each span an own row
        Row r = new Row();
        String id = "event_" + eventCounter.incrementAndGet();
        GridEvent event = new GridEvent(id, left, right, anno.getValue_STEXT());
        event.setTooltip(Helper.getQualifiedName(anno));

        if (addMatch && matchRaw != null) {
            long match = matchRaw;
            // always set the match when there is no matched annotation at all,
            // otherwise only if this annotation is the matched one
            if (matchedQualifiedAnnoName.isEmpty() || matchedQualifiedAnnoName.equals(anno.getQName())) {
                event.setMatch(match);
            }
        }

        if (node instanceof SSpan) {
            // calculate overlapped SToken
            List<? extends SRelation<? extends SNode, ? extends SNode>> outEdges = graph.getOutRelations(node.getId());
            if (outEdges != null) {
                for (SRelation<? extends SNode, ? extends SNode> e : outEdges) {
                    if (e instanceof SSpanningRelation) {
                        SSpanningRelation spanRel = (SSpanningRelation) e;
                        SToken tok = spanRel.getTarget();
                        event.getCoveredIDs().add(tok.getId());
                        // get the STextualDS of this token and add it to the event
                        String textID = getTextID(tok, graph);
                        if (textID != null) {
                            event.setTextID(textID);
                        }
                    }
                }
            }
        } else if (node instanceof SToken) {
            event.getCoveredIDs().add(node.getId());
            // get the STextualDS of this token and add it to the event
            String textID = getTextID((SToken) node, graph);
            if (textID != null) {
                event.setTextID(textID);
            }
        }

        // try to get time annotations
        if (mediaLayer == null || mediaLayer.contains(anno.getQName())) {
            double[] startEndTime = TimeHelper.getOverlappedTime(node);
            if (startEndTime.length == 1) {
                // The start time has to be set before the tooltip is built,
                // because the tooltip text reads it back from the event.
                event.setStartTime(startEndTime[0]);
                if (replaceValueWithMediaIcon) {
                    event.setValue(" ");
                    event.setTooltip("play excerpt " + event.getStartTime());
                }
            } else if (startEndTime.length == 2) {
                event.setStartTime(startEndTime[0]);
                event.setEndTime(startEndTime[1]);
                if (replaceValueWithMediaIcon) {
                    event.setValue(" ");
                    event.setTooltip("play excerpt " + event.getStartTime() + "-" + event.getEndTime());
                }
            }
        }

        r.addEvent(event);
        rows.add(r);

        if (pdfController != null && pdfController.sizeOfRegisterdPDFViewer() > 0) {
            String page = pageNumberHelper.getPageFromAnnotation(node);
            if (page != null) {
                event.setPage(page);
            }
        }
    }
    // end for each annotation of span
}
Use of annis.model.RelannisNodeFeature in project ANNIS by korpling.
Class CSVMultiTokExporter, method outputText.
/**
 * Takes a match and outputs a csv-line.
 *
 * <p>For the first match ({@code matchNumber == 0}) a header line is written
 * before the data line. The header contains, per matched node number, the
 * columns {@code <n>_id}, {@code <n>_span} and one {@code <n>_anno_<name>}
 * column per annotation name, followed by one {@code meta_<key>} column per
 * metadata key. The data line contains the values in the same order: matched
 * nodes are sorted by their match number and exactly one value is written per
 * metadata key. Missing annotations and missing metadata values are written
 * as {@code 'NULL'}. Columns are separated by a tab character.
 *
 * @param graph       the graph of the match to export
 * @param alignmc     not read by this method
 * @param matchNumber index of the match; {@code 0} triggers the header line
 * @param out         the writer the line(s) are appended to
 *
 * @throws java.io.IOException if appending to {@code out} fails
 */
@Override
public void outputText(SDocumentGraph graph, boolean alignmc, int matchNumber, Writer out) throws IOException, IllegalArgumentException {
    // first match
    if (matchNumber == 0) {
        // output header
        List<String> headerLine = new ArrayList<>();
        for (Map.Entry<Integer, TreeSet<String>> match : annotationsForMatchedNodes.entrySet()) {
            int node_id = match.getKey();
            headerLine.add(String.valueOf(node_id) + "_id");
            headerLine.add(String.valueOf(node_id) + "_span");
            for (String annoName : match.getValue()) {
                headerLine.add(String.valueOf(node_id) + "_anno_" + annoName);
            }
        }
        for (String key : metakeys) {
            headerLine.add("meta_" + key);
        }
        out.append(StringUtils.join(headerLine, "\t"));
        out.append("\n");
    }

    // output nodes in the order of the matches
    SortedMap<Integer, String> contentLine = new TreeMap<>();
    for (SNode node : this.getMatchedNodes(graph)) {
        List<String> nodeLine = new ArrayList<>();

        // export id
        RelannisNodeFeature feats = RelannisNodeFeature.extract(node);
        nodeLine.add(String.valueOf(feats.getInternalID()));

        // export spanned text (looked up only once)
        String span = graph.getText(node);
        nodeLine.add(span != null ? span : "");

        // export annotations
        int node_id = node.getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_MATCHEDNODE).getValue_SNUMERIC().intValue();
        for (String annoName : annotationsForMatchedNodes.get(node_id)) {
            SAnnotation anno = node.getAnnotation(annoName);
            if (anno != null) {
                nodeLine.add(anno.getValue_STEXT());
            } else {
                nodeLine.add("'NULL'");
            }
        }

        // add everything to line
        contentLine.put(node_id, StringUtils.join(nodeLine, "\t"));
    }
    out.append(StringUtils.join(contentLine.values(), "\t"));

    // TODO cache the metadata
    if (!metakeys.isEmpty()) {
        // TODO is this the best way to get the corpus name?
        String corpus_name = CommonHelper.getCorpusPath(java.net.URI.create(graph.getDocument().getId())).get(0);
        List<Annotation> asList = Helper.getMetaData(corpus_name, graph.getDocument().getName());

        // Write exactly one value per metadata key and in the order of the
        // keys, so that the values line up with the "meta_" header columns.
        // NOTE(review): if several metadata entries share a name, the first
        // one in the list is used - confirm that this is the desired one.
        for (String key : metakeys) {
            String value = "'NULL'";
            for (Annotation anno : asList) {
                if (key != null && key.equals(anno.getName())) {
                    value = String.valueOf(anno.getValue());
                    break;
                }
            }
            out.append("\t" + value);
        }
    }
    out.append("\n");
}
Use of annis.model.RelannisNodeFeature in project ANNIS by korpling.
Class Helper, method calculateMarkedAndCoveredIDs.
/**
 * Computes a map from node ID to match number for a document.
 *
 * <p>All nodes that carry a {@code FEAT_MATCHEDNODE} feature with a non-null
 * numeric value are collected first and handed to a
 * {@link CoveredMatchesCalculator}; its result is the basis of the returned
 * map. If {@code segmentationName} is not {@code null}, every node of
 * {@code segNodes} that is not yet part of the result is added with the match
 * number of a token from the result whose token index lies inside the node's
 * left/right token range (the first such token found while iterating).
 *
 * @param doc              the document whose graph is inspected
 * @param segNodes         the segmentation nodes to check; only used when
 *                         {@code segmentationName} is not {@code null}
 * @param segmentationName only tested for {@code null}: {@code null} skips the
 *                         handling of the segmentation nodes
 * @return the map produced by the calculator, possibly extended by entries for
 *         segmentation nodes
 */
public static Map<String, Long> calculateMarkedAndCoveredIDs(SDocument doc, List<SNode> segNodes, String segmentationName) {
    // collect every node that directly carries a match number
    Map<String, Long> directlyMatched = new HashMap<>();
    for (SNode candidate : doc.getDocumentGraph().getNodes()) {
        SFeature matchFeature = candidate.getFeature(ANNIS_NS, FEAT_MATCHEDNODE);
        if (matchFeature == null) {
            continue;
        }
        Long matchNumber = matchFeature.getValue_SNUMERIC();
        if (matchNumber != null) {
            directlyMatched.put(candidate.getId(), matchNumber);
        }
    }

    // let the calculator derive the matched and covered nodes
    CoveredMatchesCalculator calculator = new CoveredMatchesCalculator(doc.getDocumentGraph(), directlyMatched);
    Map<String, Long> result = calculator.getMatchedAndCovered();

    if (segmentationName == null) {
        return result;
    }

    // keep only the token of the result, together with their match number
    Map<SToken, Long> matchByToken = new HashMap<>();
    for (Map.Entry<String, Long> resultEntry : result.entrySet()) {
        SNode resolved = doc.getDocumentGraph().getNode(resultEntry.getKey());
        if (resolved instanceof SToken) {
            matchByToken.put((SToken) resolved, resultEntry.getValue());
        }
    }

    for (SNode segNode : segNodes) {
        RelannisNodeFeature segInfo = (RelannisNodeFeature) segNode.getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
        if (result.containsKey(segNode.getId())) {
            // already contained, nothing to add
            continue;
        }

        long firstToken = segInfo.getLeftToken();
        long lastToken = segInfo.getRightToken();

        // look for a token of the result that lies inside this segment
        for (Map.Entry<SToken, Long> tokenEntry : matchByToken.entrySet()) {
            RelannisNodeFeature tokenInfo = (RelannisNodeFeature) tokenEntry.getKey().getFeature(ANNIS_NS, FEAT_RELANNIS_NODE).getValue();
            long tokenIndex = tokenInfo.getTokenIndex();
            boolean insideSegment = tokenIndex <= lastToken && tokenIndex >= firstToken;
            if (insideSegment) {
                // the segmentation node inherits the match number of the token
                result.put(segNode.getId(), tokenEntry.getValue());
                break;
            }
        }
    }
    return result;
}
Aggregations