use of annis.model.Annotation in project ANNIS by korpling.
the class AnnotatedSpanExtractor method mapRow.
/**
 * Converts the current row of the result set into an {@link AnnotatedSpan}.
 *
 * <p>The columns "id", "span", "annotations" and "key" are read by name. The
 * "metadata" column is optional: it is only read when the result set metadata
 * lists a column with exactly that name; otherwise the span gets an empty
 * metadata list.
 *
 * @param resultSet result set positioned on the row to convert
 * @param rowNum index of the current row (not used by this implementation)
 * @return the span described by the current row
 * @throws SQLException if a column cannot be read; the two {@code Validate}
 *         checks additionally reject a NULL key and a key array whose base
 *         type is not {@code BIGINT}
 */
@Override
public AnnotatedSpan mapRow(ResultSet resultSet, int rowNum) throws SQLException {
    long id = resultSet.getLong("id");
    String coveredText = resultSet.getString("span");
    Array annotationColumn = resultSet.getArray("annotations");

    // Locate the optional "metadata" column through the result set metadata
    // (JDBC column indices are 1-based); only the first hit is used.
    ResultSetMetaData columnInfo = resultSet.getMetaData();
    int metaIndex = -1;
    for (int column = 1; metaIndex < 0 && column <= columnInfo.getColumnCount(); column++) {
        if ("metadata".equals(columnInfo.getColumnName(column))) {
            metaIndex = column;
        }
    }
    Array metaColumn = metaIndex > 0 ? resultSet.getArray(metaIndex) : null;

    List<Annotation> annotations = extractAnnotations(annotationColumn);
    List<Annotation> metaData;
    if (metaColumn != null) {
        metaData = extractAnnotations(metaColumn);
    } else {
        metaData = new LinkedList<Annotation>();
    }

    // Build the match key. wasNull() must be queried directly after the read
    // of "key" because it reports on the most recently read column.
    Array keyColumn = resultSet.getArray("key");
    boolean keyPresent = !resultSet.wasNull();
    Validate.isTrue(keyPresent, "Match group identifier must not be null");

    boolean keyIsBigint = keyColumn.getBaseType() == Types.BIGINT;
    String typeMessage = "Key in database must be from the type \"bigint\" but was \"" + keyColumn.getBaseTypeName() + "\"";
    Validate.isTrue(keyIsBigint, typeMessage);

    Long[] keyValues = (Long[]) keyColumn.getArray();
    List<Long> key = Arrays.asList(keyValues);

    return new AnnotatedSpan(id, coveredText, annotations, metaData, key);
}
use of annis.model.Annotation in project ANNIS by korpling.
the class AomAnnotateExtractor method extractData.
/**
 * Builds one {@link AnnotationGraph} per match group from a flat result set.
 *
 * <p>Every row is assigned to a match group via its "key" array column. The
 * first row of a new key creates a graph; the node, edge and annotation data
 * of each row (read through mapNode, mapEdge and mapAnnotation) are then
 * merged into the graph of that key. After all rows are consumed, edges
 * without a source node are removed from every graph and
 * createMissingSpanningRelations is invoked once per graph.
 *
 * <p>The per-graph lookup maps are reset whenever a new key is seen, so the
 * rows of one match group are expected to be contiguous in the result set.
 *
 * @param resultSet the result set to consume; it is iterated to the end
 * @return the graphs in the order in which their keys first appeared
 * @throws SQLException if reading from the result set fails
 * @throws DataAccessException declared by the signature; not thrown directly
 *         by the code visible in this method
 */
@Override
public List<AnnotationGraph> extractData(ResultSet resultSet) throws SQLException, DataAccessException {
TableAccessStrategy tableAccessStrategy = outerQueryTableAccessStrategy;
// function result
List<AnnotationGraph> graphs = new LinkedList<>();
// fn: match group -> annotation graph
Map<List<Long>, AnnotationGraph> graphByMatchGroup = new HashMap<>();
// fn: node id -> node (only for the match group currently being read)
Map<Long, AnnisNode> nodeById = new HashMap<>();
// fn: edge id as returned by Edge.getId() (held in rank_id below) -> edge
// (only for the match group currently being read)
Map<Long, Edge> edgeByRankID = new HashMap<>();
// per match group: the span -> component map that is handed to mapNode and
// later to createMissingSpanningRelations
Map<List<Long>, Map<Long, ComponentEntry>> keyToSpanToComponent = new HashMap<>();
int rowNum = 0;
while (resultSet.next()) {
// process result by match group
// match group is identified by the ids of the matched
// nodes
Array sqlKey = resultSet.getArray("key");
// wasNull() refers to the column read immediately before, i.e. "key"
Validate.isTrue(!resultSet.wasNull(), "Match group identifier must not be null");
Validate.isTrue(sqlKey.getBaseType() == Types.BIGINT, "Key in database must be from the type \"bigint\" but was \"" + sqlKey.getBaseTypeName() + "\"");
Long[] keyArray = (Long[]) sqlKey.getArray();
List<Long> key = Arrays.asList(keyArray);
if (!graphByMatchGroup.containsKey(key)) {
log.debug("starting annotation graph for match: " + key);
Map<Long, ComponentEntry> spans = new HashMap<>();
AnnotationGraph graph = new AnnotationGraph();
graphs.add(graph);
graphByMatchGroup.put(key, graph);
keyToSpanToComponent.put(key, spans);
// clear mapping functions for this graph
// assumes that the result set is sorted by key, pre: if rows of an
// earlier key showed up again later, the lookups below would no longer
// know the nodes and edges already added to that graph
nodeById.clear();
edgeByRankID.clear();
// set the matched keys (null entries of the key array are skipped)
for (Long l : key) {
if (l != null) {
graph.addMatchedNodeId(l);
}
}
}
AnnotationGraph graph = graphByMatchGroup.get(key);
Map<Long, ComponentEntry> spanToComponent = keyToSpanToComponent.get(key);
// document name and path are set again on every row, so the values of the
// last row of a match group are the ones that remain
graph.setDocumentName(new DocumentNameMapRow().mapRow(resultSet, rowNum));
Array path = resultSet.getArray("path");
graph.setPath((String[]) path.getArray());
// get node data
AnnisNode node = mapNode(resultSet, tableAccessStrategy, spanToComponent);
// add node to graph if it is new, else get known copy
long id = node.getId();
if (!nodeById.containsKey(id)) {
log.debug("new node: " + id);
nodeById.put(id, node);
graph.addNode(node);
} else {
node = nodeById.get(id);
}
// we now have the id of the node and the general key,
// so we can
// add the matched node index to the graph (if matched)
// the index is 1-based and counts only the non-null entries of the key
long matchIndex = 1;
// node.setMatchedNodeInQuery(null);
for (Long l : key) {
if (l != null) {
if (id == l) {
node.setMatchedNodeInQuery(matchIndex);
break;
}
matchIndex++;
}
}
// get edge data
Edge edge = mapEdge(resultSet, tableAccessStrategy);
// add edge to graph if it is new, else get known copy
long rank_id = edge.getId();
if (!edgeByRankID.containsKey(rank_id)) {
// the node of this row becomes the destination of the new edge;
// fixNodes is then given the edge and both lookup maps
// (presumably to resolve the source reference -- confirm in fixNodes)
edge.setDestination(node);
fixNodes(edge, edgeByRankID, nodeById);
// add edge to src and dst nodes
node.addIncomingEdge(edge);
AnnisNode source = edge.getSource();
if (source != null) {
source.addOutgoingEdge(edge);
}
log.debug("new edge: " + edge);
edgeByRankID.put(edge.getId(), edge);
graph.addEdge(edge);
} else {
edge = edgeByRankID.get(rank_id);
}
// add annotation data
Annotation nodeAnnotation = mapAnnotation(resultSet, tableAccessStrategy, TableAccessStrategy.NODE_ANNOTATION_TABLE);
if (nodeAnnotation != null) {
node.addNodeAnnotation(nodeAnnotation);
}
Annotation edgeAnnotation = mapAnnotation(resultSet, tableAccessStrategy, TableAccessStrategy.EDGE_ANNOTATION_TABLE);
if (edgeAnnotation != null) {
edge.addAnnotation(edgeAnnotation);
}
rowNum++;
}
// post-processing per graph: drop every edge that has no source node
// (getSource() == null), both from the graph's edge list and from the
// incoming edges of its destination node
for (Entry<List<Long>, AnnotationGraph> entry : graphByMatchGroup.entrySet()) {
AnnotationGraph graph = entry.getValue();
ListIterator<Edge> itEdge = graph.getEdges().listIterator();
while (itEdge.hasNext()) {
Edge edge = itEdge.next();
if (edge.getSource() == null) {
edge.getDestination().getIncomingEdges().remove(edge);
itEdge.remove();
}
}
Map<Long, ComponentEntry> spans = keyToSpanToComponent.get(entry.getKey());
// filter out the continuous spans by finding all discontinuous spans
// discontinuous spans will have an entry for token
// NOTE(review): nodeById is cleared whenever a new match group starts, so
// at this point it only holds the nodes collected since the last clear
// (normally the last match group), yet it is passed for every graph --
// confirm that createMissingSpanningRelations copes with that
createMissingSpanningRelations(graph, spans, nodeById);
}
return graphs;
}
use of annis.model.Annotation in project ANNIS by korpling.
the class WekaHelper method exportArffHeader.
/**
 * Writes the header section of an ARFF file for the given matches.
 *
 * <p>The iterator is consumed completely in order to collect, for every
 * position inside a match, the qualified names of all annotations (prefixed
 * with "anno_") and all metadata entries (prefixed with "meta_") that occur
 * at that position. Afterwards one {@code @attribute} line of type
 * {@code string} is written for the id, the span and each collected name of
 * every position; column names are produced by fullColumnName with a 1-based
 * position number.
 *
 * @param matches the matches to inspect; exhausted when this method returns
 * @param w the writer the header is appended to
 * @return the collected column names, keyed by 0-based match position
 */
public static SortedMap<Integer, SortedSet<String>> exportArffHeader(Iterator<AnnotatedMatch> matches, PrintWriter w) {
    // The relation name is a fixed placeholder, followed by an empty line.
    w.append("@relation name\n");
    w.append("\n");

    // First pass: gather the annotation and metadata names per match position.
    SortedMap<Integer, SortedSet<String>> columnsByNodePos = new TreeMap<>();
    while (matches.hasNext()) {
        AnnotatedMatch current = matches.next();
        for (int pos = 0; pos < current.size(); pos++) {
            AnnotatedSpan span = current.get(pos);

            // Look the name set up once and create it on first use.
            SortedSet<String> names = columnsByNodePos.get(pos);
            if (names == null) {
                names = new TreeSet<String>();
                columnsByNodePos.put(pos, names);
            }

            for (Annotation anno : span.getAnnotations()) {
                names.add("anno_" + anno.getQualifiedName());
            }
            for (Annotation metaEntry : span.getMetadata()) {
                names.add("meta_" + metaEntry.getQualifiedName());
            }
        }
    }

    // Second pass: emit the attribute declarations, position by position.
    int positions = columnsByNodePos.size();
    for (int pos = 0; pos < positions; pos++) {
        int nodeNumber = pos + 1;
        w.append("@attribute ").append(fullColumnName(nodeNumber, "id")).append(" string\n");
        w.append("@attribute ").append(fullColumnName(nodeNumber, "span")).append(" string\n");
        for (String columnName : columnsByNodePos.get(pos)) {
            w.append("@attribute ").append(fullColumnName(nodeNumber, columnName)).append(" string\n");
        }
    }

    return columnsByNodePos;
}
Aggregations