use of annis.model.Annotation in project ANNIS by korpling.
the class HTMLVis method createHTML.
/**
 * Renders the given document graph as an HTML string.
 *
 * <p>Every definition's matcher is applied to all tokens and then to all
 * spans of the graph; each match makes the definition's outputter register
 * items in a start-tag map and an end-tag map, both keyed by position.
 * Definitions with a {@link PseudoRegionMatcher} are processed afterwards.
 * Finally the registered items are concatenated position by position.
 *
 * <p>If no token or span produced any output, the tag maps are empty. This
 * method then no longer fails with a {@code NoSuchElementException}; pseudo
 * region definitions are emitted at position 0 in that case.
 *
 * @param graph the document graph to render
 * @param definitions the visualization definitions, in configuration order
 * @return the generated HTML
 * @throws NullPointerException if a pseudo-region definition of type
 *         {@code META_NAME} refers to a metadata name that is not present
 */
public String createHTML(SDocumentGraph graph, VisualizationDefinition[] definitions) {
    HashMap<VisualizationDefinition, Integer> instruction_priorities = new HashMap<>();
    SortedMap<Long, List<OutputItem>> outputStartTags = new TreeMap<>();
    SortedMap<Long, List<OutputItem>> outputEndTags = new TreeMap<>();
    StringBuilder sb = new StringBuilder();
    List<SToken> token = graph.getSortedTokenByText();

    // First pass over the definitions: find out whether metadata is needed
    // and remember the configuration order as priority.
    boolean bolMetaTypeFound = false;
    HashMap<String, String> meta = new HashMap<>();
    int def_priority = 0;
    for (VisualizationDefinition vis : definitions) {
        if (vis.getOutputter().getType() == SpanHTMLOutputter.Type.META_NAME) {
            bolMetaTypeFound = true;
        } else {
            // not a meta-annotation, remember order in config file to set priority
            if (vis.getMatcher() instanceof AnnotationNameMatcher
                    || vis.getMatcher() instanceof AnnotationNameAndValueMatcher
                    || vis.getMatcher() instanceof TokenMatcher) {
                instruction_priorities.put(vis, def_priority);
            }
            // the counter is decreased for every non-meta definition, even
            // those that did not get a priority entry above
            def_priority--;
        }
        // the map is handed over by reference and filled below if required
        vis.getOutputter().setMeta(meta);
    }

    if (bolMetaTypeFound) {
        // Metadata is required, get corpus and document name
        String strDocName = graph.getDocument().getName();
        List<String> corpusPath = CommonHelper.getCorpusPath(graph.getDocument().getGraph(), graph.getDocument());
        String strCorpName = corpusPath.get(corpusPath.size() - 1);
        // Get metadata and put in hashmap
        List<Annotation> metaData = Helper.getMetaDataDoc(strCorpName, strDocName);
        for (Annotation metaDatum : metaData) {
            meta.put(metaDatum.getName(), metaDatum.getValue());
        }
    }

    // Tokens: outer loop over tokens, inner loop over definitions.
    for (SToken t : token) {
        tokenColor = "";
        if (mc.containsKey(t) && hitMark) {
            tokenColor = MatchedNodeColors.getHTMLColorByMatch(mc.get(t));
        }
        for (VisualizationDefinition vis : definitions) {
            String matched = vis.getMatcher().matchedAnnotation(t);
            if (matched != null) {
                vis.getOutputter().outputHTML(t, matched, outputStartTags, outputEndTags, tokenColor, Objects.firstNonNull(instruction_priorities.get(vis), 0));
            }
        }
    }

    // Spans: outer loop over definitions, inner loop over spans.
    List<SSpan> spans = graph.getSpans();
    for (VisualizationDefinition vis : definitions) {
        for (SSpan span : spans) {
            tokenColor = "";
            if (mc.containsKey(span) && hitMark) {
                tokenColor = MatchedNodeColors.getHTMLColorByMatch(mc.get(span));
            }
            String matched = vis.getMatcher().matchedAnnotation(span);
            if (matched != null) {
                vis.getOutputter().outputHTML(span, matched, outputStartTags, outputEndTags, tokenColor, Objects.firstNonNull(instruction_priorities.get(vis), 0));
            }
        }
    }

    // Snapshot of the outermost positions BEFORE pseudo regions are added.
    // firstKey()/lastKey() throw on an empty map, so fall back to 0; the
    // values are only read in the ALL case, which requires non-empty maps.
    int minStartTagPos = outputStartTags.isEmpty() ? 0 : outputStartTags.firstKey().intValue();
    int maxEndTagPos = outputEndTags.isEmpty() ? 0 : outputEndTags.lastKey().intValue();

    // Find BEGIN and END instructions if available
    for (VisualizationDefinition vis : definitions) {
        if (!(vis.getMatcher() instanceof PseudoRegionMatcher)) {
            continue;
        }
        PseudoRegionMatcher psdMatcher = (PseudoRegionMatcher) vis.getMatcher();
        PseudoRegionMatcher.PseudoRegion psdRegionType = psdMatcher.getPsdRegion();
        int positionStart = 0;
        int positionEnd = 0;
        if (!outputEndTags.isEmpty() && !outputStartTags.isEmpty() && psdRegionType != null) {
            // At this point def_priority is one lower than the lowest
            // priority handed out in the first pass.
            switch (psdRegionType) {
                case BEGIN:
                    positionStart = positionEnd = Integer.MIN_VALUE;
                    instruction_priorities.put(vis, def_priority);
                    break;
                case END:
                    positionStart = positionEnd = Integer.MAX_VALUE;
                    instruction_priorities.put(vis, def_priority);
                    break;
                case ALL:
                    // use same position as last and first key so that the
                    // ALL pseudo-range encloses everything
                    positionStart = minStartTagPos;
                    positionEnd = maxEndTagPos;
                    instruction_priorities.put(vis, def_priority);
                    break;
                default:
                    break;
            }
        }
        switch (vis.getOutputter().getType()) {
            case META_NAME:
                String strMetaVal = meta.get(vis.getOutputter().getMetaname().trim());
                if (strMetaVal == null) {
                    throw new NullPointerException("no such metadata name in document: '" + vis.getOutputter().getMetaname().trim() + "'");
                }
                vis.getOutputter().outputAny(positionStart, positionEnd, psdMatcher.getAnnotationName(), strMetaVal, outputStartTags, outputEndTags, Objects.firstNonNull(instruction_priorities.get(vis), 0));
                break;
            case CONSTANT:
                vis.getOutputter().outputAny(positionStart, positionEnd, psdMatcher.getAnnotationName(), vis.getOutputter().getConstant(), outputStartTags, outputEndTags, Objects.firstNonNull(instruction_priorities.get(vis), 0));
                break;
            case EMPTY:
                vis.getOutputter().outputAny(positionStart, positionEnd, psdMatcher.getAnnotationName(), "", outputStartTags, outputEndTags, Objects.firstNonNull(instruction_priorities.get(vis), 0));
                break;
            case ANNO_NAME:
            case VALUE:
            case ESCAPED_VALUE:
                // this shouldn't happen, since the BEGIN/END instruction has
                // no triggering annotation name or value
                break;
            default:
                break;
        }
    }

    // get all used indexes, in ascending order
    Set<Long> indexes = new TreeSet<>();
    indexes.addAll(outputStartTags.keySet());
    indexes.addAll(outputEndTags.keySet());
    for (Long i : indexes) {
        // first the start tags for this position, in the natural order of
        // OutputItem
        List<OutputItem> unsortedStart = outputStartTags.get(i);
        SortedSet<OutputItem> itemsStart = new TreeSet<>();
        if (unsortedStart != null) {
            itemsStart.addAll(unsortedStart);
        }
        // consecutive start tags are joined by an HTML comment that only
        // contains a line break ("<!--\n" ... "-->")
        Iterator<OutputItem> it = itemsStart.iterator();
        boolean first = true;
        while (it.hasNext()) {
            OutputItem s = it.next();
            if (!first) {
                sb.append("-->");
            }
            first = false;
            sb.append(s.getOutputString());
            if (it.hasNext()) {
                sb.append("<!--\n");
            }
        }

        // then the end tags for this position, but inverse their order
        List<OutputItem> unsortedEnd = outputEndTags.get(i);
        TreeSet<OutputItem> itemsEnd = new TreeSet<>();
        if (unsortedEnd != null) {
            itemsEnd.addAll(unsortedEnd);
        }
        for (OutputItem s : itemsEnd.descendingSet()) {
            sb.append(s.getOutputString());
        }
    }
    return sb.toString();
}
use of annis.model.Annotation in project ANNIS by korpling.
the class QueryDaoImpl method exportCorpus.
/**
 * Exports a top-level corpus into the given directory.
 *
 * <p>Each document graph is written as a separate file below
 * {@code outputDirectory/toplevelCorpus}; afterwards a Salt project holding
 * the root corpus, a flat list of documents and the metadata of both is
 * written to {@code outputDirectory}.
 *
 * @param toplevelCorpus name of the top-level corpus to export
 * @param outputDirectory directory that receives the exported files
 */
@Override
@Transactional(readOnly = true)
public void exportCorpus(String toplevelCorpus, File outputDirectory) {
    // Result is discarded; per the original comment this call only checks
    // that the corpus really exists.
    mapCorpusNameToId(toplevelCorpus);

    // Project that will describe the corpus structure of the export.
    SaltProject exportProject = SaltFactory.createSaltProject();
    SCorpusGraph exportCorpusGraph = SaltFactory.createSCorpusGraph();
    exportCorpusGraph.setSaltProject(exportProject);
    SCorpus exportRoot = exportCorpusGraph.createCorpus(null, toplevelCorpus);

    // add all root metadata
    for (Annotation rootMeta : listCorpusAnnotations(toplevelCorpus)) {
        exportRoot.createMetaAnnotation(rootMeta.getNamespace(), rootMeta.getName(), rootMeta.getValue());
    }

    if (!outputDirectory.exists() && !outputDirectory.mkdirs()) {
        log.warn("Could not create output directory \"{}\" for exporting the corpus", outputDirectory.getAbsolutePath());
    }
    File documentRootDir = new File(outputDirectory, toplevelCorpus);

    List<Annotation> documents = listDocuments(toplevelCorpus);
    int position = 0;
    for (Annotation docAnno : documents) {
        // 1-based running number, only used for progress messages
        position++;
        log.info("Loading document {} from database ({}/{})", docAnno.getName(), position, documents.size());

        SaltProject loaded = retrieveAnnotationGraph(toplevelCorpus, docAnno.getName(), null);
        if (loaded == null || loaded.getCorpusGraphs() == null || loaded.getCorpusGraphs().isEmpty()) {
            continue;
        }

        List<Annotation> documentMeta = listCorpusAnnotations(toplevelCorpus, docAnno.getName(), true);
        SCorpusGraph loadedCorpusGraph = loaded.getCorpusGraphs().get(0);
        // TODO: we could re-use the actual corpus structure instead of just adding a flat list of documents
        if (loadedCorpusGraph.getDocuments() == null) {
            continue;
        }

        for (SDocument loadedDoc : loadedCorpusGraph.getDocuments()) {
            log.info("Removing SFeatures from {} ({}/{})", docAnno.getName(), position, documents.size());
            // remove all ANNIS specific features that require a special Java class
            SDocumentGraph docGraph = loadedDoc.getDocumentGraph();
            if (docGraph != null) {
                if (docGraph.getNodes() != null) {
                    for (SNode node : docGraph.getNodes()) {
                        node.removeLabel(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_RELANNIS_NODE);
                    }
                }
                if (docGraph.getRelations() != null) {
                    for (SRelation relation : docGraph.getRelations()) {
                        relation.removeLabel(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_RELANNIS_EDGE);
                    }
                }
            }

            log.info("Saving document {} ({}/{})", loadedDoc.getName(), position, documents.size());
            // NOTE(review): docGraph is passed on even when it is null; confirm
            // that saveDocumentGraph handles this.
            File documentFile = new File(documentRootDir, loadedDoc.getName() + "." + SaltUtil.FILE_ENDING_SALT_XML);
            SaltUtil.saveDocumentGraph(docGraph, URI.createFileURI(documentFile.getAbsolutePath()));

            SDocument exportedDoc = exportCorpusGraph.createDocument(exportRoot, loadedDoc.getName());
            log.info("Adding metadata to document {} ({}/{})", loadedDoc.getName(), position, documents.size());
            for (Annotation docMeta : documentMeta) {
                exportedDoc.createMetaAnnotation(docMeta.getNamespace(), docMeta.getName(), docMeta.getValue());
            }
        }
    }

    // save the actual SaltProject
    log.info("Saving corpus structure");
    File projectFile = new File(outputDirectory, SaltUtil.FILE_SALT_PROJECT);
    new SaltXML10Writer(projectFile).writeSaltProject(exportProject);
}
use of annis.model.Annotation in project ANNIS by korpling.
the class CSVHelper method exportCSVData.
/**
 * Writes one tab-separated line per match to the given writer.
 *
 * <p>For every node position a line contains the span id, the covered text
 * and one cell for each column name registered for that position in
 * {@code columnsByNodePos}. Cells without a value contain the literal
 * {@code 'NULL'}. Tab characters inside values are written as {@code \t}.
 *
 * <p>A {@code null} span now yields {@code 'NULL'} for its id and text
 * cells, so that the following cells stay aligned with a header that has an
 * id and a span column for every position. An annotation whose value is
 * {@code null} is written as {@code 'NULL'} instead of causing a
 * {@code NullPointerException}.
 *
 * @param matches the matches to export
 * @param columnsByNodePos for each 0-based node position the column names
 *        ({@code anno_...} / {@code meta_...}) to output
 * @param w the writer that receives the lines
 */
public static void exportCSVData(Iterator<AnnotatedMatch> matches, SortedMap<Integer, SortedSet<String>> columnsByNodePos, PrintWriter w) {
    final String missing = "'NULL'";
    int count = columnsByNodePos.size();
    // print values
    while (matches.hasNext()) {
        AnnotatedMatch match = matches.next();
        List<String> line = new ArrayList<>();
        int k = 0;
        for (; k < match.size(); ++k) {
            AnnotatedSpan span = match.get(k);
            Map<String, String> valueByName = new HashMap<>();
            if (span != null) {
                if (span.getAnnotations() != null) {
                    for (Annotation annotation : span.getAnnotations()) {
                        valueByName.put("anno_" + annotation.getQualifiedName(), annotation.getValue());
                    }
                }
                if (span.getMetadata() != null) {
                    for (Annotation meta : span.getMetadata()) {
                        valueByName.put("meta_" + meta.getQualifiedName(), meta.getValue());
                    }
                }
                line.add("" + span.getId());
                line.add(span.getCoveredText().replace("\t", "\\t"));
            } else {
                // keep the id and span cells so later cells do not shift left
                line.add(missing);
                line.add(missing);
            }
            for (String name : columnsByNodePos.get(k)) {
                String value = valueByName.get(name);
                if (value != null) {
                    line.add(value.replace("\t", "\\t"));
                } else {
                    line.add(missing);
                }
            }
        }
        // positions this match does not have: id + span + all annotation cells
        for (int l = k; l < count; ++l) {
            int cells = 2 + columnsByNodePos.get(l).size();
            for (int m = 0; m < cells; ++m) {
                line.add(missing);
            }
        }
        w.append(StringUtils.join(line, "\t"));
        w.append("\n");
    }
}
use of annis.model.Annotation in project ANNIS by korpling.
the class CSVHelper method exportCSVHeader.
/**
 * Collects the column names used at each node position of the given matches
 * and writes the header line.
 *
 * <p>For every 0-based node position the qualified names of all annotations
 * (prefixed with {@code anno_}) and all metadata (prefixed with
 * {@code meta_}) are gathered in a sorted set. The header then contains, per
 * position, an {@code id} column, a {@code span} column and one column per
 * gathered name; column names are built by {@code fullColumnName} with the
 * 1-based position.
 *
 * <p>Spans without an annotation or metadata collection ({@code null}) are
 * tolerated, matching the checks done when the data lines are exported.
 *
 * @param matches the matches to inspect; the iterator is consumed
 * @param w the writer that receives the header line
 * @return the column names per 0-based node position
 */
public static SortedMap<Integer, SortedSet<String>> exportCSVHeader(Iterator<AnnotatedMatch> matches, PrintWriter w) {
    // figure out what annotations are used at each match position
    SortedMap<Integer, SortedSet<String>> columnsByNodePos = new TreeMap<>();
    while (matches.hasNext()) {
        AnnotatedMatch match = matches.next();
        for (int j = 0; j < match.size(); ++j) {
            SortedSet<String> columns = columnsByNodePos.get(j);
            if (columns == null) {
                columns = new TreeSet<String>();
                columnsByNodePos.put(j, columns);
            }
            AnnotatedSpan span = match.get(j);
            if (span == null) {
                continue;
            }
            if (span.getAnnotations() != null) {
                for (Annotation annotation : span.getAnnotations()) {
                    columns.add("anno_" + annotation.getQualifiedName());
                }
            }
            if (span.getMetadata() != null) {
                for (Annotation meta : span.getMetadata()) {
                    columns.add("meta_" + meta.getQualifiedName());
                }
            }
        }
    }
    // important: don't close the wrapper CSVWriter!
    @SuppressWarnings("resource") CSVWriter csvWriter = new CSVWriter(w, '\t', CSVWriter.NO_QUOTE_CHARACTER, '\\');
    // print column names and data types
    int count = columnsByNodePos.size();
    List<String> headerLine = new ArrayList<>();
    for (int j = 0; j < count; ++j) {
        headerLine.add(fullColumnName(j + 1, "id"));
        headerLine.add(fullColumnName(j + 1, "span"));
        for (String name : columnsByNodePos.get(j)) {
            headerLine.add(fullColumnName(j + 1, name));
        }
    }
    csvWriter.writeNext(headerLine.toArray(new String[headerLine.size()]));
    return columnsByNodePos;
}
use of annis.model.Annotation in project ANNIS by korpling.
the class WekaHelper method exportArffData.
/**
 * Writes the {@code @data} section of an ARFF file: one comma-separated
 * line per match.
 *
 * <p>For every node position a line contains the span id, the covered text
 * and one cell for each column name registered for that position in
 * {@code columnsByNodePos}. All cells are wrapped in single quotes, single
 * quotes inside values are written as {@code \'}, and cells without a value
 * contain {@code 'NULL'}.
 *
 * <p>A {@code null} span now yields {@code 'NULL'} for its id and text
 * cells, so that the following cells keep their position within the line.
 * An annotation whose value is {@code null} is written as {@code 'NULL'}
 * instead of causing a {@code NullPointerException}.
 *
 * @param matches the matches to export
 * @param columnsByNodePos for each 0-based node position the column names
 *        ({@code anno_...} / {@code meta_...}) to output
 * @param w the writer that receives the data section
 */
public static void exportArffData(Iterator<AnnotatedMatch> matches, SortedMap<Integer, SortedSet<String>> columnsByNodePos, PrintWriter w) {
    final String missing = "'NULL'";
    int count = columnsByNodePos.size();
    w.append("\n@data\n\n");
    // print values
    while (matches.hasNext()) {
        AnnotatedMatch match = matches.next();
        List<String> line = new ArrayList<>();
        int k = 0;
        for (; k < match.size(); ++k) {
            AnnotatedSpan span = match.get(k);
            Map<String, String> valueByName = new HashMap<>();
            if (span != null) {
                if (span.getAnnotations() != null) {
                    for (Annotation annotation : span.getAnnotations()) {
                        valueByName.put("anno_" + annotation.getQualifiedName(), annotation.getValue());
                    }
                }
                if (span.getMetadata() != null) {
                    for (Annotation meta : span.getMetadata()) {
                        valueByName.put("meta_" + meta.getQualifiedName(), meta.getValue());
                    }
                }
                line.add("'" + span.getId() + "'");
                line.add("'" + span.getCoveredText().replace("'", "\\'") + "'");
            } else {
                // keep the id and span cells so later cells do not shift left
                line.add(missing);
                line.add(missing);
            }
            for (String name : columnsByNodePos.get(k)) {
                String value = valueByName.get(name);
                if (value != null) {
                    line.add("'" + value.replace("'", "\\'") + "'");
                } else {
                    line.add(missing);
                }
            }
        }
        // positions this match does not have: id + span + all annotation cells
        for (int l = k; l < count; ++l) {
            int cells = 2 + columnsByNodePos.get(l).size();
            for (int m = 0; m < cells; ++m) {
                line.add(missing);
            }
        }
        w.append(StringUtils.join(line, ","));
        w.append("\n");
    }
}
Aggregations