Use of org.corpus_tools.salt.common.SToken in the ANNIS project by korpling:
class CorefVisualizer, method writeOutput.
/**
 * Writes the complete HTML output of the CorefVisualizer.
 *
 * Builds an HTML document whose head links the required CSS/JS resources
 * (jQuery, jQuery UI and the coref visualizer assets) and whose body contains
 * one rendered block per textual data source. Before rendering, all pointing
 * relations matching the configured namespace are grouped into components and
 * referents, stored in instance fields (tokensOfNode, referentList, komponent,
 * referentOfToken, componentOfToken, componenttype, globalIndex, colorlist).
 *
 * @param input visualizer input with the Salt document, font, mappings and
 *              resource paths
 * @param w writer the generated HTML document is appended to
 */
@Override
public void writeOutput(VisualizerInput input, Writer w) {
// root html element
Html html = new Html();
Head head = new Head();
Body body = new Body();
html.removeXmlns();
html.appendChild(head);
html.appendChild(body);
try {
// corpus-specific font first (if configured), generic serif as fallback
LinkedList<String> fonts = new LinkedList<String>();
if (input.getFont() != null) {
Link linkFont = new Link();
linkFont.setHref(input.getFont().getUrl());
head.appendChild(linkFont);
fonts.add(input.getFont().getName());
}
fonts.add("serif");
// jQuery UI stylesheets
Link linkJQueryUI = new Link();
linkJQueryUI.setHref(input.getResourcePath("coref/jquery-ui-1.11.4.custom/jquery-ui.min.css"));
linkJQueryUI.setRel("stylesheet");
linkJQueryUI.setType("text/css");
head.appendChild(linkJQueryUI);
Link linkJQueryUIStructure = new Link();
linkJQueryUIStructure.setHref(input.getResourcePath("coref/jquery-ui-1.11.4.custom/jquery-ui.structure.min.css"));
linkJQueryUIStructure.setRel("stylesheet");
linkJQueryUIStructure.setType("text/css");
head.appendChild(linkJQueryUIStructure);
// jQuery and jQuery UI scripts
Script scriptJquery = new Script("text/javascript");
scriptJquery.setSrc(input.getResourcePath("coref/jquery.js"));
head.appendChild(scriptJquery);
Script scriptUI = new Script("text/javascript");
scriptUI.setSrc(input.getResourcePath("coref/jquery-ui-1.11.4.custom/jquery-ui.min.js"));
head.appendChild(scriptUI);
// the visualizer's own stylesheet and script
Link linkCoref = new Link();
linkCoref.setHref(input.getResourcePath("coref/coref.css"));
linkCoref.setRel("stylesheet");
linkCoref.setType("text/css");
head.appendChild(linkCoref);
Script scriptCoref = new Script("text/javascript");
scriptCoref.setSrc(input.getResourcePath("coref/CorefVisualizer.js"));
head.appendChild(scriptCoref);
body.setStyle("font-family: '" + StringUtils.join(fonts, "', '") + "';");
// reset per-document state kept in instance fields
globalIndex = 0;
tokensOfNode = new HashMap<String, List<String>>();
referentList = new LinkedList<TReferent>();
komponent = new LinkedList<TComponent>();
referentOfToken = new HashMap<String, HashMap<Long, Integer>>();
componentOfToken = new HashMap<String, List<Long>>();
componenttype = new LinkedList<TComponenttype>();
SDocument saltDoc = input.getDocument();
SDocumentGraph saltGraph = saltDoc.getDocumentGraph();
if (saltGraph == null) {
body.setText("An Error occured: Could not get Graph of Result (Graph == null).");
return;
}
List<SRelation<SNode, SNode>> edgeList = saltGraph.getRelations();
if (edgeList == null) {
return;
}
// group all matching pointing relations into components and referents
for (SRelation rawRel : edgeList) {
if (includeEdge(rawRel, input.getNamespace())) {
SPointingRelation rel = (SPointingRelation) rawRel;
String relType = componentNameForRelation(rel);
visitedNodes = new LinkedList<String>();
// is there already a component of this type containing the source node?
boolean gotIt = false;
int componentnr;
for (componentnr = 0; componentnr < componenttype.size(); componentnr++) {
if (componenttype.get(componentnr) != null && componenttype.get(componentnr).type != null && componenttype.get(componentnr).nodeList != null && componenttype.get(componentnr).type.equals(relType) && componenttype.get(componentnr).nodeList.contains(rel.getSource().getId())) {
gotIt = true;
break;
}
}
TComponent currentComponent;
TComponenttype currentComponenttype;
if (gotIt) {
currentComponent = komponent.get(componentnr);
currentComponenttype = componenttype.get(componentnr);
} else {
// start a new component (and component type) for this relation type
currentComponenttype = new TComponenttype();
currentComponenttype.type = relType;
componenttype.add(currentComponenttype);
componentnr = komponent.size();
currentComponent = new TComponent();
currentComponent.type = relType;
currentComponent.tokenList = new LinkedList<String>();
komponent.add(currentComponent);
// NOTE(review): assumes TComponenttype initializes nodeList itself;
// otherwise this line throws a NullPointerException — confirm
currentComponenttype.nodeList.add(rel.getSource().getId());
}
TReferent ref = new TReferent();
ref.annotations = new HashSet<SerializableAnnotation>();
for (SAnnotation anno : rel.getAnnotations()) {
ref.annotations.add(new SerializableAnnotation(anno));
}
ref.component = componentnr;
referentList.add(ref);
List<String> currentTokens = getAllTokens(rel.getSource(), componentNameForRelation(rel), currentComponenttype, componentnr, input.getNamespace());
// register target (role 0) and source (role 1) under the current referent
// index ("neu" in the original German comments)
setReferent(rel.getTarget(), globalIndex, 0);
setReferent(rel.getSource(), globalIndex, 1);
for (String s : currentTokens) {
if (!currentComponent.tokenList.contains(s)) {
currentComponent.tokenList.add(s);
}
}
globalIndex++;
}
}
colorlist = new HashMap<Integer, Integer>();
// A list containing all the generated HTML elements, one list entry
// for each text.
List<List<Node>> nodesPerText = new LinkedList<List<Node>>();
// write output for each text separately
List<STextualDS> texts = saltGraph.getTextualDSs();
if (texts != null && !texts.isEmpty()) {
for (STextualDS t : texts) {
DataSourceSequence<Integer> sequence = new DataSourceSequence<>(t, 0, (t.getText() != null) ? t.getText().length() : 0);
List<SToken> token = saltGraph.getSortedTokenByText(saltGraph.getTokensBySequence(sequence));
if (token != null) {
boolean validText = true;
if (Boolean.parseBoolean(input.getMappings().getProperty("hide_empty", "false"))) {
validText = false;
// check if the text contains any matching annotations
for (SToken tok : token) {
/*
* The token is only added to this map if a valid edge
* (according to the resolver trigger) connected to
* this token was found.
*/
if (referentOfToken.get(tok.getId()) != null && !referentOfToken.get(tok.getId()).isEmpty()) {
validText = true;
break;
}
}
}
if (validText) {
List<Node> nodes = outputSingleText(token, input);
nodesPerText.add(nodes);
}
}
}
// end for each STextualDS
/*
* Append the generated output to the body, wrap in table if necessary.
*/
// present all texts as columns side by side if using multiple texts
Table tableTexts = new Table();
Tr trTextRow = new Tr();
trTextRow.setCSSClass("textRow");
// only append wrapper table if we have multiple texts
if (nodesPerText.size() > 1) {
body.appendChild(tableTexts);
tableTexts.appendChild(trTextRow);
}
for (List<Node> nodes : nodesPerText) {
// multi-text mode: one table cell per text
if (nodesPerText.size() > 1) {
Td tdSingleText = new Td();
trTextRow.appendChild(tdSingleText);
tdSingleText.setCSSClass("text");
tdSingleText.appendChild(nodes);
} else {
body.appendChild(nodes);
}
}
} else {
Text errorTxt = new Text("Could not find any texts for the " + input.getNamespace() + " node namespace (layer).");
body.appendChild(errorTxt);
}
// write HTML4 transitional doctype
w.append(new Doctype(DocumentType.HTMLTransitional).write());
// append the html tree
w.append(html.write());
} catch (IOException ex) {
log.error(null, ex);
}
}
Use of org.corpus_tools.salt.common.SToken in the ANNIS project by korpling:
class HTMLVis, method createHTML.
/**
 * Creates the HTML output for one document graph.
 *
 * Works in several passes:
 * <ol>
 * <li>assign config-file-order priorities to all annotation-triggered
 * instructions and detect whether document metadata is needed,</li>
 * <li>load the document metadata if required,</li>
 * <li>collect the start/end HTML fragments emitted for every matching token
 * and span,</li>
 * <li>handle the BEGIN/END/ALL pseudo-region instructions,</li>
 * <li>concatenate all fragments in position order.</li>
 * </ol>
 *
 * @param graph the Salt document graph to visualize
 * @param definitions the visualization instructions from the configuration
 * @return the assembled HTML string
 */
public String createHTML(SDocumentGraph graph, VisualizationDefinition[] definitions) {
    HashMap<VisualizationDefinition, Integer> instruction_priorities = new HashMap<>();
    // fragments to emit before/after the token at a given position
    SortedMap<Long, List<OutputItem>> outputStartTags = new TreeMap<>();
    SortedMap<Long, List<OutputItem>> outputEndTags = new TreeMap<>();
    StringBuilder sb = new StringBuilder();
    List<SToken> token = graph.getSortedTokenByText();
    // Pass 1: remember the config-file order of every non-meta instruction as
    // a (decreasing) priority and check whether any instruction needs
    // document metadata.
    boolean bolMetaTypeFound = false;
    HashMap<String, String> meta = new HashMap<>();
    int def_priority = 0;
    for (VisualizationDefinition vis : definitions) {
        if (vis.getOutputter().getType() == SpanHTMLOutputter.Type.META_NAME) {
            bolMetaTypeFound = true;
        } else {
            // not a meta-annotation: remember order in config file to set priority
            if (vis.getMatcher() instanceof AnnotationNameMatcher
                || vis.getMatcher() instanceof AnnotationNameAndValueMatcher
                || vis.getMatcher() instanceof TokenMatcher) {
                instruction_priorities.put(vis, def_priority);
            }
            def_priority--;
        }
        vis.getOutputter().setMeta(meta);
    }
    if (bolMetaTypeFound) {
        // Metadata is required: resolve corpus and document name, then load
        // all document-level annotations into the shared map.
        String strDocName = graph.getDocument().getName();
        List<String> corpusPath =
            CommonHelper.getCorpusPath(graph.getDocument().getGraph(), graph.getDocument());
        String strCorpName = corpusPath.get(corpusPath.size() - 1);
        List<Annotation> metaData = Helper.getMetaDataDoc(strCorpName, strDocName);
        for (Annotation metaDatum : metaData) {
            meta.put(metaDatum.getName(), metaDatum.getValue());
        }
    }
    // Pass 2a: let every instruction emit its fragments for each token,
    // highlighting tokens that are part of the match.
    for (SToken t : token) {
        tokenColor = "";
        if (mc.containsKey(t) && hitMark) {
            tokenColor = MatchedNodeColors.getHTMLColorByMatch(mc.get(t));
        }
        for (VisualizationDefinition vis : definitions) {
            String matched = vis.getMatcher().matchedAnnotation(t);
            if (matched != null) {
                vis.getOutputter().outputHTML(t, matched, outputStartTags, outputEndTags,
                    tokenColor, Objects.firstNonNull(instruction_priorities.get(vis), 0));
            }
        }
    }
    // Pass 2b: same for every span.
    List<SSpan> spans = graph.getSpans();
    for (VisualizationDefinition vis : definitions) {
        for (SSpan span : spans) {
            tokenColor = "";
            if (mc.containsKey(span) && hitMark) {
                tokenColor = MatchedNodeColors.getHTMLColorByMatch(mc.get(span));
            }
            String matched = vis.getMatcher().matchedAnnotation(span);
            if (matched != null) {
                vis.getOutputter().outputHTML(span, matched, outputStartTags, outputEndTags,
                    tokenColor, Objects.firstNonNull(instruction_priorities.get(vis), 0));
            }
        }
    }
    // firstKey()/lastKey() throw NoSuchElementException on an empty map, so
    // guard against documents where no instruction matched at all. The
    // fallback values are never read: the ALL case below is only reached when
    // both maps are non-empty.
    int minStartTagPos = outputStartTags.isEmpty() ? 0 : outputStartTags.firstKey().intValue();
    int maxEndTagPos = outputEndTags.isEmpty() ? 0 : outputEndTags.lastKey().intValue();
    // Pass 3: find BEGIN/END/ALL pseudo-region instructions if available.
    for (VisualizationDefinition vis : definitions) {
        if (vis.getMatcher() instanceof PseudoRegionMatcher) {
            PseudoRegionMatcher.PseudoRegion psdRegionType =
                ((PseudoRegionMatcher) vis.getMatcher()).getPsdRegion();
            int positionStart = 0;
            int positionEnd = 0;
            if (!outputEndTags.isEmpty() && !outputStartTags.isEmpty() && psdRegionType != null) {
                switch (psdRegionType) {
                    case BEGIN:
                        positionStart = positionEnd = Integer.MIN_VALUE;
                        // def_priority is now lower than all normal annotations
                        instruction_priorities.put(vis, def_priority);
                        break;
                    case END:
                        positionStart = positionEnd = Integer.MAX_VALUE;
                        // def_priority is now lower than all normal annotations
                        instruction_priorities.put(vis, def_priority);
                        break;
                    case ALL:
                        // use the same positions as the first and last key
                        positionStart = minStartTagPos;
                        positionEnd = maxEndTagPos;
                        // The ALL pseudo-range must enclose everything, thus it
                        // gets a priority lower than every normal annotation.
                        instruction_priorities.put(vis, def_priority);
                        break;
                    default:
                        break;
                }
            }
            switch (vis.getOutputter().getType()) {
                case META_NAME:
                    String strMetaVal = meta.get(vis.getOutputter().getMetaname().trim());
                    if (strMetaVal == null) {
                        throw new NullPointerException("no such metadata name in document: '"
                            + vis.getOutputter().getMetaname().trim() + "'");
                    } else {
                        vis.getOutputter().outputAny(positionStart, positionEnd,
                            ((PseudoRegionMatcher) vis.getMatcher()).getAnnotationName(),
                            strMetaVal, outputStartTags, outputEndTags,
                            Objects.firstNonNull(instruction_priorities.get(vis), 0));
                    }
                    break;
                case CONSTANT:
                    vis.getOutputter().outputAny(positionStart, positionEnd,
                        ((PseudoRegionMatcher) vis.getMatcher()).getAnnotationName(),
                        vis.getOutputter().getConstant(), outputStartTags, outputEndTags,
                        Objects.firstNonNull(instruction_priorities.get(vis), 0));
                    break;
                case EMPTY:
                    vis.getOutputter().outputAny(positionStart, positionEnd,
                        ((PseudoRegionMatcher) vis.getMatcher()).getAnnotationName(), "",
                        outputStartTags, outputEndTags,
                        Objects.firstNonNull(instruction_priorities.get(vis), 0));
                    break;
                case ANNO_NAME:
                case VALUE:
                case ESCAPED_VALUE:
                    // these cannot happen: a BEGIN/END instruction has no
                    // triggering annotation name or value
                    break;
                default:
                    break;
            }
        }
    }
    // Pass 4: walk all used positions in order and concatenate the fragments.
    Set<Long> indexes = new TreeSet<>();
    indexes.addAll(outputStartTags.keySet());
    indexes.addAll(outputEndTags.keySet());
    for (Long i : indexes) {
        // first the start tags for this position, sorted by the natural
        // OutputItem order (priorities break length ties)
        SortedSet<OutputItem> itemsStart = new TreeSet<>();
        List<OutputItem> unsortedStart = outputStartTags.get(i);
        if (unsortedStart != null) {
            itemsStart.addAll(unsortedStart);
        }
        Iterator<OutputItem> itStart = itemsStart.iterator();
        boolean first = true;
        while (itStart.hasNext()) {
            OutputItem s = itStart.next();
            // NOTE(review): consecutive fragments are woven together with HTML
            // comments ("<!--\n" ... "-->"), presumably to suppress whitespace
            // between the rendered tags — confirm
            if (!first) {
                sb.append("-->");
            }
            first = false;
            sb.append(s.getOutputString());
            if (itStart.hasNext()) {
                sb.append("<!--\n");
            }
        }
        // then the end tags for this position, in inverse order so tags nest
        SortedSet<OutputItem> itemsEnd = new TreeSet<>();
        List<OutputItem> unsortedEnd = outputEndTags.get(i);
        if (unsortedEnd != null) {
            itemsEnd.addAll(unsortedEnd);
        }
        List<OutputItem> itemsEndReverse = new LinkedList<>(itemsEnd);
        Collections.reverse(itemsEndReverse);
        for (OutputItem s : itemsEndReverse) {
            sb.append(s.getOutputString());
        }
    }
    return sb.toString();
}
Use of org.corpus_tools.salt.common.SToken in the ANNIS project by korpling:
class VakyarthaDependencyTree, method selectNode.
/**
 * Decides whether the given node is rendered by the dependency tree.
 *
 * If the {@link VakyarthaDependencyTree#MAPPING_NODE_KEY} mapping is set, its
 * value is the name of an annotation the node must carry to be selected. If
 * the mapping is not set, the method falls back to the default behaviour and
 * selects only {@link SToken} nodes.
 *
 * @param n the node to check
 * @return true if the node should be selected
 */
private boolean selectNode(SNode n) {
    String annoKey = null;
    if (mappings.containsKey(MAPPING_NODE_KEY)) {
        annoKey = mappings.getProperty(MAPPING_NODE_KEY);
    }
    if (annoKey == null) {
        // Default behaviour when the mapping is missing or not set correctly:
        // only tokens qualify.
        return n instanceof SToken;
    }
    // The mapping is set: check whether the node carries the mapped annotation key.
    Set<SAnnotation> annos = n.getAnnotations();
    for (SAnnotation a : annos) {
        if (annoKey.equals(a.getName())) {
            return true;
        }
    }
    return false;
}
Use of org.corpus_tools.salt.common.SToken in the ANNIS project by korpling:
class AutoSimpleRegexQuery, method analyzingQuery.
/**
 * Derives an example case-insensitive regex AQL query from the given project.
 *
 * Collects the spanned text of all tokens, then prefers a word that occurs
 * both lower- and upper-cased; if none exists, falls back to a randomly
 * chosen non-empty token. The resulting query is stored in {@code finalAQL}
 * (empty string if no suitable token was found); the chosen word is stored
 * in {@code text}.
 *
 * @param saltProject the project whose documents provide the token material
 */
@Override
public void analyzingQuery(SaltProject saltProject) {
    // Collect the spanned text of every token of every document.
    List<String> tokens = new ArrayList<>();
    for (SCorpusGraph g : saltProject.getCorpusGraphs()) {
        if (g != null) {
            for (SDocument doc : g.getDocuments()) {
                SDocumentGraph docGraph = doc.getDocumentGraph();
                List<SNode> sNodes = docGraph.getNodes();
                if (sNodes != null) {
                    for (SNode n : sNodes) {
                        if (n instanceof SToken) {
                            tokens.add(CommonHelper.getSpannedText((SToken) n));
                        }
                    }
                }
            }
        }
    }
    // Try to find a word that is contained twice, once with a capitalized
    // first letter and once lower-cased.
    text = null;
    search:
    for (int i = 0; i < tokens.size(); i++) {
        for (int j = i + 1; j < tokens.size(); j++) {
            if (tokens.get(i).equalsIgnoreCase(tokens.get(j))) {
                if (tokens.get(i).length() > 1 && ((Character.isLowerCase(tokens.get(i).charAt(0)) && Character.isUpperCase(tokens.get(j).charAt(0))) || (Character.isLowerCase(tokens.get(j).charAt(0)) && Character.isUpperCase(tokens.get(i).charAt(0))))) {
                    text = tokens.get(i);
                    // labeled break: the original plain break only left the
                    // inner loop, so later pairs kept overwriting the result
                    break search;
                }
            }
        }
    }
    if (text != null) {
        finalAQL = caseInsensitiveRegex(text);
    } else if (tokens.isEmpty()) {
        // no tokens at all: Random.nextInt would throw, so bail out early
        finalAQL = "";
    } else {
        // select one random non-empty token from the result; nextInt(size)
        // (instead of size - 1) also allows the last token to be picked and
        // never gets a non-positive bound here
        Random random = new Random();
        int tries = 10;
        text = tokens.get(random.nextInt(tokens.size()));
        while ("".equals(text) && tries > 0) {
            text = tokens.get(random.nextInt(tokens.size()));
            tries--;
        }
        if (!"".equals(text) && text.length() > 1) {
            finalAQL = caseInsensitiveRegex(text);
        } else {
            finalAQL = "";
        }
    }
}

/**
 * Builds an AQL regex that matches the word with its first letter in either
 * case, e.g. "word" becomes /[Ww]ord/. The word must have length >= 2.
 */
private static String caseInsensitiveRegex(String word) {
    char upperLetter = Character.toUpperCase(word.charAt(0));
    char lowerLetter = Character.toLowerCase(word.charAt(0));
    return "/[" + upperLetter + lowerLetter + "]" + word.substring(1) + "/";
}
Use of org.corpus_tools.salt.common.SToken in the ANNIS project by korpling:
class SaltAnnotateExtractor, method createSToken.
/**
 * Creates a new token for the given index and registers it together with its
 * spanned text in the caller-supplied index maps.
 *
 * @param tokenIndex position of the token in the text order
 * @param resultSet current row providing the "span" column of the node table
 * @param tokenTexts map from token index to spanned text (updated here)
 * @param tokenByIndex map from token index to created token (updated here)
 * @return the freshly created token
 * @throws SQLException if reading the spanned text from the row fails
 */
private SToken createSToken(long tokenIndex, ResultSet resultSet, TreeMap<Long, String> tokenTexts, TreeMap<Long, SToken> tokenByIndex) throws SQLException {
    // read the spanned text of this token from the current result set row
    final String spannedText = stringValue(resultSet, NODE_TABLE, "span");
    tokenTexts.put(tokenIndex, spannedText);
    // create the token itself and register it under its index
    final SToken tok = SaltFactory.createSToken();
    tokenByIndex.put(tokenIndex, tok);
    return tok;
}
Aggregations