use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.
the class SaltAnnotateExtractor method createSinglePrimaryText.
/**
 * Creates the textual data source for a single primary text and connects all
 * tokens that reference this text to it.
 *
 * <p>
 * The entries of {@code tokenTexts} are visited in ascending key order. For
 * every entry whose token carries a {@link RelannisNodeFeature} with a text
 * reference equal to {@code textID}, the token text is appended to the primary
 * text and a textual relation covering exactly that span is added to the graph.
 * A single space is appended after a token text whenever {@code tokenTexts}
 * still has further entries, even if none of them belongs to this text, so the
 * resulting text may end with a space.
 * </p>
 *
 * @param graph the document graph that receives the data source and the relations
 * @param textID the ID of the primary text to create
 * @param tokenTexts the covered text of each token, keyed by token index
 * @param tokenByIndex the token nodes, keyed by the same token index
 */
private void createSinglePrimaryText(SDocumentGraph graph, long textID, TreeMap<Long, String> tokenTexts, TreeMap<Long, SToken> tokenByIndex) {
    STextualDS textDataSource = SaltFactory.createSTextualDS();
    textDataSource.setName("sText" + textID);
    graph.addNode(textDataSource);

    StringBuilder sbText = new StringBuilder();
    Iterator<Map.Entry<Long, String>> itToken = tokenTexts.entrySet().iterator();
    long index = 0;
    while (itToken.hasNext()) {
        Map.Entry<Long, String> e = itToken.next();
        SToken tok = tokenByIndex.get(e.getKey());
        if (tok == null) {
            // there is no token node for this index, so nothing can be linked
            continue;
        }
        SFeature rawFeature = tok.getFeature(SaltUtil.createQName(ANNIS_NS, FEAT_RELANNIS_NODE));
        if (rawFeature == null) {
            continue;
        }
        RelannisNodeFeature feat = (RelannisNodeFeature) rawFeature.getValue();
        if (feat.getTextRef() != textID) {
            // token belongs to a different primary text
            continue;
        }

        int start = sbText.length();
        STextualRelation textRel = SaltFactory.createSTextualRelation();
        textRel.setSource(tok);
        textRel.setTarget(textDataSource);
        textRel.setStart(start);
        textRel.setEnd(start + e.getValue().length());
        textRel.setName("sTextRel" + textID + "_" + (index++));
        graph.addRelation(textRel);

        sbText.append(e.getValue());
        if (itToken.hasNext()) {
            sbText.append(" ");
        }
    }
    textDataSource.setText(sbText.toString());
}
use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.
the class SaltAnnotateExtractor method createNewRelation.
/**
 * Creates a new relation of the given type between two nodes and adds it to the
 * graph and to the layer.
 *
 * <p>
 * Supported types are {@code "d"} (dominance relation), {@code "c"} (spanning
 * relation) and {@code "p"} (pointing relation). Each relation gets an explicit
 * name built from {@code numberOfRelations} and a {@link RelannisEdgeFeature}
 * holding {@code pre} and {@code componentID}.
 * </p>
 *
 * <p>
 * A spanning relation whose target is not an {@link SToken} is only logged as a
 * warning: it is returned with its source set, but it gets no target and is not
 * added to the graph or the layer.
 * </p>
 *
 * @param graph the document graph the relation is added to
 * @param sourceNode the source node; may be replaced internally by a recreated
 *        node when its class does not fit the relation type
 * @param targetNode the target node of the relation
 * @param relationName the name used as the type of the new relation, may be
 *        {@code null} or empty
 * @param type the relation type code, {@code null} means that no relation is
 *        created
 * @param componentID the component ID stored in the edge feature
 * @param layer the layer the relation is added to
 * @param pre the pre-order value stored in the edge feature
 * @param nodeByRankID map that is updated when the source node is recreated
 * @param numberOfRelations counter used to generate unique relation names
 * @return the created relation, or {@code null} if {@code type} is
 *         {@code null} or the dominance relation was handled as an artificial
 *         one
 * @throws IllegalArgumentException if {@code type} is not one of the supported
 *         type codes
 */
private SRelation createNewRelation(SDocumentGraph graph, SStructuredNode sourceNode, SNode targetNode, String relationName, String type, long componentID, SLayer layer, long pre, FastInverseMap<Long, SNode> nodeByRankID, AtomicInteger numberOfRelations) {
    if (type == null) {
        return null;
    }

    SRelation rel;
    switch (type) {
        case "d":
            SDominanceRelation domrel = SaltFactory.createSDominanceRelation();
            // always set a name by ourself since SDocumentGraph#basicAddRelation()
            // is really slow otherwise
            domrel.setName("sDomRel" + numberOfRelations.incrementAndGet());
            rel = domrel;
            if (sourceNode != null && !(sourceNode instanceof SStructure)) {
                log.debug("Mismatched source type: should be SStructure");
                SNode oldNode = sourceNode;
                sourceNode = recreateNode(SStructure.class, sourceNode);
                updateMapAfterRecreatingNode(oldNode, sourceNode, nodeByRankID);
            }
            if (relationName == null || relationName.isEmpty()) {
                // an unnamed dominance relation may be an artificial one; the
                // helper decides whether it has to be dropped
                if (handleArtificialDominanceRelation(graph, sourceNode, targetNode, rel, layer, componentID, pre)) {
                    // don't include this relation
                    rel = null;
                }
            }
            break;
        case "c":
            SSpanningRelation spanrel = SaltFactory.createSSpanningRelation();
            // always set a name by ourself since SDocumentGraph#basicAddRelation()
            // is really slow otherwise
            spanrel.setName("sSpanRel" + numberOfRelations.incrementAndGet());
            rel = spanrel;
            sourceNode = testAndFixNonSpan(sourceNode, nodeByRankID);
            break;
        case "p":
            SPointingRelation pointingrel = SaltFactory.createSPointingRelation();
            pointingrel.setName("sPointingRel" + numberOfRelations.incrementAndGet());
            rel = pointingrel;
            break;
        default:
            throw new IllegalArgumentException("Invalid type " + type + " for new Relation");
    }

    if (rel == null) {
        return null;
    }

    try {
        rel.setType(relationName);

        RelannisEdgeFeature featRelation = new RelannisEdgeFeature();
        featRelation.setPre(pre);
        featRelation.setComponentID(componentID);

        SFeature sfeatRelation = SaltFactory.createSFeature();
        sfeatRelation.setNamespace(ANNIS_NS);
        sfeatRelation.setName(FEAT_RELANNIS_EDGE);
        sfeatRelation.setValue(featRelation);
        rel.addFeature(sfeatRelation);

        rel.setSource(sourceNode);
        if ("c".equals(type) && !(targetNode instanceof SToken)) {
            // targetNode may be null here (null is not an instance of SToken),
            // so its name has to be resolved null-safe just like the source name
            log.warn("invalid relation detected: target node ({}) of a coverage relation (from: {}, internal id {}) was not a token", new Object[] { targetNode == null ? "null" : targetNode.getName(), sourceNode == null ? "null" : sourceNode.getName(), "" + pre });
        } else {
            rel.setTarget(targetNode);
            graph.addRelation(rel);
            layer.addRelation(rel);
        }
    } catch (SaltException ex) {
        log.warn("invalid relation detected", ex);
    }
    return rel;
}
use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.
the class TextColumnExporter method createAdjacencyMatrix.
/**
 * Implements the abstract method of {@code annis.gui.exporter.SaltBasedExporter}.
 * This method creates and fills an adjacency matrix of dimension
 * (nodeCount x nodeCount), which keeps the relative order of the match numbers
 * of each query result line. A result line is the part of a record that
 * belongs to one speaker.
 *
 * <p>
 * The adjacency matrix is a global two-dimensional array of integers. It allows
 * the valid global order of match numbers to be determined once all query
 * results have been processed. A cell {@code [a - 1][b - 1]} is set to 1 when
 * match number {@code a} directly precedes match number {@code b} within a
 * line; all other cells keep the initial value -1.
 * </p>
 *
 * <p>
 * When {@code recordNumber} is 0 all state collected for a previous query is
 * reset and the user parameters are read from {@code args} again.
 * </p>
 *
 * @param graph an org.corpus_tools.salt.common.SDocumentGraph representation of a record, may be {@code null}
 * @param args a map containing parameters like 'filter' or 'metakeys', set by user
 * @param recordNumber the number of the record within the record set returned for the user query
 * @param nodeCount the count of distinct match numbers in the whole record set returned for the user query
 */
@Override
public void createAdjacencyMatrix(SDocumentGraph graph, Map<String, String> args, int recordNumber, int nodeCount) throws IOException {
    String currSpeakerName = "";
    String prevSpeakerName = "";
    List<Long> matchNumbersOrdered = new ArrayList<Long>();

    // if new search, reset adjacencyMatrix, extract parameters, set by user
    if (recordNumber == 0) {
        speakerHasMatches.clear();
        speakerName = "";
        tokenToMatchNumber.clear();
        filterNumbersSetByUser.clear();
        filterNumbersIsEmpty = true;
        listOfMetakeys.clear();
        adjacencyMatrix = new int[nodeCount][nodeCount];
        matrixIsFilled = false;
        singleMatchesGlobal.clear();
        orderedMatchNumbersGlobal.clear();
        matchNumbersGlobal.clear();
        dataIsAlignable = true;
        maxMatchesPerLine = 0;

        // initialize adjacency matrix
        for (int i = 0; i < adjacencyMatrix.length; i++) {
            for (int j = 0; j < adjacencyMatrix[i].length; j++) {
                adjacencyMatrix[i][j] = -1;
            }
        }

        // extract filter numbers, if set
        if (args.containsKey(FILTER_PARAMETER_KEYWORD)) {
            String parameters = args.get(FILTER_PARAMETER_KEYWORD);
            String[] numbers = parameters.split(PARAMETER_SEPARATOR);
            for (int i = 0; i < numbers.length; i++) {
                try {
                    Long number = Long.parseLong(numbers[i]);
                    filterNumbersSetByUser.add(number);
                } catch (NumberFormatException ignored) {
                    // entries that are not a number are deliberately skipped
                }
            }
        }
        if (!filterNumbersSetByUser.isEmpty()) {
            filterNumbersIsEmpty = false;
        }

        // extract metakeys
        if (args.containsKey(METAKEYS_KEYWORD)) {
            String parameters = args.get(METAKEYS_KEYWORD);
            String[] metakeys = parameters.split(PARAMETER_SEPARATOR);
            for (int i = 0; i < metakeys.length; i++) {
                String metakey = metakeys[i].trim();
                listOfMetakeys.add(metakey);
            }
        }
    }

    if (graph == null) {
        return;
    }
    List<SToken> orderedToken = graph.getSortedTokenByText();
    if (orderedToken == null) {
        return;
    }

    // reset counter over all the tokens
    if (recordNumber == 0) {
        counterGlobal = 0;
    }

    // iterate over the tokens to figure out which speaker has matches and to
    // recognize the order of the matches
    for (SToken token : orderedToken) {
        counterGlobal++;

        String name = CommonHelper.getTextualDSForNode(token, graph).getName();
        if (name == null) {
            name = "";
        }
        speakerName = (recordNumber + 1) + "_" + name;
        currSpeakerName = speakerName;

        // reset data structures for new speaker
        if (!currSpeakerName.equals(prevSpeakerName)) {
            matchNumbersOrdered.clear();
        }
        if (!speakerHasMatches.containsKey(currSpeakerName)) {
            speakerHasMatches.put(currSpeakerName, false);
        }

        List<SNode> root = new LinkedList<>();
        root.add(token);
        IsDominatedByMatch traverserSpeakerSearch = new IsDominatedByMatch();
        // reset list
        dominatedMatchCodes.clear();
        graph.traverse(root, GRAPH_TRAVERSE_TYPE.BOTTOM_UP_DEPTH_FIRST, TRAV_PREPROCESSING, traverserSpeakerSearch);

        if (!dominatedMatchCodes.isEmpty()) {
            if (filterNumbersIsEmpty) {
                // if filter numbers not set by user, take the number of the highest
                // match node (the last collected match code)
                Long highest = dominatedMatchCodes.get(dominatedMatchCodes.size() - 1);
                tokenToMatchNumber.put(counterGlobal, highest);
                // add the match number to the ordered list
                if (!matchNumbersOrdered.contains(highest)) {
                    matchNumbersOrdered.add(highest);
                }
            } else {
                // take the highest match code, which is present in filterNumbers
                for (int i = dominatedMatchCodes.size() - 1; i >= 0; i--) {
                    Long code = dominatedMatchCodes.get(i);
                    if (filterNumbersSetByUser.contains(code)) {
                        tokenToMatchNumber.put(counterGlobal, code);
                        if (!matchNumbersOrdered.contains(code)) {
                            matchNumbersOrdered.add(code);
                        }
                        break;
                    }
                }
            }

            // update maxMatchesPerLine
            if (maxMatchesPerLine < matchNumbersOrdered.size()) {
                maxMatchesPerLine = matchNumbersOrdered.size();
            }

            // fill the adjacency matrix
            if (matchNumbersOrdered.size() > 1) {
                Iterator<Long> it = matchNumbersOrdered.iterator();
                int prev = Integer.parseInt(String.valueOf(it.next()));
                matchNumbersGlobal.add(prev);
                while (it.hasNext()) {
                    int curr = Integer.parseInt(String.valueOf(it.next()));
                    matchNumbersGlobal.add(curr);
                    adjacencyMatrix[prev - 1][curr - 1] = 1;
                    matrixIsFilled = true;
                    prev = curr;
                }
            } else if (matchNumbersOrdered.size() == 1) {
                // the list can still be empty here when a filter is set and none of
                // the dominating match codes passed it, so the size must be checked
                Long single = matchNumbersOrdered.get(0);
                matchNumbersGlobal.add(Integer.parseInt(String.valueOf(single)));
                singleMatchesGlobal.add(single);
            }
        }

        // set previous speaker name
        prevSpeakerName = currSpeakerName;
    }
}
use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.
the class CommonHelper method getSortedSegmentationNodes.
/**
 * Calculates a {@link SOrderRelation} node chain of a {@link SDocumentGraph}.
 *
 * <p>
 * If no segmentation name is set, a list of sorted {@link SToken} will be
 * returned.
 * </p>
 *
 * <p>
 * Otherwise every node that is marked as first node of a segmentation chain
 * with the given name (compared case-insensitively) is used as a start node.
 * From each start node the first outgoing {@link SOrderRelation} is followed
 * until no such relation exists or a node that was already visited is reached.
 * </p>
 *
 * @param segName The segmentation name, for which the chain is computed.
 * @param graph The salt document graph, which is traversed for the
 * segmentation.
 *
 * @return Returns a List of {@link SNode}, which is sorted by the
 * {@link SOrderRelation}.
 */
public static List<SNode> getSortedSegmentationNodes(String segName, SDocumentGraph graph) {
    List<SNode> token = new ArrayList<SNode>();

    if (segName == null) {
        // if no segmentation is given just return the sorted token list
        List<SToken> sortedToken = graph.getSortedTokenByText();
        if (sortedToken != null) {
            token.addAll(sortedToken);
        }
        return token;
    }

    // get the very first node of the order relation chain
    Set<SNode> startNodes = new LinkedHashSet<SNode>();
    for (SNode n : graph.getNodes()) {
        SFeature feat = n.getFeature(AnnisConstants.ANNIS_NS, AnnisConstants.FEAT_FIRST_NODE_SEGMENTATION_CHAIN);
        if (feat != null && segName.equalsIgnoreCase(feat.getValue_STEXT())) {
            startNodes.add(n);
        }
    }

    Set<String> alreadyAdded = new HashSet<String>();
    // add all nodes on the order relation chain beginning from the start node
    for (SNode s : startNodes) {
        // remember the start node as well, otherwise a chain that leads back to
        // it would append the start node a second time before the cycle is noticed
        alreadyAdded.add(s.getId());

        SNode current = s;
        while (current != null) {
            token.add(current);
            List<SRelation<SNode, SNode>> out = graph.getOutRelations(current.getId());
            current = null;
            if (out != null) {
                for (SRelation<? extends SNode, ? extends SNode> e : out) {
                    if (e instanceof SOrderRelation) {
                        SNode next = ((SOrderRelation) e).getTarget();
                        // stop at a dangling relation or if a cycle is detected
                        if (next != null && alreadyAdded.add(next.getId())) {
                            current = next;
                        }
                        break;
                    }
                }
            }
        }
    }
    return token;
}
use of org.corpus_tools.salt.common.SToken in project ANNIS by korpling.
the class TimelineReconstructor method convertSpanToToken.
/**
 * Replaces a span of the given order (segmentation) by a newly created token.
 *
 * <p>
 * The span is scheduled for deletion. If it covers at least one timeline
 * position and has a text value annotation, its text value is appended to the
 * text belonging to {@code orderName}, a token covering that new text range is
 * created, the non-"salt" features and annotations are handed over to the
 * token, and the token is attached to the timeline from the first to the last
 * covered position. Finally the relations of the span are moved to the token.
 * Spans that are already scheduled for deletion are ignored.
 * </p>
 *
 * @param span the span that should be converted
 * @param orderName the name of the order the span belongs to; it is also used
 *        as name of the text the new token is added to
 */
private void convertSpanToToken(SStructuredNode span, String orderName) {
    final Set<String> allowedSpanAnnos = new HashSet<>(order2spanAnnos.get(orderName));

    if (nodesToDelete.contains(span)) {
        // this span was already handled
        return;
    }
    nodesToDelete.add(span);

    // lazily create the text and its buffer for this order name
    STextualDS orderText = textsByName.get(orderName);
    if (orderText == null) {
        orderText = graph.createTextualDS("");
        orderText.setName(orderName);
        textsByName.put(orderName, orderText);
        textDataByName.put(orderName, new StringBuilder());
    }
    StringBuilder buffer = textDataByName.get(orderName);

    TreeSet<Integer> timelinePositions = new TreeSet<>(spans2TimelinePos.get(span));
    if (timelinePositions.isEmpty()) {
        return;
    }

    SAnnotation valueAnno = getTextValueAnno(orderName, span);
    if (valueAnno == null) {
        return;
    }

    // the new token covers exactly the characters appended for this span
    int from = buffer.length();
    buffer.append(valueAnno.getValue_STEXT());
    int to = buffer.length();
    SToken createdToken = graph.createToken(orderText, from, to);

    // keep track of changed ids for matches
    if (this.matchIDs.contains(span.getId())) {
        this.oldID2newID.put(span.getId(), createdToken.getId());
    }

    // move all features to the new token
    if (span.getFeatures() != null) {
        for (SFeature feature : span.getFeatures()) {
            if ("salt".equals(feature.getNamespace())) {
                continue;
            }
            createdToken.addFeature(feature);
        }
    }

    // move all annotations to the new token, except the one holding the text value
    if (span.getAnnotations() != null) {
        for (SAnnotation annotation : span.getAnnotations()) {
            if ("salt".equals(annotation.getNamespace()) || orderName.equals(annotation.getName())) {
                continue;
            }
            createdToken.addAnnotation(annotation);
        }
    }

    STimelineRelation timelineRel = SaltFactory.createSTimelineRelation();
    timelineRel.setSource(createdToken);
    timelineRel.setTarget(graph.getTimeline());
    timelineRel.setStart(timelinePositions.first());
    timelineRel.setEnd(timelinePositions.last());
    graph.addRelation(timelineRel);

    moveRelations(span, createdToken, allowedSpanAnnos, orderName);
}
Aggregations