Search in sources :

Example 1 with Range

use of de.catma.document.Range in project catma by forTEXT.

the class TeiUserMarkupCollectionSerializer method mergeTagReferences.

/**
 * Partitions the given initial range into sub-ranges and maps each sub-range
 * to the tag references that apply to it.
 *
 * The result starts with the initial range mapped to an empty list. For every
 * tag reference, each range reported by {@code getAffectedRanges} is either
 * extended in place or split into its overlapping and disjoint parts.
 *
 * @param tagReferences the tag references to merge
 * @param initialRange the range the partitioning starts from
 * @return a map from range to the tag references collected for that range
 */
private HashMap<Range, List<TagReference>> mergeTagReferences(List<TagReference> tagReferences, Range initialRange) {
    HashMap<Range, List<TagReference>> result = new HashMap<Range, List<TagReference>>();
    result.put(initialRange, new ArrayList<TagReference>());

    for (TagReference reference : tagReferences) {
        Range referenceRange = reference.getRange();

        // getAffectedRanges is handed the live key set; the map is modified below
        // while its result is iterated (presumably it returns an independent list - confirm)
        for (Range current : getAffectedRanges(result.keySet(), referenceRange)) {
            if (current.isInBetween(referenceRange)) {
                // no split needed, the reference is simply recorded for this range
                result.get(current).add(reference);
                continue;
            }

            List<TagReference> currentReferences = result.get(current);
            Range overlap = current.getOverlappingRange(referenceRange);
            List<Range> remainders = current.getDisjointRanges(referenceRange);

            // range outside of the overlapping range,
            // left or right depending on the position of the overlapping range
            result.put(remainders.get(0), new ArrayList<TagReference>(currentReferences));

            // the overlapping range sits in the middle
            if (remainders.size() == 2) {
                // range right of the overlapping range
                result.put(remainders.get(1), new ArrayList<TagReference>(currentReferences));
            }

            // the overlapping part takes over the existing list plus the new reference
            // and replaces the range that has just been split
            currentReferences.add(reference);
            result.put(overlap, currentReferences);
            result.remove(current);
        }
    }
    return result;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) List(java.util.List) TagReference(de.catma.document.annotation.TagReference) Range(de.catma.document.Range)

Example 2 with Range

use of de.catma.document.Range in project catma by forTEXT.

the class TeiUserMarkupCollectionSerializer method serialize.

/**
 * Writes the tag references of the given collection into the TEI document.
 *
 * Nothing is written for an empty collection. Otherwise the document range
 * {@code [0, sourceDocument.getLength()]} is partitioned via
 * {@code mergeTagReferences} and the resulting ranges are written in their
 * natural (sorted) order.
 *
 * @param userMarkupCollection the collection whose tag references are serialized
 * @param sourceDocument the document the collection refers to
 * @throws IOException declared by this method; which call raises it is not visible here
 */
public void serialize(AnnotationCollection userMarkupCollection, SourceDocument sourceDocument) throws IOException {
    String targetURI = makeTargetURI(sourceDocument);
    if (userMarkupCollection.isEmpty()) {
        return;
    }

    TeiElement textElement = (TeiElement) teiDocument.getNodes(TeiElementName.text).get(0);
    TeiElement ptrParentElement = (TeiElement) teiDocument.getNodes(TeiElementName.ab, AttributeValue.type_catma).get(0);
    Set<String> writtenTagInstances = new HashSet<String>();

    Range documentRange = new Range(0, sourceDocument.getLength());
    HashMap<Range, List<TagReference>> referencesByRange = mergeTagReferences(userMarkupCollection.getTagReferences(), documentRange);

    // the TreeSet orders the ranges by their natural ordering
    TreeSet<Range> orderedRanges = new TreeSet<Range>(referencesByRange.keySet());
    for (Range range : orderedRanges) {
        List<TagReference> rangeReferences = referencesByRange.get(range);
        // ranges without references are written directly below the pointer parent,
        // all others below the element returned by writeSegment
        TeiElement parent = rangeReferences.isEmpty()
                ? ptrParentElement
                : writeSegment(rangeReferences, ptrParentElement, textElement, writtenTagInstances, userMarkupCollection.getTagLibrary());
        writeText(targetURI, range, parent, sourceDocument);
    }
}
Also used : TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) List(java.util.List) TagReference(de.catma.document.annotation.TagReference) Range(de.catma.document.Range) HashSet(java.util.HashSet)

Example 3 with Range

use of de.catma.document.Range in project catma by forTEXT.

the class V3TeiDocumentConverter method getTarget.

/**
 * Reads the target attribute of the given pointer element and splits it into
 * the URL-encoded URI part and the character range it points to.
 *
 * The part before the '#' is taken as the URI; the part after it is split
 * and its second and third pieces are parsed as the start and end offsets.
 *
 * @param pointer the pointer element carrying the {@code ptr_target} attribute
 * @return a pair of the URL-encoded URI and the parsed range
 * @throws IllegalStateException if the "UTF8" encoding is not supported
 */
private Pair<String, Range> getTarget(TeiElement pointer) {
    String target = pointer.getAttributeValue(Attribute.ptr_target);
    String[] uri_points = target.split("#");
    String uri = uri_points[0].trim();
    // the split argument is a regular expression: a '/' followed by any single character
    String[] points = uri_points[1].split("/.");

    String encodedUri;
    try {
        encodedUri = URLEncoder.encode(uri, "UTF8");
    } catch (UnsupportedEncodingException uee) {
        // keep the original exception as cause so its stack trace is not lost
        throw new IllegalStateException("UTF8 characterset not supported!", uee);
    }

    // the start offset ends at the first ',' and the end offset at the first ')'
    Integer startPoint = Integer.valueOf(points[1].substring(0, points[1].indexOf(',')).trim());
    Integer endPoint = Integer.valueOf(points[2].substring(0, points[2].indexOf(')')).trim());
    return new Pair<String, Range>(encodedUri, new Range(startPoint, endPoint));
}
Also used : UnsupportedEncodingException(java.io.UnsupportedEncodingException) Range(de.catma.document.Range) Pair(de.catma.util.Pair)

Example 4 with Range

use of de.catma.document.Range in project catma by forTEXT.

the class V3TeiDocumentConverter method getInstanceID.

/**
 * Determines the instance ID for a range tagged with the given old tag ID.
 *
 * The range is always recorded for the tag ID. If it is adjacent to a range
 * already recorded for that tag ID, the ID looked up for that neighbour in
 * {@code oldInstance2newInstanceID} is returned; otherwise a fresh ID is
 * generated.
 *
 * @param oldTagID the old tag ID the range belongs to
 * @param currentRange the range an instance ID is requested for
 * @return the looked-up ID of an adjacent range, or a newly generated ID
 */
private String getInstanceID(String oldTagID, Range currentRange) {
    if (!oldTagID2Ranges.containsKey(oldTagID)) {
        // first range seen for this tag ID
        HashSet<Range> firstRanges = new HashSet<Range>();
        firstRanges.add(currentRange);
        this.oldTagID2Ranges.put(oldTagID, firstRanges);
        return catmaIDGenerator.generate();
    }

    Set<Range> knownRanges = oldTagID2Ranges.get(oldTagID);
    for (Range knownRange : knownRanges) {
        if (!currentRange.isAdjacentTo(knownRange)) {
            continue;
        }
        // the set is modified while being iterated; this is safe only because
        // the method returns right away and the iterator is not advanced again
        knownRanges.add(currentRange);
        return this.oldInstance2newInstanceID.get(new OldInstance(knownRange, oldTagID));
    }

    // no adjacent range found
    knownRanges.add(currentRange);
    return catmaIDGenerator.generate();
}
Also used : Range(de.catma.document.Range) HashSet(java.util.HashSet)

Example 5 with Range

use of de.catma.document.Range in project catma by forTEXT.

the class V3TeiDocumentConverter method adjustPointers.

/**
 * Rewrites the target attribute of every {@code ptr} element in the given
 * document to the form {@code catma:///<uri>#char=<start>,<end>}.
 *
 * @param teiDocument the document whose pointer targets are rewritten in place
 */
private void adjustPointers(TeiDocument teiDocument) {
    Nodes ptrNodes = teiDocument.getNodes(TeiElementName.ptr);
    for (int idx = 0; idx < ptrNodes.size(); idx++) {
        TeiElement ptr = (TeiElement) ptrNodes.get(idx);

        // getTarget yields the URI part and the range of the current target value
        Pair<String, Range> parsed = getTarget(ptr);
        String uriPart = "catma:///" + parsed.getFirst();
        String rangePart = "#char=" + parsed.getSecond().getStartPoint() + "," + parsed.getSecond().getEndPoint();

        ptr.setAttributeValue(Attribute.ptr_target, uriPart + rangePart);
    }
}
Also used : Range(de.catma.document.Range) Nodes(nu.xom.Nodes)

Aggregations

Range (de.catma.document.Range): 30; ArrayList (java.util.ArrayList): 11; List (java.util.List): 10; TagQueryResultRow (de.catma.queryengine.result.TagQueryResultRow): 9; QueryResultRow (de.catma.queryengine.result.QueryResultRow): 8; TagDefinition (de.catma.tag.TagDefinition): 8; TagReference (de.catma.document.annotation.TagReference): 7; QueryResultRowArray (de.catma.queryengine.result.QueryResultRowArray): 7; TagInstance (de.catma.tag.TagInstance): 7; SourceDocument (de.catma.document.source.SourceDocument): 6; Property (de.catma.tag.Property): 6; GraphTraversalSource (org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource): 5; Vertex (org.apache.tinkerpop.gremlin.structure.Vertex): 5; AnnotationCollectionReference (de.catma.document.annotation.AnnotationCollectionReference): 4; Project (de.catma.project.Project): 4; QueryResult (de.catma.queryengine.result.QueryResult): 4; PropertyDefinition (de.catma.tag.PropertyDefinition): 4; HashMap (java.util.HashMap): 4; HashSet (java.util.HashSet): 4; Indexer (de.catma.indexer.Indexer): 3