Search in sources :

Example 1 with TagQueryResultRow

use of de.catma.queryengine.result.TagQueryResultRow in project catma by forTEXT.

the class CSVExportGroupedStreamSource method getStream.

/**
 * Produces the grouped query result as a semicolon-delimited CSV stream.
 * <p>
 * The occurrence counts are aggregated on the calling thread: one table row per group
 * (the row's phrase, or - when {@code groupByTagSupplier} yields {@code true} - the tag path
 * respectively the "no tag available" key for rows that are not tag based) and one column for
 * the total, each source document and each annotation collection. The CSV text itself is
 * written by a background job into a pipe whose reading end is returned.
 * <p>
 * Before returning, this method waits for at most 100 x 10 ms for the first bytes to arrive in
 * the pipe.
 *
 * @return the reading end of the pipe the CSV export is written to
 */
@Override
public InputStream getStream() {
    final QueryResult queryResult = queryResultSupplier.get();
    final Set<String> documentIds = new TreeSet<String>();
    final Multimap<String, String> collectionIdByDocumentId = ArrayListMultimap.create();
    final Table<String, String, Integer> groupings = HashBasedTable.create();
    for (QueryResultRow row : queryResult) {
        String group = row.getPhrase();
        if (groupByTagSupplier.get()) {
            if (row instanceof TagQueryResultRow) {
                group = ((TagQueryResultRow) row).getTagDefinitionPath();
            } else {
                group = TagQueryResultRowItem.getNoTagAvailableKey();
            }
        }
        final String documentId = row.getSourceDocumentId();
        // every row counts towards the group total and towards its source document
        groupings.put(group, "Total", getValue(groupings, group, "Total") + 1);
        groupings.put(group, documentId, getValue(groupings, group, documentId) + 1);
        documentIds.add(documentId);
        if (row instanceof TagQueryResultRow) {
            // tag based rows additionally count towards their annotation collection
            final String collectionId = ((TagQueryResultRow) row).getMarkupCollectionId();
            collectionIdByDocumentId.put(documentId, collectionId);
            groupings.put(group, collectionId, getValue(groupings, group, collectionId) + 1);
        }
    }
    final PipedInputStream in = new PipedInputStream();
    final UI ui = UI.getCurrent();
    backgroundServiceProvider.submit("csv-export", new DefaultProgressCallable<Void>() {

        @Override
        public Void call() throws Exception {
            // The pipe and the writer are managed by try-with-resources so that the writing end
            // gets closed even if building the header fails; otherwise the reading side would
            // never see an end-of-stream.
            try (PipedOutputStream out = new PipedOutputStream(in);
                OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
                ArrayList<String> header = new ArrayList<>();
                header.add("Group");
                header.add("Total");
                for (String documentId : documentIds) {
                    KwicProvider kwicProvider = kwicProviderCache.get(documentId);
                    header.add(kwicProvider.getSourceDocumentName() + " (" + documentId + ")");
                    // collections are listed in sorted order directly after their document
                    for (String collectionId : new TreeSet<String>(collectionIdByDocumentId.get(documentId))) {
                        header.add(kwicProvider.getSourceDocument().getUserMarkupCollectionReference(collectionId).toString() + " (" + collectionId + ")");
                    }
                }
                try (CSVPrinter csvPrinter = new CSVPrinter(writer, CSVFormat.EXCEL.withDelimiter(';').withHeader(header.toArray(new String[] {})))) {
                    for (String group : new TreeSet<String>(groupings.rowKeySet())) {
                        csvPrinter.print(group);
                        csvPrinter.print(groupings.get(group, "Total"));
                        // same column order as used for the header above
                        for (String documentId : documentIds) {
                            csvPrinter.print(groupings.get(group, documentId));
                            for (String collectionId : new TreeSet<String>(collectionIdByDocumentId.get(documentId))) {
                                csvPrinter.print(groupings.get(group, collectionId));
                            }
                        }
                        csvPrinter.println();
                    }
                    csvPrinter.flush();
                }
            }
            // intended
            return null;
        }
    }, new ExecutionListener<Void>() {

        @Override
        public void done(Void result) {
        // noop
        }

        @Override
        public void error(Throwable t) {
            ((ErrorHandler) ui).showAndLogError("Error export data to CSV!", t);
        }
    });
    // waiting on the background thread to send data to the pipe
    int tries = 100;
    try {
        while (!(in.available() > 0) && tries > 0) {
            Thread.sleep(10);
            tries--;
        }
    } catch (InterruptedException e) {
        // restore the interrupt flag so callers further up the stack can react to it
        Thread.currentThread().interrupt();
        Logger.getLogger(getClass().getName()).log(Level.WARNING, "Error while waiting on CSV export!", e);
    } catch (IOException e) {
        Logger.getLogger(getClass().getName()).log(Level.WARNING, "Error while waiting on CSV export!", e);
    }
    return in;
}
Also used : TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) QueryResultRow(de.catma.queryengine.result.QueryResultRow) ArrayList(java.util.ArrayList) PipedOutputStream(java.io.PipedOutputStream) CSVPrinter(org.apache.commons.csv.CSVPrinter) QueryResult(de.catma.queryengine.result.QueryResult) UI(com.vaadin.ui.UI) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) TreeSet(java.util.TreeSet) KwicProvider(de.catma.indexer.KwicProvider) PipedInputStream(java.io.PipedInputStream) IOException(java.io.IOException) IOException(java.io.IOException) OutputStreamWriter(java.io.OutputStreamWriter)

Example 2 with TagQueryResultRow

use of de.catma.queryengine.result.TagQueryResultRow in project catma by forTEXT.

the class AnnotatedDocumentQueryResultRowItem method addChildRowItems.

/**
 * Adds one child item per annotation collection that contributed tag based rows to the grouped
 * query result of this item. Each newly added collection item receives a dummy child item.
 *
 * @param treeData the tree data the child items are added to
 * @param kwicProviderCache cache used to look up the source document of this item
 */
@Override
public void addChildRowItems(TreeData<QueryResultRowItem> treeData, LoadingCache<String, KwicProvider> kwicProviderCache) {
    try {
        // bucket the tag based rows by the collection they belong to
        HashMap<String, QueryResultRowArray> bucketsByCollection = new HashMap<String, QueryResultRowArray>();
        for (QueryResultRow resultRow : groupedQueryResult) {
            if (!(resultRow instanceof TagQueryResultRow)) {
                continue;
            }
            String bucketKey = ((TagQueryResultRow) resultRow).getMarkupCollectionId();
            bucketsByCollection.computeIfAbsent(bucketKey, key -> new QueryResultRowArray()).add(resultRow);
        }
        // create a tree item for every bucket that is not yet part of the tree
        for (String bucketKey : bucketsByCollection.keySet()) {
            SourceDocument sourceDocument = kwicProviderCache.get(getDocumentId()).getSourceDocument();
            String bucketName = sourceDocument.getUserMarkupCollectionReference(bucketKey).getName();
            QueryResultRowArray bucket = bucketsByCollection.get(bucketKey);
            CollectionQueryResultRowItem collectionItem = new CollectionQueryResultRowItem(identity, bucketName, getDocumentId(), bucketKey, bucket, project);
            if (treeData.contains(collectionItem)) {
                continue;
            }
            treeData.addItem(this, collectionItem);
            treeData.addItem(collectionItem, new DummyQueryResultRowItem());
        }
    } catch (Exception e) {
        ((ErrorHandler) UI.getCurrent()).showAndLogError("error displaying annotated query results", e);
    }
}
Also used : HashMap(java.util.HashMap) QueryResultRow(de.catma.queryengine.result.QueryResultRow) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) SourceDocument(de.catma.document.source.SourceDocument) QueryResultRowArray(de.catma.queryengine.result.QueryResultRowArray)

Example 3 with TagQueryResultRow

use of de.catma.queryengine.result.TagQueryResultRow in project catma by forTEXT.

the class CollectionQueryResultRowItem method addChildRowItems.

/**
 * Adds a keyword-in-context child item for every tag based row of this item, unless the tree
 * data already contains an equal item.
 *
 * @param treeData the tree data the child items are added to
 * @param kwicProviderCache cache used to look up the KWIC provider of a row's source document
 */
@Override
public void addChildRowItems(TreeData<QueryResultRowItem> treeData, LoadingCache<String, KwicProvider> kwicProviderCache) {
    try {
        for (QueryResultRow candidate : getRows()) {
            // rows that are not tag based have no annotated representation
            if (!(candidate instanceof TagQueryResultRow)) {
                continue;
            }
            TagQueryResultRow tagRow = (TagQueryResultRow) candidate;
            KwicProvider provider = kwicProviderCache.get(candidate.getSourceDocumentId());
            TagDefinition definition = project.getTagManager().getTagLibrary().getTagDefinition(tagRow.getTagDefinitionId());
            KwicQueryResultRowItem kwicItem = new KwicQueryResultRowItem(
                tagRow,
                AnnotatedTextProvider.buildAnnotatedText(new ArrayList<>(tagRow.getRanges()), provider, definition),
                AnnotatedTextProvider.buildAnnotatedKeywordInContext(new ArrayList<>(tagRow.getRanges()), provider, definition, tagRow.getTagDefinitionPath()),
                true);
            if (treeData.contains(kwicItem)) {
                continue;
            }
            treeData.addItem(this, kwicItem);
        }
    } catch (Exception e) {
        ((ErrorHandler) UI.getCurrent()).showAndLogError("error displaying annotated kwic query results", e);
    }
}
Also used : TagDefinition(de.catma.tag.TagDefinition) QueryResultRow(de.catma.queryengine.result.QueryResultRow) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) KwicProvider(de.catma.indexer.KwicProvider) ArrayList(java.util.ArrayList)

Example 4 with TagQueryResultRow

use of de.catma.queryengine.result.TagQueryResultRow in project catma by forTEXT.

the class TPGraphProjectIndexer method searchProperty.

/**
 * Searches the annotations of the given collections for properties matching the given name and
 * value patterns, optionally restricted to tags whose path matches {@code tagPathPattern}.
 * <p>
 * For each matching annotation the three system properties (author, timestamp, display color)
 * are offered once to {@code addTagQueryResultRowForSystemProperty}; user defined properties
 * yield one result row per property value accepted by the value filter.
 *
 * @param queryId the id of the query the result rows are created for
 * @param collectionIdList the ids of the participating collections
 * @param propertyNamePattern pattern handed to {@code PropertyNameFilter}
 * @param propertyValuePattern pattern handed to {@code PropertyValueFilter}
 * @param tagPathPattern wildcard pattern for the tag path, or {@code null} to accept every tag
 *        path; a pattern not starting with "/" is prefixed with the "%" wildcard
 * @return the matching rows, empty if none of the collections references a Tag
 * @throws Exception declared by the overridden method
 */
@Override
public QueryResult searchProperty(QueryId queryId, List<String> collectionIdList, String propertyNamePattern, String propertyValuePattern, String tagPathPattern) throws Exception {
    QueryResultRowArray result = new QueryResultRowArray();
    PropertyNameFilter propertyNameFilter = new PropertyNameFilter(propertyNamePattern);
    PropertyValueFilter propertyValueFilter = new PropertyValueFilter(propertyValuePattern);
    // add default wildcard if no explicit root is defined
    if (tagPathPattern != null) {
        if (!tagPathPattern.startsWith("/")) {
            tagPathPattern = "%" + tagPathPattern;
        }
    }
    final String tagPathRegex = tagPathPattern == null ? null : SQLWildcard2RegexConverter.convert(tagPathPattern);
    GraphTraversalSource g = graph.traversal();
    // get all Tags referenced by the participating Collections
    GraphTraversal<Vertex, Vertex> traversal = g.V().hasLabel(nt(ProjectRevision)).outE(rt(hasDocument)).inV().hasLabel(nt(SourceDocument)).outE(rt(hasCollection)).inV().has(nt(MarkupCollection), "collectionId", P.within(collectionIdList)).outE(rt(hasInstance)).inV().hasLabel(nt(TagInstance)).inE(rt(hasInstance)).outV().hasLabel(nt(Tag));
    Set<Vertex> tagVs = traversal.toSet();
    if (!tagVs.isEmpty()) {
        // get all paths for the Tags
        List<Path> tagPaths = g.V(tagVs).optional(__.repeat(__.out(rt(hasParent))).until(__.outE(rt(hasParent)).count().is(0))).path().toList();
        // compile the tag path expression once instead of once per tag path
        final Pattern compiledTagPathRegex = (tagPathRegex == null) ? null : Pattern.compile(tagPathRegex);
        // collect all Tags matching the given pattern and map them by their tagId
        Map<String, String> validTagIdToTagPathMapping = new HashMap<>();
        for (Path path : tagPaths) {
            Vertex tag = path.get(0);
            String tagId = (String) tag.properties("tagId").next().orElse(null);
            // the path starts at the Tag itself, so each name is prepended to build the tag path string
            StringBuilder builder = new StringBuilder();
            String conc = "/";
            path.forEach(tagVertex -> {
                builder.insert(0, ((Vertex) tagVertex).properties("name").next().orElse(null));
                builder.insert(0, conc);
            });
            String tagPathStr = builder.toString();
            if ((compiledTagPathRegex == null) || compiledTagPathRegex.matcher(tagPathStr).matches()) {
                validTagIdToTagPathMapping.put(tagId, tagPathStr);
            }
        }
        // get all Annotations for the participating Collections and Tags with their matching Annotation Properties
        List<Map<String, Object>> resultMap = g.V().hasLabel(nt(ProjectRevision)).outE(rt(hasDocument)).inV().hasLabel(nt(SourceDocument)).as("doc-uuid").outE(rt(hasCollection)).inV().has(nt(MarkupCollection), "collectionId", P.within(collectionIdList)).as("collection-uuid").outE(rt(hasInstance)).inV().hasLabel(nt(TagInstance)).as("anno").optional(__.outE(rt(hasProperty)).inV().hasLabel(nt(AnnotationProperty)).filter(propertyValueFilter)).as("anno-property").select("anno").inE(rt(hasInstance)).outV().has(nt(Tag), "tagId", P.within(validTagIdToTagPathMapping.keySet())).as("tag").optional(__.outE(rt(hasProperty)).inV().hasLabel(nt(Property)).filter(propertyNameFilter)).as("property").select("doc-uuid", "collection-uuid", "anno", "tag", "anno-property", "property").by("documentId").by("collectionId").by().by().by().by().toList();
        // remembers the annotations whose system properties have already been handled
        HashSet<String> systemPropertiesAddedTagInstanceIds = new HashSet<>();
        for (Map<String, Object> entry : resultMap) {
            String documentId = (String) entry.get("doc-uuid");
            String collectionId = (String) entry.get("collection-uuid");
            Vertex annoV = (Vertex) entry.get("anno");
            String tagInstanceId = (String) annoV.property("tagInstanceId").value();
            @SuppressWarnings("unchecked") List<Integer> ranges = (List<Integer>) annoV.property("ranges").value();
            // the flat integer list is consumed pairwise, one Range per pair
            List<Range> rangeList = new ArrayList<>();
            for (int i = 0; i < ranges.size() - 1; i += 2) {
                rangeList.add(new Range(ranges.get(i), ranges.get(i + 1)));
            }
            String annoAuthor = (String) annoV.property("author").value();
            String annoTimestamp = (String) annoV.property("timestamp").value();
            Vertex tagV = (Vertex) entry.get("tag");
            String tagId = (String) tagV.property("tagId").value();
            String tagPath = validTagIdToTagPathMapping.get(tagId);
            TagDefinition tag = (TagDefinition) tagV.property("tag").value();
            String color = tag.getColor();
            Vertex propertyV = (Vertex) entry.get("property");
            if (propertyV.equals(tagV)) {
                // no matching Properties for this Tag
                propertyV = null;
            }
            Vertex annoPropertyV = (Vertex) entry.get("anno-property");
            if (annoPropertyV.equals(annoV)) {
                // no matching Annotation Property for this Annotation
                annoPropertyV = null;
            }
            // system properties are handled only once per Annotation; the user defined name and
            // value filters are handed on to the helper
            if (!systemPropertiesAddedTagInstanceIds.contains(tagInstanceId)) {
                // try to add rows for matching system properties
                addTagQueryResultRowForSystemProperty(queryId, result, PropertyDefinition.SystemPropertyName.catma_markupauthor, annoAuthor, propertyNameFilter, propertyValueFilter, documentId, collectionId, tagId, tagPath, tagInstanceId, rangeList);
                addTagQueryResultRowForSystemProperty(queryId, result, PropertyDefinition.SystemPropertyName.catma_markuptimestamp, annoTimestamp, propertyNameFilter, propertyValueFilter, documentId, collectionId, tagId, tagPath, tagInstanceId, rangeList);
                addTagQueryResultRowForSystemProperty(queryId, result, PropertyDefinition.SystemPropertyName.catma_displaycolor, color, propertyNameFilter, propertyValueFilter, documentId, collectionId, tagId, tagPath, tagInstanceId, rangeList);
                systemPropertiesAddedTagInstanceIds.add(tagInstanceId);
            }
            // add rows for user defined properties for each matching value
            if ((propertyV != null) && (annoPropertyV != null)) {
                @SuppressWarnings("unchecked") List<String> propertyValues = (List<String>) annoPropertyV.property("values").value();
                String annoPropertyDefinitionId = (String) annoPropertyV.property("uuid").value();
                String propertyName = (String) propertyV.property("name").value();
                String propertyDefinitionId = (String) propertyV.property("uuid").value();
                // only combine an Annotation Property with the Property definition it belongs to
                if (annoPropertyDefinitionId.equals(propertyDefinitionId)) {
                    for (String propValue : propertyValues) {
                        if (propertyValueFilter.testValue(propValue)) {
                            result.add(new TagQueryResultRow(queryId, documentId, rangeList, collectionId, tagId, tagPath, // TODO: Version
                            "", tagInstanceId, annoPropertyDefinitionId, propertyName, propValue));
                        }
                    }
                }
            }
        }
    }
    return result;
}
Also used : Vertex(org.apache.tinkerpop.gremlin.structure.Vertex) TagDefinition(de.catma.tag.TagDefinition) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) GraphTraversalSource(org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) List(java.util.List) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Path(org.apache.tinkerpop.gremlin.process.traversal.Path) Range(de.catma.document.Range) QueryResultRowArray(de.catma.queryengine.result.QueryResultRowArray) Map(java.util.Map) HashMap(java.util.HashMap)

Example 5 with TagQueryResultRow

use of de.catma.queryengine.result.TagQueryResultRow in project catma by forTEXT.

the class TPGraphProjectIndexer method searchTagDefinitionPath.

/**
 * Searches the given collections for annotations whose tag path matches {@code tagPath}.
 *
 * @param queryId the id of the query the result rows are created for
 * @param collectionIdList the ids of the participating collections
 * @param tagPath wildcard pattern for the tag path; a pattern not starting with "/" is prefixed
 *        with the "%" wildcard
 * @return one row per matching annotation, empty if none of the collections references a Tag
 * @throws Exception declared by the overridden method
 */
@Override
public QueryResult searchTagDefinitionPath(QueryId queryId, List<String> collectionIdList, String tagPath) throws Exception {
    QueryResultRowArray result = new QueryResultRowArray();
    // add default wildcard if no explicit root is defined
    if (!tagPath.startsWith("/")) {
        tagPath = "%" + tagPath;
    }
    final String tagPathRegex = SQLWildcard2RegexConverter.convert(tagPath);
    GraphTraversalSource g = graph.traversal();
    // get all Tags referenced by the participating Collections
    Set<Vertex> tagVs = g.V().hasLabel(nt(ProjectRevision)).outE(rt(hasDocument)).inV().hasLabel(nt(SourceDocument)).outE(rt(hasCollection)).inV().has(nt(MarkupCollection), "collectionId", P.within(collectionIdList)).outE(rt(hasInstance)).inV().hasLabel(nt(TagInstance)).inE(rt(hasInstance)).outV().hasLabel(nt(Tag)).toSet();
    if (!tagVs.isEmpty()) {
        // get all paths for the Tags
        List<Path> tagPaths = g.V(tagVs).optional(__.repeat(__.out(rt(hasParent))).until(__.outE(rt(hasParent)).count().is(0))).path().toList();
        // compile the tag path expression once instead of once per tag path
        final Pattern compiledTagPathRegex = Pattern.compile(tagPathRegex);
        // collect all Tags matching the given pattern and map them by their tagId
        Map<String, String> validTagIdToTagPathMapping = new HashMap<>();
        for (Path path : tagPaths) {
            Vertex tag = path.get(0);
            String tagId = (String) tag.properties("tagId").next().orElse(null);
            // the path starts at the Tag itself, so each name is prepended to build the tag path string
            StringBuilder builder = new StringBuilder();
            String conc = "/";
            path.forEach(tagVertex -> {
                builder.insert(0, ((Vertex) tagVertex).properties("name").next().orElse(null));
                builder.insert(0, conc);
            });
            String tagPathStr = builder.toString();
            if (compiledTagPathRegex.matcher(tagPathStr).matches()) {
                validTagIdToTagPathMapping.put(tagId, tagPathStr);
            }
        }
        // get all Annotations of the participating Collections that use one of the matching Tags
        List<Map<String, Object>> resultMap = g.V().hasLabel(nt(ProjectRevision)).outE(rt(hasDocument)).inV().hasLabel(nt(SourceDocument)).as("doc").outE(rt(hasCollection)).inV().has(nt(MarkupCollection), "collectionId", P.within(collectionIdList)).as("collection").outE(rt(hasInstance)).inV().hasLabel(nt(TagInstance)).as("anno", "ranges").inE(rt(hasInstance)).outV().has(nt(Tag), "tagId", P.within(validTagIdToTagPathMapping.keySet())).as("tag").select("doc", "collection", "anno", "ranges", "tag").by("documentId").by("collectionId").by("tagInstanceId").by("ranges").by("tagId").toList();
        for (Map<String, Object> entry : resultMap) {
            @SuppressWarnings("unchecked") List<Integer> ranges = (List<Integer>) entry.get("ranges");
            // the flat integer list is consumed pairwise, one Range per pair
            List<Range> rangeList = new ArrayList<>();
            for (int i = 0; i < ranges.size() - 1; i += 2) {
                rangeList.add(new Range(ranges.get(i), ranges.get(i + 1)));
            }
            result.add(new TagQueryResultRow(queryId, (String) entry.get("doc"), rangeList, (String) entry.get("collection"), (String) entry.get("tag"), validTagIdToTagPathMapping.get((String) entry.get("tag")), // TODO: version
            "", (String) entry.get("anno")));
        }
    }
    return result;
}
Also used : Path(org.apache.tinkerpop.gremlin.process.traversal.Path) Vertex(org.apache.tinkerpop.gremlin.structure.Vertex) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Range(de.catma.document.Range) GraphTraversalSource(org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) List(java.util.List) ArrayList(java.util.ArrayList) QueryResultRowArray(de.catma.queryengine.result.QueryResultRowArray) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

TagQueryResultRow (de.catma.queryengine.result.TagQueryResultRow)20 QueryResultRow (de.catma.queryengine.result.QueryResultRow)13 Range (de.catma.document.Range)7 QueryResultRowArray (de.catma.queryengine.result.QueryResultRowArray)7 ArrayList (java.util.ArrayList)7 SourceDocument (de.catma.document.source.SourceDocument)6 KwicProvider (de.catma.indexer.KwicProvider)6 TagDefinition (de.catma.tag.TagDefinition)6 QueryResult (de.catma.queryengine.result.QueryResult)5 ExecutionException (java.util.concurrent.ExecutionException)5 AnnotationCollectionReference (de.catma.document.annotation.AnnotationCollectionReference)4 HashMap (java.util.HashMap)4 Indexer (de.catma.indexer.Indexer)3 Project (de.catma.project.Project)3 IOException (java.io.IOException)3 List (java.util.List)3 Map (java.util.Map)3 UI (com.vaadin.ui.UI)2 KeywordInSpanContext (de.catma.indexer.KeywordInSpanContext)2 OutputStreamWriter (java.io.OutputStreamWriter)2