Search in sources :

Example 1 with QueryResultRow

use of de.catma.queryengine.result.QueryResultRow in project catma by forTEXT.

the class CSVExportGroupedStreamSource method getStream.

/**
 * Produces the grouped query result as CSV and returns a stream to read it from.
 *
 * <p>The rows of the supplied query result are first counted per group on the
 * calling thread: one "Total" column, one column per source document and, for
 * tag based rows, one column per markup collection. The group key is the row's
 * phrase, or, when {@code groupByTagSupplier} yields {@code true}, the tag
 * definition path (with a placeholder key for rows that carry no tag).
 *
 * <p>The CSV itself is written by a background job into a pipe whose read end
 * is returned. Before returning, this method polls for up to roughly one second
 * (100 tries, 10 ms apart) for the first bytes to show up in the pipe.
 *
 * @return the read end of the pipe the CSV data is written to
 */
@Override
public InputStream getStream() {
    final QueryResult queryResult = queryResultSupplier.get();
    final Set<String> documentIds = new TreeSet<String>();
    final Multimap<String, String> collectionIdByDocumentId = ArrayListMultimap.create();
    final Table<String, String, Integer> groupings = HashBasedTable.create();
    for (QueryResultRow row : queryResult) {
        String group = row.getPhrase();
        if (groupByTagSupplier.get()) {
            if (row instanceof TagQueryResultRow) {
                group = ((TagQueryResultRow) row).getTagDefinitionPath();
            } else {
                group = TagQueryResultRowItem.getNoTagAvailableKey();
            }
        }
        groupings.put(group, "Total", getValue(groupings, group, "Total") + 1);
        final String rowDocumentId = row.getSourceDocumentId();
        groupings.put(group, rowDocumentId, getValue(groupings, group, rowDocumentId) + 1);
        documentIds.add(rowDocumentId);
        if (row instanceof TagQueryResultRow) {
            final String rowCollectionId = ((TagQueryResultRow) row).getMarkupCollectionId();
            collectionIdByDocumentId.put(rowDocumentId, rowCollectionId);
            groupings.put(group, rowCollectionId, getValue(groupings, group, rowCollectionId) + 1);
        }
    }
    final PipedInputStream in = new PipedInputStream();
    final UI ui = UI.getCurrent();
    backgroundServiceProvider.submit("csv-export", new DefaultProgressCallable<Void>() {

        @Override
        public Void call() throws Exception {
            // The pipe and the writer are opened in a try-with-resources so that the
            // write end gets closed even if building the header fails; otherwise a
            // consumer of the returned stream could wait for data that never arrives.
            try (PipedOutputStream out = new PipedOutputStream(in);
                    OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8")) {
                ArrayList<String> header = new ArrayList<>();
                header.add("Group");
                header.add("Total");
                for (String documentId : documentIds) {
                    KwicProvider kwicProvider = kwicProviderCache.get(documentId);
                    header.add(kwicProvider.getSourceDocumentName() + " (" + documentId + ")");
                    // TreeSet: de-duplicates the collection ids and fixes the column order
                    for (String collectionId : new TreeSet<String>(collectionIdByDocumentId.get(documentId))) {
                        header.add(kwicProvider.getSourceDocument().getUserMarkupCollectionReference(collectionId).toString() + " (" + collectionId + ")");
                    }
                }
                try (CSVPrinter csvPrinter = new CSVPrinter(writer, CSVFormat.EXCEL.withDelimiter(';').withHeader(header.toArray(new String[] {})))) {
                    for (String group : new TreeSet<String>(groupings.rowKeySet())) {
                        csvPrinter.print(group);
                        csvPrinter.print(groupings.get(group, "Total"));
                        // same iteration order as used for the header columns above
                        for (String documentId : documentIds) {
                            csvPrinter.print(groupings.get(group, documentId));
                            for (String collectionId : new TreeSet<String>(collectionIdByDocumentId.get(documentId))) {
                                csvPrinter.print(groupings.get(group, collectionId));
                            }
                        }
                        csvPrinter.println();
                    }
                    csvPrinter.flush();
                }
            }
            return null;
        }
    }, new ExecutionListener<Void>() {

        @Override
        public void done(Void result) {
        // noop
        }

        @Override
        public void error(Throwable t) {
            ((ErrorHandler) ui).showAndLogError("Error export data to CSV!", t);
        }
    });
    // waiting on the background thread to send data to the pipe
    int tries = 100;
    try {
        while (!(in.available() > 0) && tries > 0) {
            Thread.sleep(10);
            tries--;
        }
    } catch (InterruptedException e) {
        // restore the interrupt flag so callers further up can still react to it
        Thread.currentThread().interrupt();
        Logger.getLogger(getClass().getName()).log(Level.WARNING, "Error while waiting on CSV export!", e);
    } catch (IOException e) {
        Logger.getLogger(getClass().getName()).log(Level.WARNING, "Error while waiting on CSV export!", e);
    }
    return in;
}
Also used : TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) QueryResultRow(de.catma.queryengine.result.QueryResultRow) ArrayList(java.util.ArrayList) PipedOutputStream(java.io.PipedOutputStream) CSVPrinter(org.apache.commons.csv.CSVPrinter) QueryResult(de.catma.queryengine.result.QueryResult) UI(com.vaadin.ui.UI) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) TreeSet(java.util.TreeSet) KwicProvider(de.catma.indexer.KwicProvider) PipedInputStream(java.io.PipedInputStream) IOException(java.io.IOException) IOException(java.io.IOException) OutputStreamWriter(java.io.OutputStreamWriter)

Example 2 with QueryResultRow

use of de.catma.queryengine.result.QueryResultRow in project catma by forTEXT.

the class AnnotatedDocumentQueryResultRowItem method addChildRowItems.

/**
 * Adds one child item per markup collection found among the tag based rows of
 * this item's grouped query result.
 *
 * <p>Rows that are not {@link TagQueryResultRow}s are ignored. Each newly added
 * collection item gets a dummy child item. Any exception is reported through
 * the current UI's error handler.
 *
 * @param treeData the tree data the child items are added to
 * @param kwicProviderCache cache used to look up the source document of this item
 */
@Override
public void addChildRowItems(TreeData<QueryResultRowItem> treeData, LoadingCache<String, KwicProvider> kwicProviderCache) {
    try {
        // bucket the tag based rows by the collection they belong to
        HashMap<String, QueryResultRowArray> rowsPerCollection = new HashMap<String, QueryResultRowArray>();
        for (QueryResultRow row : groupedQueryResult) {
            if (!(row instanceof TagQueryResultRow)) {
                continue;
            }
            String id = ((TagQueryResultRow) row).getMarkupCollectionId();
            rowsPerCollection.computeIfAbsent(id, key -> new QueryResultRowArray()).add(row);
        }
        // one tree item per collection, skipping those the tree already holds
        for (String id : rowsPerCollection.keySet()) {
            SourceDocument sourceDocument = kwicProviderCache.get(getDocumentId()).getSourceDocument();
            String name = sourceDocument.getUserMarkupCollectionReference(id).getName();
            CollectionQueryResultRowItem collectionItem = new CollectionQueryResultRowItem(identity, name, getDocumentId(), id, rowsPerCollection.get(id), project);
            if (treeData.contains(collectionItem)) {
                continue;
            }
            treeData.addItem(this, collectionItem);
            treeData.addItem(collectionItem, new DummyQueryResultRowItem());
        }
    } catch (Exception e) {
        ((ErrorHandler) UI.getCurrent()).showAndLogError("error displaying annotated query results", e);
    }
}
Also used : HashMap(java.util.HashMap) QueryResultRow(de.catma.queryengine.result.QueryResultRow) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) SourceDocument(de.catma.document.source.SourceDocument) QueryResultRowArray(de.catma.queryengine.result.QueryResultRowArray)

Example 3 with QueryResultRow

use of de.catma.queryengine.result.QueryResultRow in project catma by forTEXT.

the class CollectionQueryResultRowItem method addChildRowItems.

/**
 * Adds one keyword-in-context child item for every tag based row of this item.
 *
 * <p>Rows that are not {@link TagQueryResultRow}s are skipped, as are items the
 * tree data already contains. Any exception is reported through the current
 * UI's error handler.
 *
 * @param treeData the tree data the child items are added to
 * @param kwicProviderCache cache used to look up the KWIC provider of a row's source document
 */
@Override
public void addChildRowItems(TreeData<QueryResultRowItem> treeData, LoadingCache<String, KwicProvider> kwicProviderCache) {
    try {
        for (QueryResultRow row : getRows()) {
            if (!(row instanceof TagQueryResultRow)) {
                continue;
            }
            TagQueryResultRow tagRow = (TagQueryResultRow) row;
            KwicProvider provider = kwicProviderCache.get(row.getSourceDocumentId());
            TagDefinition tag = project.getTagManager().getTagLibrary().getTagDefinition(tagRow.getTagDefinitionId());
            // each builder call receives its own copy of the row's ranges
            String annotatedText = AnnotatedTextProvider.buildAnnotatedText(new ArrayList<>(tagRow.getRanges()), provider, tag);
            String annotatedKwic = AnnotatedTextProvider.buildAnnotatedKeywordInContext(new ArrayList<>(tagRow.getRanges()), provider, tag, tagRow.getTagDefinitionPath());
            KwicQueryResultRowItem kwicItem = new KwicQueryResultRowItem(tagRow, annotatedText, annotatedKwic, true);
            if (treeData.contains(kwicItem)) {
                continue;
            }
            treeData.addItem(this, kwicItem);
        }
    } catch (Exception e) {
        ((ErrorHandler) UI.getCurrent()).showAndLogError("error displaying annotated kwic query results", e);
    }
}
Also used : TagDefinition(de.catma.tag.TagDefinition) QueryResultRow(de.catma.queryengine.result.QueryResultRow) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) KwicProvider(de.catma.indexer.KwicProvider) ArrayList(java.util.ArrayList)

Example 4 with QueryResultRow

use of de.catma.queryengine.result.QueryResultRow in project catma by forTEXT.

the class OrRefinement method refine.

/**
 * Applies both refinements to the given result and returns the union of their
 * rows with duplicates removed.
 *
 * <p>Duplicates are detected via a {@link HashSet}, so the order of the rows in
 * the returned result follows that set's iteration order.
 *
 * @param result the query result to refine
 * @return a new result holding every row produced by either refinement once
 * @throws Exception if one of the two refinements fails
 */
public QueryResult refine(QueryResult result) throws Exception {
    QueryResultRowArray firstRows = refinement1.refine(result).asQueryResultRowArray();
    QueryResultRowArray secondRows = refinement2.refine(result).asQueryResultRowArray();

    // the set drops rows that both refinements returned
    Set<QueryResultRow> uniqueRows = new HashSet<>();
    uniqueRows.addAll(firstRows);
    uniqueRows.addAll(secondRows);

    QueryResultRowArray union = new QueryResultRowArray();
    union.addAll(uniqueRows);
    return union;
}
Also used : QueryResultRow(de.catma.queryengine.result.QueryResultRow) QueryResultRowArray(de.catma.queryengine.result.QueryResultRowArray) HashSet(java.util.HashSet)

Example 5 with QueryResultRow

use of de.catma.queryengine.result.QueryResultRow in project catma by forTEXT.

the class TPGraphProjectIndexer method searchPhrase.

/**
 * Searches the given documents for occurrences of a sequence of terms.
 *
 * <p>The traversal starts at positions of a term matching the first entry of
 * {@code termList} and then, for every further entry, moves on to the adjacent
 * position and requires the term at that position to match as well. Each hit
 * becomes a {@link QueryResultRow} ranging from the start offset of the first
 * position to the end offset of the last one.
 *
 * @param queryId the id stored with every result row
 * @param documentIdList the ids of the documents to search in
 * @param phrase the phrase stored with every result row
 * @param termList the terms to match in order; the first entry is always read
 * @param limit the maximum number of hits, only applied when greater than zero
 * @param termTestFunction the predicate used to compare a term literal with an entry of {@code termList}
 * @return the hits as query result rows
 */
private QueryResult searchPhrase(QueryId queryId, List<String> documentIdList, String phrase, List<String> termList, int limit, BiPredicate<String, String> termTestFunction) {
    GraphTraversalSource g = graph.traversal();

    // anchor: positions of terms matching the first entry, within the requested documents
    GraphTraversal<Vertex, Vertex> traversal = g.V()
        .hasLabel(nt(ProjectRevision))
        .outE(rt(hasDocument))
        .inV()
        .has(nt(SourceDocument), "documentId", P.within(documentIdList))
        .as("doc")
        .inE(rt(isPartOf))
        .outV()
        .has(nt(Term), "literal", P.test(termTestFunction, termList.get(0)))
        .outE(rt(hasPosition))
        .inV()
        .hasLabel(nt(Position))
        .as("startPos", "currentPos");

    // every further term has to sit on the position adjacent to the current one
    for (int termIdx = 1; termIdx < termList.size(); termIdx++) {
        String nextTerm = termList.get(termIdx);
        traversal = traversal
            .outE(rt(isAdjacentTo))
            .inV()
            .hasLabel(nt(Position))
            .as("currentPos")
            .inE(rt(hasPosition))
            .outV()
            .has(nt(Term), "literal", P.test(termTestFunction, nextTerm))
            .select("currentPos");
    }

    if (limit > 0) {
        traversal = traversal.limit(limit);
    }

    List<QueryResultRow> hits = traversal
        .select("doc", "startPos", "currentPos")
        .by("documentId")
        .by("startOffset")
        .by("endOffset")
        .map(resultMap -> new QueryResultRow(
            queryId,
            (String) resultMap.get().get("doc"),
            new Range((Integer) resultMap.get().get("startPos"), (Integer) resultMap.get().get("currentPos")),
            phrase))
        .toList();
    return new QueryResultRowArray(hits);
}
Also used : GraphTraversalSource(org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource) Vertex(org.apache.tinkerpop.gremlin.structure.Vertex) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) CommentQueryResultRow(de.catma.queryengine.result.CommentQueryResultRow) QueryResultRow(de.catma.queryengine.result.QueryResultRow) QueryResultRowArray(de.catma.queryengine.result.QueryResultRowArray) Range(de.catma.document.Range)

Aggregations

QueryResultRow (de.catma.queryengine.result.QueryResultRow)29 TagQueryResultRow (de.catma.queryengine.result.TagQueryResultRow)22 QueryResultRowArray (de.catma.queryengine.result.QueryResultRowArray)18 SourceDocument (de.catma.document.source.SourceDocument)13 Range (de.catma.document.Range)12 KwicProvider (de.catma.indexer.KwicProvider)10 Project (de.catma.project.Project)10 QueryResult (de.catma.queryengine.result.QueryResult)10 ArrayList (java.util.ArrayList)10 AnnotationCollectionReference (de.catma.document.annotation.AnnotationCollectionReference)8 TagDefinition (de.catma.tag.TagDefinition)8 UI (com.vaadin.ui.UI)6 HashSet (java.util.HashSet)6 CacheLoader (com.google.common.cache.CacheLoader)5 LoadingCache (com.google.common.cache.LoadingCache)5 Query (com.vaadin.data.provider.Query)5 AnnotationCollectionManager (de.catma.document.annotation.AnnotationCollectionManager)5 List (java.util.List)5 Map (java.util.Map)5 CacheBuilder (com.google.common.cache.CacheBuilder)4