Search in sources :

Example 1 with KeywordInSpanContext

use of de.catma.indexer.KeywordInSpanContext in project catma by forTEXT.

the class AnnotatedTextProvider method buildAnnotatedKeywordInContext.

/**
 * Builds an HTML fragment that shows the annotated text of the given ranges
 * in its surrounding context, followed by the tag path.
 *
 * <p>The ranges are merged via {@code Range.mergeRanges} and one keyword-in-context
 * excerpt is requested per merged range. Each excerpt is rendered as
 * backward context, the (shortened) keyword wrapped in a {@code <span>} coloured
 * with the tag's colour, and forward context. Consecutive excerpts are separated
 * by {@code " [" + HORIZONTAL_ELLIPSIS + "] "}.
 *
 * <p>If loading the excerpts fails with an {@link IOException}, the error is
 * reported through the current UI's {@code ErrorHandler} and whatever has been
 * built so far (normally an empty string) is returned.
 *
 * @param ranges the annotated ranges; merged before the excerpts are loaded
 * @param kwicProvider source of the keyword-in-context excerpts
 * @param tagDefinition tag whose colour is used for the highlighted keyword
 * @param tagPath tag path printed below the excerpts; appended as-is
 * @return the HTML fragment
 */
public static String buildAnnotatedKeywordInContext(List<Range> ranges, KwicProvider kwicProvider, TagDefinition tagDefinition, String tagPath) {
    StringBuilder builder = new StringBuilder();
    try {
        // second argument is presumably the context size — TODO confirm against KwicProvider
        List<KeywordInSpanContext> kwics = kwicProvider.getKwic(Range.mergeRanges(new TreeSet<>(ranges)), 5);
        String conc = "";
        for (KeywordInSpanContext kwic : kwics) {
            // The separator goes BETWEEN excerpts: empty before the first one,
            // the ellipsis marker before every following one. Appending it after
            // the excerpt would glue the first two excerpts together and leave a
            // dangling separator at the end.
            builder.append(conc);
            builder.append(Cleaner.clean(kwic.getBackwardContext()));
            builder.append("<span");
            builder.append(" class=\"annotation-details-tag-color\"");
            builder.append(" style=\"");
            builder.append(" background-color:");
            builder.append("#").append(ColorConverter.toHex(tagDefinition.getColor()));
            builder.append(";");
            builder.append(" color:");
            // pick a text colour that stays readable on the tag's background colour
            builder.append(ColorConverter.isLightColor(tagDefinition.getColor()) ? "black" : "white");
            builder.append(";");
            builder.append("\">");
            builder.append(Cleaner.clean(shorten(kwic.getKeyword(), LARGE_MAX_ANNOTATED_KEYWORD_DISPLAY_LENGTH)));
            builder.append("</span>");
            builder.append(Cleaner.clean(kwic.getForwardContext()));
            conc = " [" + HORIZONTAL_ELLIPSIS + "] ";
        }
        builder.append("<br /><hr />");
        builder.append("Tag Path: <b>");
        // NOTE(review): tagPath is inserted without passing through Cleaner.clean,
        // unlike the context strings above — confirm callers only pass trusted text.
        builder.append(tagPath);
        builder.append("</b>");
    } catch (IOException e) {
        ((ErrorHandler) UI.getCurrent()).showAndLogError("Error loading keyword in context!", e);
    }
    return builder.toString();
}
Also used : TreeSet(java.util.TreeSet) KeywordInSpanContext(de.catma.indexer.KeywordInSpanContext) IOException(java.io.IOException)

Example 2 with KeywordInSpanContext

use of de.catma.indexer.KeywordInSpanContext in project catma by forTEXT.

the class AnnotatedTextProvider method buildKeywordInContext.

/**
 * Renders the text of a single range in its surrounding context as an HTML fragment.
 *
 * <p>The fragment consists of the backward context, the keyword (shortened to
 * {@code keywordLength}) inside a grey-highlighted {@code <span>}, and the forward
 * context. An {@link IOException} while loading the excerpt is reported through
 * the current UI's {@code ErrorHandler}; the text built up to that point is returned.
 *
 * @param keyword not read by this method
 * @param range the range whose text is shown in context
 * @param kwicProvider source of the keyword-in-context excerpt
 * @param keywordLength length passed to {@code shorten} for the keyword
 * @return the HTML fragment
 */
private static String buildKeywordInContext(String keyword, Range range, KwicProvider kwicProvider, int keywordLength) {
    StringBuilder html = new StringBuilder();
    try {
        KeywordInSpanContext excerpt = kwicProvider.getKwic(range, 5);
        // The chained appends evaluate strictly left to right, so the pieces are
        // produced and appended in the order: backward context, span, forward context.
        html.append(Cleaner.clean(excerpt.getBackwardContext()))
            .append("<span class=\"annotation-details-tag-color\" style=\" background-color:#cacfd2;\">")
            .append(Cleaner.clean(shorten(excerpt.getKeyword(), keywordLength)))
            .append("</span>")
            .append(Cleaner.clean(excerpt.getForwardContext()));
    } catch (IOException loadFailure) {
        ErrorHandler errorHandler = (ErrorHandler) UI.getCurrent();
        errorHandler.showAndLogError("Error loading keyword in context!", loadFailure);
    }
    return html.toString();
}
Also used : KeywordInSpanContext(de.catma.indexer.KeywordInSpanContext) IOException(java.io.IOException)

Example 3 with KeywordInSpanContext

use of de.catma.indexer.KeywordInSpanContext in project catma by forTEXT.

the class KwicListJSONSerializer method toJSON.

/**
 * Serializes a list of keyword-in-context entries to a JSON object string.
 *
 * <p>The resulting object holds three parallel arrays (one entry per serialized
 * kwic): the backward-token terms, the keyword, and the forward-token terms.
 * It also carries the {@code caseSensitive} flag and a {@code rightToLeftLanguage}
 * flag, both written as the strings {@code "true"}/{@code "false"}.
 * Only entries that are {@link KeywordInSpanContext} instances are serialized;
 * other entries are skipped.
 *
 * @param kwicList the entries to serialize
 * @param caseSensitive value written to the case-sensitivity field
 * @return the JSON text
 */
public String toJSON(List<KeywordInContext> kwicList, boolean caseSensitive) {
    JsonNodeFactory factory = JsonNodeFactory.instance;
    ObjectNode kwicListJson = factory.objectNode();
    ArrayNode prefixArraysJson = factory.arrayNode();
    ArrayNode tokenArray = factory.arrayNode();
    ArrayNode postfixArraysJson = factory.arrayNode();
    kwicListJson.set(KwicSerializationField.prefixArrays.name(), prefixArraysJson);
    kwicListJson.set(KwicSerializationField.tokenArray.name(), tokenArray);
    kwicListJson.set(KwicSerializationField.postfixArrays.name(), postfixArraysJson);
    kwicListJson.put(KwicSerializationField.caseSensitive.name(), Boolean.toString(caseSensitive));
    int rtlCount = 0;
    for (KeywordInContext kwic : kwicList) {
        if (kwic instanceof KeywordInSpanContext) {
            KeywordInSpanContext spanKwic = (KeywordInSpanContext) kwic;
            ArrayNode prefixArrayJson = factory.arrayNode();
            prefixArraysJson.add(prefixArrayJson);
            for (TermInfo ti : spanKwic.getSpanContext().getBackwardTokens()) {
                prefixArrayJson.add(ti.getTerm());
            }
            tokenArray.add(spanKwic.getKeyword());
            ArrayNode postfixArrayJson = factory.arrayNode();
            postfixArraysJson.add(postfixArrayJson);
            for (TermInfo ti : spanKwic.getSpanContext().getForwardTokens()) {
                postfixArrayJson.add(ti.getTerm());
            }
            if (kwic.isRightToLeft()) {
                rtlCount++;
            }
        }
    }
    // rightToLeftLanguage -> true if more than half of the kwics stem from RTL documents.
    // Compared in integer arithmetic: rounding size/2 half-up would demand more than
    // a real majority for odd sizes (e.g. 1 of 1 or 2 of 3 would yield false).
    boolean mostlyRightToLeft = 2L * rtlCount > kwicList.size();
    kwicListJson.put(KwicSerializationField.rightToLeftLanguage.name(), Boolean.toString(mostlyRightToLeft));
    return kwicListJson.toString();
}
Also used : ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) TermInfo(de.catma.indexer.TermInfo) ArrayNode(com.fasterxml.jackson.databind.node.ArrayNode) KeywordInSpanContext(de.catma.indexer.KeywordInSpanContext) KeywordInContext(de.catma.document.source.KeywordInContext) JsonNodeFactory(com.fasterxml.jackson.databind.node.JsonNodeFactory)

Example 4 with KeywordInSpanContext

use of de.catma.indexer.KeywordInSpanContext in project catma by forTEXT.

the class DoubleTreePanel method setSelectedQueryResultRows.

/**
 * Rebuilds the keyword-in-context list from the selected rows and hands it to
 * the double tree.
 *
 * <p>If the first selected row is a {@link TagQueryResultRow} and the display
 * setting is {@code GROUPED_BY_TAG}, each kwic is re-created with the row's tag
 * path (slashes removed) as its first constructor argument; otherwise the loaded
 * kwic is used unchanged. Rows whose kwic cannot be loaded are reported to the
 * user and skipped. An empty selection results in an empty list being passed to
 * the double tree.
 *
 * @param selectedRows the rows to visualize
 */
@Override
public void setSelectedQueryResultRows(Iterable<QueryResultRow> selectedRows) {
    kwics.clear();
    // Guard the peek at the first row: next() on an empty selection would throw.
    boolean hasRows = selectedRows.iterator().hasNext();
    boolean groupedByTag = hasRows
            && (selectedRows.iterator().next() instanceof TagQueryResultRow)
            && displaySettings.equals(DisplaySetting.GROUPED_BY_TAG);
    if (groupedByTag) {
        for (QueryResultRow row : selectedRows) {
            TagQueryResultRow tqrr = (TagQueryResultRow) row;
            String tagPath = tqrr.getTagDefinitionPath().replace("/", "");
            KeywordInSpanContext kwic = loadKwicOrReport(row, "Error visualizing group by tag");
            if (kwic != null) {
                KeywordInSpanContext newKwic = new KeywordInSpanContext(tagPath, kwic.getKwic(), kwic.getKwicSourceRange(), kwic.getRelativeKeywordStartPos(), kwic.isRightToLeft(), kwic.getSpanContext());
                kwics.add(newKwic);
            }
        }
    } else {
        for (QueryResultRow row : selectedRows) {
            KeywordInSpanContext kwic = loadKwicOrReport(row, "Error visualizing selected data");
            // never hand a null entry to the visualization
            if (kwic != null) {
                kwics.add(kwic);
            }
        }
    }
    doubleTree.setupFromArrays(kwics, true);
}

/**
 * Loads the kwic for a row, reporting a failure to the user instead of propagating it.
 *
 * @param row the row whose source document and range are used
 * @param errorMessage message shown and logged if loading fails
 * @return the loaded kwic, or {@code null} if the provider or the kwic could not be loaded
 */
private KeywordInSpanContext loadKwicOrReport(QueryResultRow row, String errorMessage) {
    try {
        KwicProvider kwicProvider = kwicProviderCache.get(row.getSourceDocumentId());
        return kwicProvider.getKwic(row.getRange(), contextSize);
    } catch (ExecutionException | IOException e) {
        ((CatmaApplication) UI.getCurrent()).showAndLogError(errorMessage, e);
        return null;
    }
}
Also used : TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) QueryResultRow(de.catma.queryengine.result.QueryResultRow) KwicProvider(de.catma.indexer.KwicProvider) KeywordInSpanContext(de.catma.indexer.KeywordInSpanContext) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 5 with KeywordInSpanContext

use of de.catma.indexer.KeywordInSpanContext in project catma by forTEXT.

the class CSVExportFlatStreamSource method getStream.

/**
 * Returns a stream from which the query result can be read as CSV.
 *
 * <p>The CSV is produced by a background task that writes into a
 * {@link PipedOutputStream} connected to the returned {@link PipedInputStream}.
 * Records are written in the Excel CSV format with {@code ';'} as delimiter and
 * encoded as UTF-8. Tag rows produce one record per merged range with additional
 * tag/property columns; all other rows produce one record for the row's range.
 *
 * <p>Before returning, the calling thread waits up to 10 seconds for the
 * background task to signal that it has flushed a record.
 *
 * @return the reading end of the pipe
 */
@Override
public InputStream getStream() {
    final QueryResult queryResult = queryResultSupplier.get();
    final PipedInputStream in = new PipedInputStream();
    // captured here because the error callback below uses it later
    final UI ui = UI.getCurrent();
    final Lock lock = new ReentrantLock();
    final Condition sending = lock.newCondition();
    // Take the lock BEFORE submitting the task: the task needs the same lock to
    // signal, so it cannot signal until this thread releases the lock in await().
    lock.lock();
    backgroundServiceProvider.submit("csv-export", new DefaultProgressCallable<Void>() {

        @Override
        public Void call() throws Exception {
            PipedOutputStream out = new PipedOutputStream(in);
            OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8");
            // resolves a tag definition id to its colour as "#" + hex, once per id
            LoadingCache<String, String> colorCache = CacheBuilder.newBuilder().build(new CacheLoader<String, String>() {

                @Override
                public String load(String tagDefinitionId) throws Exception {
                    return "#" + ColorConverter.toHex(project.getTagManager().getTagLibrary().getTagDefinition(tagDefinitionId).getColor());
                }
            });
            // closing the printer at the end of this block also ends the piped output
            // (presumably CSVPrinter.close() closes the wrapped writer — confirm)
            try (CSVPrinter csvPrinter = new CSVPrinter(writer, CSVFormat.EXCEL.withDelimiter(';'))) {
                for (QueryResultRow row : queryResult) {
                    KwicProvider kwicProvider = kwicProviderCache.get(row.getSourceDocumentId());
                    if (row instanceof TagQueryResultRow) {
                        TagQueryResultRow tRow = (TagQueryResultRow) row;
                        // one record per merged range of the tag row
                        List<Range> mergedRanges = Range.mergeRanges(new TreeSet<>((tRow).getRanges()));
                        for (Range range : mergedRanges) {
                            KeywordInSpanContext kwic = kwicProvider.getKwic(range, 5);
                            csvPrinter.printRecord(row.getQueryId().toSerializedString(), row.getSourceDocumentId(), kwicProvider.getSourceDocumentName(), kwicProvider.getDocumentLength(), kwic.getKeyword(), kwic.toString(), range.getStartPoint(), range.getEndPoint(), tRow.getMarkupCollectionId(), kwicProvider.getSourceDocument().getUserMarkupCollectionReference(tRow.getMarkupCollectionId()).toString(), tRow.getTagDefinitionPath(), tRow.getTagDefinitionVersion(), colorCache.get(tRow.getTagDefinitionId()), tRow.getTagInstanceId(), tRow.getPropertyDefinitionId(), tRow.getPropertyName(), tRow.getPropertyValue());
                        }
                    } else {
                        KeywordInSpanContext kwic = kwicProvider.getKwic(row.getRange(), 5);
                        csvPrinter.printRecord(row.getQueryId().toSerializedString(), row.getSourceDocumentId(), kwicProvider.getSourceDocumentName(), kwicProvider.getDocumentLength(), kwic.getKeyword(), kwic.toString(), row.getRange().getStartPoint(), row.getRange().getEndPoint());
                    }
                    csvPrinter.flush();
                    // Tell the thread waiting in getStream() that data has been flushed.
                    // This runs after every row; once the waiter has returned, later
                    // signals have no one to wake.
                    lock.lock();
                    try {
                        sending.signal();
                    } finally {
                        lock.unlock();
                    }
                }
            }
            // Void callable: there is no result, null is returned on purpose
            return null;
        }
    }, new ExecutionListener<Void>() {

        @Override
        public void done(Void result) {
        // nothing to do on success
        }

        @Override
        public void error(Throwable t) {
            ((ErrorHandler) ui).showAndLogError("Error export data to CSV!", t);
        }
    });
    // Wait for the background thread to send data to the pipe. await() releases
    // the lock while waiting and re-acquires it before returning.
    // NOTE(review): the boolean result of await() is ignored, so the stream is
    // returned after a timeout as well; if the query result has no rows no signal
    // is ever sent and this waits the full 10 seconds.
    try {
        try {
            sending.await(10, TimeUnit.SECONDS);
        } catch (InterruptedException e1) {
            // NOTE(review): the interrupt is only logged; the thread's interrupt
            // flag is not restored here.
            Logger.getLogger(getClass().getName()).log(Level.WARNING, "Error while waiting on CSV export!", e1);
        }
    } finally {
        lock.unlock();
    }
    return in;
}
Also used : TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) QueryResultRow(de.catma.queryengine.result.QueryResultRow) PipedOutputStream(java.io.PipedOutputStream) CSVPrinter(org.apache.commons.csv.CSVPrinter) QueryResult(de.catma.queryengine.result.QueryResult) UI(com.vaadin.ui.UI) TagQueryResultRow(de.catma.queryengine.result.TagQueryResultRow) TreeSet(java.util.TreeSet) KwicProvider(de.catma.indexer.KwicProvider) LoadingCache(com.google.common.cache.LoadingCache) List(java.util.List) ReentrantLock(java.util.concurrent.locks.ReentrantLock) Condition(java.util.concurrent.locks.Condition) PipedInputStream(java.io.PipedInputStream) KeywordInSpanContext(de.catma.indexer.KeywordInSpanContext) Range(de.catma.document.Range) ReentrantLock(java.util.concurrent.locks.ReentrantLock) Lock(java.util.concurrent.locks.Lock) OutputStreamWriter(java.io.OutputStreamWriter) CacheLoader(com.google.common.cache.CacheLoader)

Aggregations

KeywordInSpanContext (de.catma.indexer.KeywordInSpanContext)5 IOException (java.io.IOException)3 KwicProvider (de.catma.indexer.KwicProvider)2 QueryResultRow (de.catma.queryengine.result.QueryResultRow)2 TagQueryResultRow (de.catma.queryengine.result.TagQueryResultRow)2 TreeSet (java.util.TreeSet)2 ArrayNode (com.fasterxml.jackson.databind.node.ArrayNode)1 JsonNodeFactory (com.fasterxml.jackson.databind.node.JsonNodeFactory)1 ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode)1 CacheLoader (com.google.common.cache.CacheLoader)1 LoadingCache (com.google.common.cache.LoadingCache)1 UI (com.vaadin.ui.UI)1 Range (de.catma.document.Range)1 KeywordInContext (de.catma.document.source.KeywordInContext)1 TermInfo (de.catma.indexer.TermInfo)1 QueryResult (de.catma.queryengine.result.QueryResult)1 OutputStreamWriter (java.io.OutputStreamWriter)1 PipedInputStream (java.io.PipedInputStream)1 PipedOutputStream (java.io.PipedOutputStream)1 List (java.util.List)1