Search in sources :

Example 1 with BatchDetectDominantLanguageRequest

Use of com.amazonaws.services.comprehend.model.BatchDetectDominantLanguageRequest in the project knime-cloud by knime.

The example is the method processChunk of the class LanguageOperation.

/**
 * Processes one chunk of rows: sends the collected texts to Amazon Comprehend in a single batch request and
 * pushes the detected dominant languages to the output.
 *
 * <p>For every row whose position in {@code rowBatch} is contained in {@code validRows}, one output row is
 * pushed per detected language. Each output row consists of the first {@code numInputColumns} input cells
 * followed by three new cells: the value of {@code code2Name(languageCode)}, the language code, and the score.
 * Rows that are not contained in {@code validRows}, as well as valid rows for which the service response holds
 * no result item, yield a single output row whose three new cells are missing. Output row keys are the input
 * row key followed by {@code "_"} and a running index starting at 0.
 *
 * <p>The k-th entry of {@code texts} is expected to belong to the k-th valid row (in ascending row order).
 * Result items are matched to texts via their {@code Index} field rather than by list position, because the
 * Comprehend API reports documents it could not process separately from the result list; matching by position
 * would shift every subsequent result onto the wrong row.
 *
 * <p>{@code rowBatch}, {@code texts} and {@code validRows} are cleared before the method returns.
 *
 * @param out RowOutput to push new rows to
 * @param comprehendClient Comprehend client to send the requests
 * @param numInputColumns Number of input columns
 * @param rowBatch List containing rows
 * @param texts Texts to process
 * @param validRows Set containing the indices (positions in {@code rowBatch}) of valid rows
 * @throws InterruptedException Thrown if execution is canceled
 */
private static final void processChunk(final RowOutput out, final AmazonComprehend comprehendClient, final int numInputColumns, final List<DataRow> rowBatch, final List<String> texts, final Set<Integer> validRows) throws InterruptedException {
    // Slot k holds the result for texts.get(k); it stays null if the service returned no result for that text.
    final BatchDetectDominantLanguageItemResult[] resultsByTextIndex =
        new BatchDetectDominantLanguageItemResult[texts.size()];
    if (!texts.isEmpty()) {
        final BatchDetectDominantLanguageRequest request =
            new BatchDetectDominantLanguageRequest().withTextList(texts);
        final BatchDetectDominantLanguageResult response = comprehendClient.batchDetectDominantLanguage(request);
        final List<BatchDetectDominantLanguageItemResult> items = response.getResultList();
        if (items != null) {
            for (final BatchDetectDominantLanguageItemResult item : items) {
                final Integer index = item.getIndex();
                // Ignore items without a usable index instead of guessing which text they belong to.
                if (index != null && index >= 0 && index < resultsByTextIndex.length) {
                    resultsByTextIndex[index] = item;
                }
            }
        }
    }

    final DataCell[] cells = new DataCell[numInputColumns + 3];
    // Position of the current valid row within texts; advanced once per valid row.
    int textIndex = 0;
    for (int i = 0; i < rowBatch.size(); i++) {
        final DataRow row = rowBatch.get(i);
        for (int j = 0; j < numInputColumns; j++) {
            cells[j] = row.getCell(j);
        }

        BatchDetectDominantLanguageItemResult result = null;
        if (validRows.contains(i)) {
            if (textIndex < resultsByTextIndex.length) {
                result = resultsByTextIndex[textIndex];
            }
            textIndex++;
        }

        if (result != null) {
            long outputRowIndex = 0;
            // Push rows (one per language) to the output.
            for (final DominantLanguage dominantLang : result.getLanguages()) {
                // Copy the results to the new columns in the output.
                cells[numInputColumns] = new StringCell(code2Name(dominantLang.getLanguageCode()));
                cells[numInputColumns + 1] = new StringCell(dominantLang.getLanguageCode());
                cells[numInputColumns + 2] = new DoubleCell(dominantLang.getScore());
                // Create a new data row and push it to the output container.
                out.push(new DefaultRow(new RowKey(row.getKey().getString() + "_" + outputRowIndex++), cells));
            }
        } else {
            // Invalid row, or no result available for this row's text: keep the row, mark the new cells missing.
            Arrays.fill(cells, numInputColumns, numInputColumns + 3, DataType.getMissingCell());
            out.push(new DefaultRow(new RowKey(row.getKey().getString() + "_" + 0), cells));
        }
    }
    // Clean up
    rowBatch.clear();
    texts.clear();
    validRows.clear();
}
Also used : RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) BatchDetectDominantLanguageRequest(com.amazonaws.services.comprehend.model.BatchDetectDominantLanguageRequest) BatchDetectDominantLanguageItemResult(com.amazonaws.services.comprehend.model.BatchDetectDominantLanguageItemResult) BatchDetectDominantLanguageResult(com.amazonaws.services.comprehend.model.BatchDetectDominantLanguageResult) DataRow(org.knime.core.data.DataRow) DominantLanguage(com.amazonaws.services.comprehend.model.DominantLanguage) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

BatchDetectDominantLanguageItemResult (com.amazonaws.services.comprehend.model.BatchDetectDominantLanguageItemResult)1 BatchDetectDominantLanguageRequest (com.amazonaws.services.comprehend.model.BatchDetectDominantLanguageRequest)1 BatchDetectDominantLanguageResult (com.amazonaws.services.comprehend.model.BatchDetectDominantLanguageResult)1 DominantLanguage (com.amazonaws.services.comprehend.model.DominantLanguage)1 DataCell (org.knime.core.data.DataCell)1 DataRow (org.knime.core.data.DataRow)1 RowKey (org.knime.core.data.RowKey)1 DefaultRow (org.knime.core.data.def.DefaultRow)1 DoubleCell (org.knime.core.data.def.DoubleCell)1 StringCell (org.knime.core.data.def.StringCell)1