Use of com.amazonaws.services.comprehend.model.BatchDetectDominantLanguageItemResult in the project knime-cloud by knime.
Class LanguageOperation, method processChunk.
/**
 * Processes one chunk of rows: sends the collected texts to the Comprehend batch dominant-language
 * detection and pushes the resulting rows to the output.
 *
 * <p>For each row whose index is contained in {@code validRows}, one output row is pushed per
 * detected language; the language name, language code and score are appended after the input cells
 * and the row key is the input key suffixed with {@code "_" + n}. A row that is not valid, or whose
 * text has no entry in the service's result list (the service reports failed documents separately
 * and omits them from the result list), yields a single row with three missing cells and the suffix
 * {@code "_0"}. A result with an empty language list yields no output row.
 *
 * <p>Assumes {@code texts} holds exactly one entry per valid row, in row order, so that the k-th
 * valid row corresponds to document index k of the request -- TODO confirm against the caller.
 *
 * <p>The three passed collections are cleared before returning so the caller can reuse them for the
 * next chunk.
 *
 * @param out RowOutput to push new rows to
 * @param comprehendClient Comprehend client to send the requests
 * @param numInputColumns Number of input columns
 * @param rowBatch List containing rows
 * @param texts Texts to process
 * @param validRows List containing indices of valid rows
 * @throws InterruptedException Thrown if execution is canceled
 */
private static final void processChunk(final RowOutput out, final AmazonComprehend comprehendClient, final int numInputColumns, final List<DataRow> rowBatch, final List<String> texts, final Set<Integer> validRows) throws InterruptedException {
    // Store results by their document index instead of consuming them in arrival order: documents
    // that failed are missing from the result list, so a plain iterator would hand later rows the
    // languages of a different text and eventually run out of elements.
    final BatchDetectDominantLanguageItemResult[] resultsByTextIndex =
        new BatchDetectDominantLanguageItemResult[texts.size()];
    if (!texts.isEmpty()) {
        final BatchDetectDominantLanguageRequest request =
            new BatchDetectDominantLanguageRequest().withTextList(texts);
        final BatchDetectDominantLanguageResult response = comprehendClient.batchDetectDominantLanguage(request);
        final List<BatchDetectDominantLanguageItemResult> resultList = response.getResultList();
        if (resultList != null) {
            for (final BatchDetectDominantLanguageItemResult item : resultList) {
                final Integer index = item.getIndex();
                // Ignore entries without a usable index rather than failing the whole chunk.
                if (index != null && index >= 0 && index < resultsByTextIndex.length) {
                    resultsByTextIndex[index] = item;
                }
            }
        }
    }

    // The array is reused for every pushed row, exactly as in the original implementation.
    final DataCell[] cells = new DataCell[numInputColumns + 3];
    // Position of the next valid row's text within the request.
    int textIndex = 0;
    for (int i = 0; i < rowBatch.size(); i++) {
        final DataRow row = rowBatch.get(i);
        for (int j = 0; j < numInputColumns; j++) {
            cells[j] = row.getCell(j);
        }

        BatchDetectDominantLanguageItemResult itemResult = null;
        if (validRows.contains(i)) {
            if (textIndex < resultsByTextIndex.length) {
                itemResult = resultsByTextIndex[textIndex];
            }
            textIndex++;
        }

        if (itemResult != null) {
            long outputRowIndex = 0;
            // Push rows (one per language) to the output.
            for (final DominantLanguage dominantLang : itemResult.getLanguages()) {
                // Copy the results to the new columns in the output.
                cells[numInputColumns] = new StringCell(code2Name(dominantLang.getLanguageCode()));
                cells[numInputColumns + 1] = new StringCell(dominantLang.getLanguageCode());
                cells[numInputColumns + 2] = new DoubleCell(dominantLang.getScore());
                // Create a new data row and push it to the output container.
                out.push(new DefaultRow(new RowKey(row.getKey().getString() + "_" + outputRowIndex++), cells));
            }
        } else {
            // Invalid row, or no result was returned for its text: emit missing values.
            Arrays.fill(cells, numInputColumns, numInputColumns + 3, DataType.getMissingCell());
            out.push(new DefaultRow(new RowKey(row.getKey().getString() + "_" + 0), cells));
        }
    }

    // Clean up
    rowBatch.clear();
    texts.clear();
    validRows.clear();
}
Aggregations