use of org.apache.nifi.hbase.put.PutFlowFile in project nifi by apache.
the class TestHBase_1_1_2_ClientService method testMultiplePutsDifferentRow.
@Test
public void testMultiplePutsDifferentRow() throws IOException, InitializationException {
    final String tableName = "nifi";
    final String row1 = "row1";
    final String row2 = "row2";
    final String columnFamily = "family1";
    final String columnQualifier = "qualifier1";
    final String content1 = "content1";
    final String content2 = "content2";
    final Collection<PutColumn> columns1 = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8), columnQualifier.getBytes(StandardCharsets.UTF_8), content1.getBytes(StandardCharsets.UTF_8)));
    final PutFlowFile putFlowFile1 = new PutFlowFile(tableName, row1.getBytes(StandardCharsets.UTF_8), columns1, null);
    final Collection<PutColumn> columns2 = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8), columnQualifier.getBytes(StandardCharsets.UTF_8), content2.getBytes(StandardCharsets.UTF_8)));
    final PutFlowFile putFlowFile2 = new PutFlowFile(tableName, row2.getBytes(StandardCharsets.UTF_8), columns2, null);
    final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
    // Mock an HBase Table so we can verify the put operations later
    final Table table = Mockito.mock(Table.class);
    when(table.getName()).thenReturn(TableName.valueOf(tableName));
    // create the controller service and link it to the test processor
    final HBaseClientService service = configureHBaseClientService(runner, table);
    runner.assertValid(service);
    // try to put multiple cells with different rows
    final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
    hBaseClientService.put(tableName, Arrays.asList(putFlowFile1, putFlowFile2));
    // verify put was only called once
    ArgumentCaptor<List> capture = ArgumentCaptor.forClass(List.class);
    verify(table, times(1)).put(capture.capture());
    // verify there were two puts in the list
    final List<Put> puts = capture.getValue();
    assertEquals(2, puts.size());
}
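Taken on its own, the API this test exercises is small: a PutColumn pairs a column family, qualifier, and value, and a PutFlowFile bundles a table name, a row key, those columns, and an optional originating FlowFile. A minimal sketch using only the constructor signatures visible above (the trailing null stands in for a FlowFile reference, which a unit test does not need):

// Minimal sketch of a single-column put; values mirror the test above.
final byte[] family = "family1".getBytes(StandardCharsets.UTF_8);
final byte[] qualifier = "qualifier1".getBytes(StandardCharsets.UTF_8);
final byte[] value = "content1".getBytes(StandardCharsets.UTF_8);
final PutColumn column = new PutColumn(family, qualifier, value);
final PutFlowFile putFlowFile = new PutFlowFile(
        "nifi",                                  // table name
        "row1".getBytes(StandardCharsets.UTF_8), // row key
        Collections.singletonList(column),       // columns to write
        null);                                   // no originating FlowFile in this sketch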
use of org.apache.nifi.hbase.put.PutFlowFile in project nifi by apache.
the class PutHBaseJSON method createPut.
@Override
protected PutFlowFile createPut(final ProcessSession session, final ProcessContext context, final FlowFile flowFile) {
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String rowId = context.getProperty(ROW_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String rowFieldName = context.getProperty(ROW_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
    final String timestampValue = context.getProperty(TIMESTAMP).evaluateAttributeExpressions(flowFile).getValue();
    final boolean extractRowId = !StringUtils.isBlank(rowFieldName);
    final String complexFieldStrategy = context.getProperty(COMPLEX_FIELD_STRATEGY).getValue();
    final String fieldEncodingStrategy = context.getProperty(FIELD_ENCODING_STRATEGY).getValue();
    final String rowIdEncodingStrategy = context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue();
    final Long timestamp;
    if (!StringUtils.isBlank(timestampValue)) {
        try {
            timestamp = Long.valueOf(timestampValue);
        } catch (Exception e) {
            getLogger().error("Invalid timestamp value: " + timestampValue, e);
            return null;
        }
    } else {
        timestamp = null;
    }
    // Parse the JSON document
    final ObjectMapper mapper = new ObjectMapper();
    final AtomicReference<JsonNode> rootNodeRef = new AtomicReference<>(null);
    try {
        session.read(flowFile, new InputStreamCallback() {
            @Override
            public void process(final InputStream in) throws IOException {
                try (final InputStream bufferedIn = new BufferedInputStream(in)) {
                    rootNodeRef.set(mapper.readTree(bufferedIn));
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to parse {} as JSON due to {}; routing to failure", new Object[] { flowFile, pe.toString() }, pe);
        return null;
    }
    final JsonNode rootNode = rootNodeRef.get();
    if (rootNode.isArray()) {
        getLogger().error("Root node of JSON must be a single document, found array for {}; routing to failure", new Object[] { flowFile });
        return null;
    }
    final Collection<PutColumn> columns = new ArrayList<>();
    final AtomicReference<String> rowIdHolder = new AtomicReference<>(null);
    // convert each field/value to a column for the put, skip over nulls and arrays
    final Iterator<String> fieldNames = rootNode.fieldNames();
    while (fieldNames.hasNext()) {
        final String fieldName = fieldNames.next();
        final AtomicReference<byte[]> fieldValueHolder = new AtomicReference<>(null);
        final JsonNode fieldNode = rootNode.get(fieldName);
        if (fieldNode.isNull()) {
            getLogger().debug("Skipping {} because value was null", new Object[] { fieldName });
        } else if (fieldNode.isValueNode()) {
            // for a value node we need to determine if we are storing the bytes of a string, or the bytes of actual types
            if (STRING_ENCODING_VALUE.equals(fieldEncodingStrategy)) {
                final byte[] valueBytes = clientService.toBytes(fieldNode.asText());
                fieldValueHolder.set(valueBytes);
            } else {
                fieldValueHolder.set(extractJNodeValue(fieldNode));
            }
        } else {
            // for non-null, non-value nodes, determine what to do based on the handling strategy
            switch (complexFieldStrategy) {
                case FAIL_VALUE:
                    getLogger().error("Complex value found for {}; routing to failure", new Object[] { fieldName });
                    return null;
                case WARN_VALUE:
                    getLogger().warn("Complex value found for {}; skipping", new Object[] { fieldName });
                    break;
                case TEXT_VALUE:
                    // use toString() here because asText() is only guaranteed to be supported on value nodes
                    // some other types of nodes, like ArrayNode, provide toString implementations
                    fieldValueHolder.set(clientService.toBytes(fieldNode.toString()));
                    break;
                case IGNORE_VALUE:
                    // silently skip
                    break;
                default:
                    break;
            }
        }
        // if a value was captured, use it as the row id when this is the row field,
        // otherwise add a new column where the fieldName and fieldValue are the column qualifier and value
        if (fieldValueHolder.get() != null) {
            if (extractRowId && fieldName.equals(rowFieldName)) {
                rowIdHolder.set(fieldNode.asText());
            } else {
                final byte[] colFamBytes = columnFamily.getBytes(StandardCharsets.UTF_8);
                final byte[] colQualBytes = fieldName.getBytes(StandardCharsets.UTF_8);
                final byte[] colValBytes = fieldValueHolder.get();
                columns.add(new PutColumn(colFamBytes, colQualBytes, colValBytes, timestamp));
            }
        }
    }
    // log an error message so the user can see what the field names were and return null so it gets routed to failure
    if (extractRowId && rowIdHolder.get() == null) {
        final String fieldNameStr = StringUtils.join(rootNode.fieldNames(), ",");
        getLogger().error("Row ID field named '{}' not found in field names '{}'; routing to failure", new Object[] { rowFieldName, fieldNameStr });
        return null;
    }
    final String putRowId = (extractRowId ? rowIdHolder.get() : rowId);
    byte[] rowKeyBytes = getRow(putRowId, rowIdEncodingStrategy);
    return new PutFlowFile(tableName, rowKeyBytes, columns, flowFile);
}
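For reference, a standalone sketch of the Jackson field-iteration pattern createPut relies on: walk fieldNames(), skip nulls, treat value nodes as candidate columns, and leave complex nodes to the Complex Field Strategy. The JSON content and class name below are illustrative only, not taken from the project:

import java.util.Iterator;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class JsonFieldSketch {
    public static void main(String[] args) throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        final JsonNode rootNode = mapper.readTree(
                "{\"id\":\"row-42\",\"name\":\"alice\",\"age\":31,\"tags\":null}");
        final Iterator<String> fieldNames = rootNode.fieldNames();
        while (fieldNames.hasNext()) {
            final String fieldName = fieldNames.next();
            final JsonNode fieldNode = rootNode.get(fieldName);
            if (fieldNode.isNull()) {
                System.out.println(fieldName + ": skipped (null value)");
            } else if (fieldNode.isValueNode()) {
                // in createPut this would become one PutColumn in the configured family,
                // or the row id when the field name matches the Row Identifier Field Name
                System.out.println(fieldName + " = " + fieldNode.asText());
            } else {
                // in createPut this branch is governed by the Complex Field Strategy
                System.out.println(fieldName + ": complex value " + fieldNode.toString());
            }
        }
    }
}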
use of org.apache.nifi.hbase.put.PutFlowFile in project nifi by apache.
the class PutHBaseRecord method addBatch.
private int addBatch(String tableName, List<PutFlowFile> flowFiles) throws IOException {
    int columns = 0;
    clientService.put(tableName, flowFiles);
    for (PutFlowFile put : flowFiles) {
        columns += put.getColumns().size();
    }
    return columns;
}
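For context, the client-service call this helper delegates to appears to be the same one exercised by the first test in this listing; as used here its shape is (signature inferred from those call sites, so treat it as an assumption rather than a quote of the interface):

// One call per batch; an IOException propagates to the caller and marks the batch as failed.
void put(String tableName, Collection<PutFlowFile> puts) throws IOException;

The value addBatch returns is the total column count across the batch, which onTrigger below accumulates for the provenance event.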
use of org.apache.nifi.hbase.put.PutFlowFile in project nifi by apache.
the class PutHBaseRecord method onTrigger.
@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
    List<PutFlowFile> flowFiles = new ArrayList<>();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String rowFieldName = context.getProperty(ROW_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
    final String timestampFieldName = context.getProperty(TIMESTAMP_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String fieldEncodingStrategy = context.getProperty(FIELD_ENCODING_STRATEGY).getValue();
    final String complexFieldStrategy = context.getProperty(COMPLEX_FIELD_STRATEGY).getValue();
    final String rowEncodingStrategy = context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue();
    final long start = System.nanoTime();
    int index = 0;
    int columns = 0;
    boolean failed = false;
    String startIndexStr = flowFile.getAttribute("restart.index");
    int startIndex = -1;
    if (startIndexStr != null) {
        startIndex = Integer.parseInt(startIndexStr);
    }
    PutFlowFile last = null;
    try (final InputStream in = session.read(flowFile);
         final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger())) {
        Record record;
        if (startIndex >= 0) {
            while (index++ < startIndex && (reader.nextRecord()) != null) {
            }
        }
        while ((record = reader.nextRecord()) != null) {
            PutFlowFile putFlowFile = createPut(context, record, reader.getSchema(), flowFile, rowFieldName, columnFamily, timestampFieldName, fieldEncodingStrategy, rowEncodingStrategy, complexFieldStrategy);
            if (putFlowFile.getColumns().size() == 0) {
                continue;
            }
            flowFiles.add(putFlowFile);
            index++;
            if (flowFiles.size() == batchSize) {
                columns += addBatch(tableName, flowFiles);
                last = flowFiles.get(flowFiles.size() - 1);
                flowFiles = new ArrayList<>();
            }
        }
        if (flowFiles.size() > 0) {
            columns += addBatch(tableName, flowFiles);
            last = flowFiles.get(flowFiles.size() - 1);
        }
    } catch (Exception ex) {
        getLogger().error("Failed to put records to HBase.", ex);
        failed = true;
    }
    if (!failed) {
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.removeAttribute(flowFile, "restart.index");
        session.transfer(flowFile, REL_SUCCESS);
    } else {
        String restartIndex = Integer.toString(index - flowFiles.size());
        flowFile = session.putAttribute(flowFile, "restart.index", restartIndex);
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    }
    session.commit();
}
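The restart.index bookkeeping above is worth spelling out: on failure the processor stores how many records had already been sent in completed batches, so a retry of the same FlowFile can fast-forward past them. A small illustrative calculation (the numbers are made up, not taken from the project):

// Suppose 750 records were read and the batch holding the last 250 failed in addBatch.
int index = 750;                 // records consumed from the reader so far
int pendingInFailedBatch = 250;  // flowFiles.size() at the moment of failure
String restartIndex = Integer.toString(index - pendingInFailedBatch);
// restart.index = "500": on the next attempt startIndex == 500, so the reader skips the
// first 500 records (the batches that were already put) before building new batches;
// on success the attribute is removed again.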
use of org.apache.nifi.hbase.put.PutFlowFile in project nifi by apache.
the class TestPutHBaseCell method testMultipleFlowFilesSameTableDifferentRow.
@Test
public void testMultipleFlowFilesSameTableDifferentRow() throws IOException, InitializationException {
    final String tableName = "nifi";
    final String row1 = "row1";
    final String row2 = "row2";
    final String columnFamily = "family1";
    final String columnQualifier = "qualifier1";
    final PutHBaseCell proc = new PutHBaseCell();
    final TestRunner runner = getTestRunnerWithEL(proc);
    final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
    final String content1 = "some content1";
    final Map<String, String> attributes1 = getAttributeMapWithEL(tableName, row1, columnFamily, columnQualifier);
    runner.enqueue(content1.getBytes("UTF-8"), attributes1);
    final String content2 = "some content1";
    final Map<String, String> attributes2 = getAttributeMapWithEL(tableName, row2, columnFamily, columnQualifier);
    runner.enqueue(content2.getBytes("UTF-8"), attributes2);
    runner.run();
    runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
    final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
    outFile.assertContentEquals(content1);
    assertNotNull(hBaseClient.getFlowFilePuts());
    assertEquals(1, hBaseClient.getFlowFilePuts().size());
    List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(tableName);
    assertEquals(2, puts.size());
    verifyPut(row1, columnFamily, columnQualifier, null, content1, puts.get(0));
    verifyPut(row2, columnFamily, columnQualifier, null, content2, puts.get(1));
    assertEquals(2, runner.getProvenanceEvents().size());
}
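The assertions above lean on helpers that are not shown here: getAttributeMapWithEL builds per-FlowFile attributes for the processor's Expression Language-driven properties, and MockHBaseClientService.getFlowFilePuts() evidently returns the captured puts keyed by table name. Assuming PutFlowFile also exposes a getRow() accessor (an assumption, not confirmed by this listing), the same ordering check could be written directly:

// Map shape and getRow() accessor inferred from the calls above; treat both as assumptions.
final Map<String, List<PutFlowFile>> putsByTable = hBaseClient.getFlowFilePuts();
final List<PutFlowFile> puts = putsByTable.get(tableName);
assertEquals(2, puts.size());
assertEquals(row1, new String(puts.get(0).getRow(), StandardCharsets.UTF_8));
assertEquals(row2, new String(puts.get(1).getRow(), StandardCharsets.UTF_8));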