
Example 21 with PutFlowFile

use of org.apache.nifi.hbase.put.PutFlowFile in project nifi by apache.

the class TestHBase_1_1_2_ClientService method testMultiplePutsDifferentRow.

@Test
public void testMultiplePutsDifferentRow() throws IOException, InitializationException {
    final String tableName = "nifi";
    final String row1 = "row1";
    final String row2 = "row2";
    final String columnFamily = "family1";
    final String columnQualifier = "qualifier1";
    final String content1 = "content1";
    final String content2 = "content2";
    final Collection<PutColumn> columns1 = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8), columnQualifier.getBytes(StandardCharsets.UTF_8), content1.getBytes(StandardCharsets.UTF_8)));
    final PutFlowFile putFlowFile1 = new PutFlowFile(tableName, row1.getBytes(StandardCharsets.UTF_8), columns1, null);
    final Collection<PutColumn> columns2 = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8), columnQualifier.getBytes(StandardCharsets.UTF_8), content2.getBytes(StandardCharsets.UTF_8)));
    final PutFlowFile putFlowFile2 = new PutFlowFile(tableName, row2.getBytes(StandardCharsets.UTF_8), columns2, null);
    final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
    // Mock an HBase Table so we can verify the put operations later
    final Table table = Mockito.mock(Table.class);
    when(table.getName()).thenReturn(TableName.valueOf(tableName));
    // create the controller service and link it to the test processor
    final HBaseClientService service = configureHBaseClientService(runner, table);
    runner.assertValid(service);
    // try to put multiple cells with different rows
    final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
    hBaseClientService.put(tableName, Arrays.asList(putFlowFile1, putFlowFile2));
    // verify put was only called once
    ArgumentCaptor<List> capture = ArgumentCaptor.forClass(List.class);
    verify(table, times(1)).put(capture.capture());
    // verify there were two puts in the list
    final List<Put> puts = capture.getValue();
    assertEquals(2, puts.size());
}
Also used : Table(org.apache.hadoop.hbase.client.Table) TestRunner(org.apache.nifi.util.TestRunner) PutColumn(org.apache.nifi.hbase.put.PutColumn) ArrayList(java.util.ArrayList) List(java.util.List) Put(org.apache.hadoop.hbase.client.Put) PutFlowFile(org.apache.nifi.hbase.put.PutFlowFile) Test(org.junit.Test)
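
The assertion pattern in this test is worth isolating: a Mockito ArgumentCaptor captures the List handed to the mocked Table so the test can confirm the client service issued exactly one put call containing both rows. The sketch below is a minimal, self-contained illustration of that pattern; the BatchSink interface is a hypothetical stand-in for the HBase Table, not NiFi code.

import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

import java.util.Arrays;
import java.util.List;

import org.junit.Test;
import org.mockito.ArgumentCaptor;

public class ArgumentCaptorSketchTest {

    // hypothetical collaborator standing in for org.apache.hadoop.hbase.client.Table
    interface BatchSink {
        void put(List<String> batch);
    }

    @Test
    public void capturesSingleBatchedCall() {
        final BatchSink sink = mock(BatchSink.class);

        // the code under test sends both rows in one call, mirroring how the client
        // service batches multiple PutFlowFiles into a single Table.put(List<Put>)
        sink.put(Arrays.asList("row1", "row2"));

        // verify put was only called once, then inspect the captured batch
        final ArgumentCaptor<List> capture = ArgumentCaptor.forClass(List.class);
        verify(sink, times(1)).put(capture.capture());
        assertEquals(2, capture.getValue().size());
    }
}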

Example 22 with PutFlowFile

use of org.apache.nifi.hbase.put.PutFlowFile in project nifi by apache.

the class PutHBaseJSON method createPut.

@Override
protected PutFlowFile createPut(final ProcessSession session, final ProcessContext context, final FlowFile flowFile) {
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String rowId = context.getProperty(ROW_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String rowFieldName = context.getProperty(ROW_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
    final String timestampValue = context.getProperty(TIMESTAMP).evaluateAttributeExpressions(flowFile).getValue();
    final boolean extractRowId = !StringUtils.isBlank(rowFieldName);
    final String complexFieldStrategy = context.getProperty(COMPLEX_FIELD_STRATEGY).getValue();
    final String fieldEncodingStrategy = context.getProperty(FIELD_ENCODING_STRATEGY).getValue();
    final String rowIdEncodingStrategy = context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue();
    final Long timestamp;
    if (!StringUtils.isBlank(timestampValue)) {
        try {
            timestamp = Long.valueOf(timestampValue);
        } catch (Exception e) {
            getLogger().error("Invalid timestamp value: " + timestampValue, e);
            return null;
        }
    } else {
        timestamp = null;
    }
    // Parse the JSON document
    final ObjectMapper mapper = new ObjectMapper();
    final AtomicReference<JsonNode> rootNodeRef = new AtomicReference<>(null);
    try {
        session.read(flowFile, new InputStreamCallback() {

            @Override
            public void process(final InputStream in) throws IOException {
                try (final InputStream bufferedIn = new BufferedInputStream(in)) {
                    rootNodeRef.set(mapper.readTree(bufferedIn));
                }
            }
        });
    } catch (final ProcessException pe) {
        getLogger().error("Failed to parse {} as JSON due to {}; routing to failure", new Object[] { flowFile, pe.toString() }, pe);
        return null;
    }
    final JsonNode rootNode = rootNodeRef.get();
    if (rootNode.isArray()) {
        getLogger().error("Root node of JSON must be a single document, found array for {}; routing to failure", new Object[] { flowFile });
        return null;
    }
    final Collection<PutColumn> columns = new ArrayList<>();
    final AtomicReference<String> rowIdHolder = new AtomicReference<>(null);
    // convert each field/value to a column for the put, skip over nulls and arrays
    final Iterator<String> fieldNames = rootNode.fieldNames();
    while (fieldNames.hasNext()) {
        final String fieldName = fieldNames.next();
        final AtomicReference<byte[]> fieldValueHolder = new AtomicReference<>(null);
        final JsonNode fieldNode = rootNode.get(fieldName);
        if (fieldNode.isNull()) {
            getLogger().debug("Skipping {} because value was null", new Object[] { fieldName });
        } else if (fieldNode.isValueNode()) {
            // for a value node we need to determine if we are storing the bytes of a string, or the bytes of actual types
            if (STRING_ENCODING_VALUE.equals(fieldEncodingStrategy)) {
                final byte[] valueBytes = clientService.toBytes(fieldNode.asText());
                fieldValueHolder.set(valueBytes);
            } else {
                fieldValueHolder.set(extractJNodeValue(fieldNode));
            }
        } else {
            // for non-null, non-value nodes, determine what to do based on the handling strategy
            switch(complexFieldStrategy) {
                case FAIL_VALUE:
                    getLogger().error("Complex value found for {}; routing to failure", new Object[] { fieldName });
                    return null;
                case WARN_VALUE:
                    getLogger().warn("Complex value found for {}; skipping", new Object[] { fieldName });
                    break;
                case TEXT_VALUE:
                    // use toString() here because asText() is only guaranteed to be supported on value nodes
                    // some other types of nodes, like ArrayNode, provide toString implementations
                    fieldValueHolder.set(clientService.toBytes(fieldNode.toString()));
                    break;
                case IGNORE_VALUE:
                    // silently skip
                    break;
                default:
                    break;
            }
        }
        // if we have a field value, determine whether this is the row id field; if so, keep its value as the row id,
        // otherwise add a new column where the fieldName and fieldValue are the column qualifier and value
        if (fieldValueHolder.get() != null) {
            if (extractRowId && fieldName.equals(rowFieldName)) {
                rowIdHolder.set(fieldNode.asText());
            } else {
                final byte[] colFamBytes = columnFamily.getBytes(StandardCharsets.UTF_8);
                final byte[] colQualBytes = fieldName.getBytes(StandardCharsets.UTF_8);
                final byte[] colValBytes = fieldValueHolder.get();
                columns.add(new PutColumn(colFamBytes, colQualBytes, colValBytes, timestamp));
            }
        }
    }
    // if a row id field was expected but not found in the incoming document, log an error message
    // so the user can see what the field names were and return null so it gets routed to failure
    if (extractRowId && rowIdHolder.get() == null) {
        final String fieldNameStr = StringUtils.join(rootNode.fieldNames(), ",");
        getLogger().error("Row ID field named '{}' not found in field names '{}'; routing to failure", new Object[] { rowFieldName, fieldNameStr });
        return null;
    }
    final String putRowId = (extractRowId ? rowIdHolder.get() : rowId);
    byte[] rowKeyBytes = getRow(putRowId, rowIdEncodingStrategy);
    return new PutFlowFile(tableName, rowKeyBytes, columns, flowFile);
}
Also used : BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) PutColumn(org.apache.nifi.hbase.put.PutColumn) ArrayList(java.util.ArrayList) JsonNode(com.fasterxml.jackson.databind.JsonNode) AtomicReference(java.util.concurrent.atomic.AtomicReference) IOException(java.io.IOException) ProcessException(org.apache.nifi.processor.exception.ProcessException) PutFlowFile(org.apache.nifi.hbase.put.PutFlowFile) InputStreamCallback(org.apache.nifi.processor.io.InputStreamCallback) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)
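
Most of createPut is the JSON-to-column flattening shown above. The fragment below is a stripped-down sketch of just that step under the string encoding strategy, assuming only a plain Jackson dependency: each top-level value node becomes a qualifier/value pair, while null and complex nodes are skipped (the complex-field strategies above layer failure/warn/text handling on top of this).

import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

public class JsonToColumnsSketch {

    public static Map<String, byte[]> flatten(final String json) throws Exception {
        final JsonNode root = new ObjectMapper().readTree(json);
        final Map<String, byte[]> columns = new LinkedHashMap<>();
        final Iterator<String> fieldNames = root.fieldNames();
        while (fieldNames.hasNext()) {
            final String name = fieldNames.next();
            final JsonNode node = root.get(name);
            if (node.isNull() || !node.isValueNode()) {
                continue; // nulls are skipped; arrays/objects are handled by the complex-field strategy
            }
            // string encoding: store the bytes of the textual representation
            columns.put(name, node.asText().getBytes(StandardCharsets.UTF_8));
        }
        return columns;
    }

    public static void main(String[] args) throws Exception {
        // {"id":"r1","count":5,"tags":["a","b"]} -> columns id and count; tags is skipped
        flatten("{\"id\":\"r1\",\"count\":5,\"tags\":[\"a\",\"b\"]}")
                .forEach((k, v) -> System.out.println(k + " -> " + new String(v, StandardCharsets.UTF_8)));
    }
}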

Example 23 with PutFlowFile

use of org.apache.nifi.hbase.put.PutFlowFile in project nifi by apache.

the class PutHBaseRecord method addBatch.

private int addBatch(String tableName, List<PutFlowFile> flowFiles) throws IOException {
    int columns = 0;
    clientService.put(tableName, flowFiles);
    for (PutFlowFile put : flowFiles) {
        columns += put.getColumns().size();
    }
    return columns;
}
Also used : PutFlowFile(org.apache.nifi.hbase.put.PutFlowFile)
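
addBatch delegates the actual write to the client service and only tallies how many columns were sent, a count onTrigger later reports in provenance. A tiny sketch of that accounting, with a hypothetical functional interface standing in for HBaseClientService:

import java.util.Arrays;
import java.util.List;
import java.util.function.BiConsumer;

public class ColumnCountSketch {

    // hypothetical stand-in for a PutFlowFile: only the column count matters here
    static class PutStub {
        final List<String> columns;
        PutStub(String... columns) { this.columns = Arrays.asList(columns); }
    }

    static int addBatch(String tableName, List<PutStub> batch, BiConsumer<String, List<PutStub>> clientPut) {
        clientPut.accept(tableName, batch);   // write the whole batch in one call
        int columns = 0;
        for (PutStub put : batch) {
            columns += put.columns.size();    // tally columns for provenance reporting
        }
        return columns;
    }

    public static void main(String[] args) {
        int sent = addBatch("nifi",
                Arrays.asList(new PutStub("q1", "q2"), new PutStub("q1")),
                (table, batch) -> System.out.println(batch.size() + " puts to " + table));
        System.out.println(sent + " columns"); // prints: 3 columns
    }
}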

Example 24 with PutFlowFile

use of org.apache.nifi.hbase.put.PutFlowFile in project nifi by apache.

the class PutHBaseRecord method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final RecordReaderFactory recordParserFactory = context.getProperty(RECORD_READER_FACTORY).asControllerService(RecordReaderFactory.class);
    List<PutFlowFile> flowFiles = new ArrayList<>();
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String rowFieldName = context.getProperty(ROW_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
    final String timestampFieldName = context.getProperty(TIMESTAMP_FIELD_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String fieldEncodingStrategy = context.getProperty(FIELD_ENCODING_STRATEGY).getValue();
    final String complexFieldStrategy = context.getProperty(COMPLEX_FIELD_STRATEGY).getValue();
    final String rowEncodingStrategy = context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue();
    final long start = System.nanoTime();
    int index = 0;
    int columns = 0;
    boolean failed = false;
    String startIndexStr = flowFile.getAttribute("restart.index");
    int startIndex = -1;
    if (startIndexStr != null) {
        startIndex = Integer.parseInt(startIndexStr);
    }
    PutFlowFile last = null;
    try (final InputStream in = session.read(flowFile);
        final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger())) {
        Record record;
        if (startIndex >= 0) {
            while (index++ < startIndex && (reader.nextRecord()) != null) {
            }
        }
        while ((record = reader.nextRecord()) != null) {
            PutFlowFile putFlowFile = createPut(context, record, reader.getSchema(), flowFile, rowFieldName, columnFamily, timestampFieldName, fieldEncodingStrategy, rowEncodingStrategy, complexFieldStrategy);
            if (putFlowFile.getColumns().size() == 0) {
                continue;
            }
            flowFiles.add(putFlowFile);
            index++;
            if (flowFiles.size() == batchSize) {
                columns += addBatch(tableName, flowFiles);
                last = flowFiles.get(flowFiles.size() - 1);
                flowFiles = new ArrayList<>();
            }
        }
        if (flowFiles.size() > 0) {
            columns += addBatch(tableName, flowFiles);
            last = flowFiles.get(flowFiles.size() - 1);
        }
    } catch (Exception ex) {
        getLogger().error("Failed to put records to HBase.", ex);
        failed = true;
    }
    if (!failed) {
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.removeAttribute(flowFile, "restart.index");
        session.transfer(flowFile, REL_SUCCESS);
    } else {
        String restartIndex = Integer.toString(index - flowFiles.size());
        flowFile = session.putAttribute(flowFile, "restart.index", restartIndex);
        if (columns > 0) {
            sendProvenance(session, flowFile, columns, System.nanoTime() - start, last);
        }
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
    }
    session.commit();
}
Also used : PutFlowFile(org.apache.nifi.hbase.put.PutFlowFile) FlowFile(org.apache.nifi.flowfile.FlowFile) InputStream(java.io.InputStream) RecordReader(org.apache.nifi.serialization.RecordReader) ArrayList(java.util.ArrayList) ProcessException(org.apache.nifi.processor.exception.ProcessException) IOException(java.io.IOException) IllegalTypeConversionException(org.apache.nifi.serialization.record.util.IllegalTypeConversionException) RecordReaderFactory(org.apache.nifi.serialization.RecordReaderFactory) Record(org.apache.nifi.serialization.record.Record)
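
Stripped of the NiFi session and record-reader plumbing, onTrigger is a batch-and-flush loop with resumable progress: records are skipped up to any stored restart.index, accumulated until the batch size is hit, flushed, and the running index is what gets written back as restart.index if a later batch fails. A generic sketch of that control flow, with a hypothetical Consumer standing in for the client service:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;

public class BatchFlushSketch {

    public static int process(final Iterator<String> records, final int batchSize,
                              final int startIndex, final Consumer<List<String>> flush) {
        int index = 0;
        // skip records already handled in a previous attempt (restart.index semantics)
        while (index < startIndex && records.hasNext()) {
            records.next();
            index++;
        }
        List<String> batch = new ArrayList<>();
        while (records.hasNext()) {
            batch.add(records.next());
            index++;
            if (batch.size() == batchSize) {
                flush.accept(batch);        // full batch: send it and start a new one
                batch = new ArrayList<>();
            }
        }
        if (!batch.isEmpty()) {
            flush.accept(batch);            // final partial batch
        }
        // on failure, index minus the size of the unflushed batch would be stored as restart.index
        return index;
    }

    public static void main(String[] args) {
        int processed = process(List.of("r1", "r2", "r3", "r4", "r5").iterator(), 2, 0,
                batch -> System.out.println("flush " + batch));
        System.out.println(processed + " records"); // 5 records in batches of 2, 2, 1
    }
}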

Example 25 with PutFlowFile

use of org.apache.nifi.hbase.put.PutFlowFile in project nifi by apache.

the class TestPutHBaseCell method testMultipleFlowFilesSameTableDifferentRow.

@Test
public void testMultipleFlowFilesSameTableDifferentRow() throws IOException, InitializationException {
    final String tableName = "nifi";
    final String row1 = "row1";
    final String row2 = "row2";
    final String columnFamily = "family1";
    final String columnQualifier = "qualifier1";
    final PutHBaseCell proc = new PutHBaseCell();
    final TestRunner runner = getTestRunnerWithEL(proc);
    final MockHBaseClientService hBaseClient = getHBaseClientService(runner);
    final String content1 = "some content1";
    final Map<String, String> attributes1 = getAttributeMapWithEL(tableName, row1, columnFamily, columnQualifier);
    runner.enqueue(content1.getBytes("UTF-8"), attributes1);
    final String content2 = "some content1";
    final Map<String, String> attributes2 = getAttributeMapWithEL(tableName, row2, columnFamily, columnQualifier);
    runner.enqueue(content2.getBytes("UTF-8"), attributes2);
    runner.run();
    runner.assertAllFlowFilesTransferred(PutHBaseCell.REL_SUCCESS);
    final MockFlowFile outFile = runner.getFlowFilesForRelationship(PutHBaseCell.REL_SUCCESS).get(0);
    outFile.assertContentEquals(content1);
    assertNotNull(hBaseClient.getFlowFilePuts());
    assertEquals(1, hBaseClient.getFlowFilePuts().size());
    List<PutFlowFile> puts = hBaseClient.getFlowFilePuts().get(tableName);
    assertEquals(2, puts.size());
    verifyPut(row1, columnFamily, columnQualifier, null, content1, puts.get(0));
    verifyPut(row2, columnFamily, columnQualifier, null, content2, puts.get(1));
    assertEquals(2, runner.getProvenanceEvents().size());
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) TestRunner(org.apache.nifi.util.TestRunner) PutFlowFile(org.apache.nifi.hbase.put.PutFlowFile) Test(org.junit.Test)
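
The assertion hBaseClient.getFlowFilePuts().get(tableName) relies on the mock client grouping puts by table name, which is why two flow files targeting the same table produce one map entry whose list has two puts. A small, generic sketch of that grouping; the Put class here is a hypothetical stand-in, not the NiFi PutFlowFile type:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PutsByTableSketch {

    // hypothetical stand-in for org.apache.nifi.hbase.put.PutFlowFile
    static class Put {
        final String tableName;
        final String row;
        Put(String tableName, String row) { this.tableName = tableName; this.row = row; }
    }

    static Map<String, List<Put>> groupByTable(List<Put> puts) {
        final Map<String, List<Put>> byTable = new HashMap<>();
        for (Put put : puts) {
            byTable.computeIfAbsent(put.tableName, t -> new ArrayList<>()).add(put);
        }
        return byTable;
    }

    public static void main(String[] args) {
        final Map<String, List<Put>> byTable = groupByTable(Arrays.asList(
                new Put("nifi", "row1"), new Put("nifi", "row2")));
        System.out.println(byTable.get("nifi").size()); // 2, mirroring the assertion above
    }
}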

Aggregations

PutFlowFile (org.apache.nifi.hbase.put.PutFlowFile) 30
TestRunner (org.apache.nifi.util.TestRunner) 22
Test (org.junit.Test) 21
MockFlowFile (org.apache.nifi.util.MockFlowFile) 15
HashMap (java.util.HashMap) 14
PutColumn (org.apache.nifi.hbase.put.PutColumn) 9
ArrayList (java.util.ArrayList) 7
ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord) 5
List (java.util.List) 4
Put (org.apache.hadoop.hbase.client.Put) 4
Table (org.apache.hadoop.hbase.client.Table) 4
IOException (java.io.IOException) 3
InputStream (java.io.InputStream) 3
Map (java.util.Map) 3
ProcessException (org.apache.nifi.processor.exception.ProcessException) 3
FlowFile (org.apache.nifi.flowfile.FlowFile) 2
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback) 2
IllegalTypeConversionException (org.apache.nifi.serialization.record.util.IllegalTypeConversionException) 2
JsonNode (com.fasterxml.jackson.databind.JsonNode) 1
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 1