
Example 1 with PutFlowFile

Use of org.apache.nifi.hbase.put.PutFlowFile in the Apache NiFi project.

From class HBase_1_1_2_ClientService, method put().

@Override
public void put(final String tableName, final Collection<PutFlowFile> puts) throws IOException {
    try (final Table table = connection.getTable(TableName.valueOf(tableName))) {
        // Create one Put per row....
        final Map<String, Put> rowPuts = new HashMap<>();
        for (final PutFlowFile putFlowFile : puts) {
            // Use a String for the map key: byte[] lacks value-based equals/hashCode, so it cannot serve as a HashMap key.
            final String rowKeyString = new String(putFlowFile.getRow(), StandardCharsets.UTF_8);
            Put put = rowPuts.get(rowKeyString);
            if (put == null) {
                put = new Put(putFlowFile.getRow());
                rowPuts.put(rowKeyString, put);
            }
            for (final PutColumn column : putFlowFile.getColumns()) {
                if (column.getTimestamp() != null) {
                    put.addColumn(column.getColumnFamily(), column.getColumnQualifier(), column.getTimestamp(), column.getBuffer());
                } else {
                    put.addColumn(column.getColumnFamily(), column.getColumnQualifier(), column.getBuffer());
                }
            }
        }
        table.put(new ArrayList<>(rowPuts.values()));
    }
}
Also used: Table (org.apache.hadoop.hbase.client.Table), HashMap (java.util.HashMap), PutColumn (org.apache.nifi.hbase.put.PutColumn), Put (org.apache.hadoop.hbase.client.Put), PutFlowFile (org.apache.nifi.hbase.put.PutFlowFile)
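
Seen from the caller's side, the method above merges all columns that share a row key into a single Put. A minimal, hedged caller sketch (the service parameter and the literal table/row/column names are illustrative; the PutColumn and PutFlowFile constructors match the tests in Examples 2 and 3):

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.Collections;
import org.apache.nifi.hbase.put.PutColumn;
import org.apache.nifi.hbase.put.PutFlowFile;

// Illustrative caller: writes one cell through an already-configured service.
void writeOneCell(final HBaseClientService service) throws IOException {
    final Collection<PutColumn> columns = Collections.singletonList(
            new PutColumn("family1".getBytes(StandardCharsets.UTF_8),
                          "qualifier1".getBytes(StandardCharsets.UTF_8),
                          "content1".getBytes(StandardCharsets.UTF_8)));
    // The trailing null is the originating FlowFile; the tests below pass null as well.
    final PutFlowFile put = new PutFlowFile("nifi", "row1".getBytes(StandardCharsets.UTF_8), columns, null);
    service.put("nifi", Collections.singletonList(put));
}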

Example 2 with PutFlowFile

Use of org.apache.nifi.hbase.put.PutFlowFile in the Apache NiFi project.

From class TestHBase_1_1_2_ClientService, method testMultiplePutsSameRow().

@Test
public void testMultiplePutsSameRow() throws IOException, InitializationException {
    final String tableName = "nifi";
    final String row = "row1";
    final String columnFamily = "family1";
    final String columnQualifier = "qualifier1";
    final String content1 = "content1";
    final String content2 = "content2";
    final Collection<PutColumn> columns1 = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8), columnQualifier.getBytes(StandardCharsets.UTF_8), content1.getBytes(StandardCharsets.UTF_8)));
    final PutFlowFile putFlowFile1 = new PutFlowFile(tableName, row.getBytes(StandardCharsets.UTF_8), columns1, null);
    final Collection<PutColumn> columns2 = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8), columnQualifier.getBytes(StandardCharsets.UTF_8), content2.getBytes(StandardCharsets.UTF_8)));
    final PutFlowFile putFlowFile2 = new PutFlowFile(tableName, row.getBytes(StandardCharsets.UTF_8), columns2, null);
    final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
    // Mock an HBase Table so we can verify the put operations later
    final Table table = Mockito.mock(Table.class);
    when(table.getName()).thenReturn(TableName.valueOf(tableName));
    // create the controller service and link it to the test processor
    final HBaseClientService service = configureHBaseClientService(runner, table);
    runner.assertValid(service);
    // try to put multiple cells for the same row
    final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
    hBaseClientService.put(tableName, Arrays.asList(putFlowFile1, putFlowFile2));
    // verify put was only called once
    ArgumentCaptor<List> capture = ArgumentCaptor.forClass(List.class);
    verify(table, times(1)).put(capture.capture());
    // verify there was only one put in the list of puts
    final List<Put> puts = capture.getValue();
    assertEquals(1, puts.size());
    // verify two cells were added to this one put operation
    final NavigableMap<byte[], List<Cell>> familyCells = puts.get(0).getFamilyCellMap();
    Map.Entry<byte[], List<Cell>> entry = familyCells.firstEntry();
    assertEquals(2, entry.getValue().size());
}
Also used: Table (org.apache.hadoop.hbase.client.Table), TestRunner (org.apache.nifi.util.TestRunner), PutColumn (org.apache.nifi.hbase.put.PutColumn), Put (org.apache.hadoop.hbase.client.Put), PutFlowFile (org.apache.nifi.hbase.put.PutFlowFile), ArrayList (java.util.ArrayList), List (java.util.List), HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), Map (java.util.Map), NavigableMap (java.util.NavigableMap), Test (org.junit.Test)
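
Both this test and the next call a configureHBaseClientService helper that this page does not show. A plausible sketch of what such a helper must do, assuming a mock service subclass (MockHBaseClientService is a hypothetical name here; TestProcessor.HBASE_CLIENT_SERVICE is the property the test body itself reads):

// Hedged sketch: registers a client service backed by the mocked Table and
// points the test processor's controller-service property at it.
private HBaseClientService configureHBaseClientService(final TestRunner runner, final Table table)
        throws InitializationException {
    final HBaseClientService service = new MockHBaseClientService(table); // hypothetical mock subclass
    runner.addControllerService("hbaseClient", service);
    runner.enableControllerService(service);
    runner.setProperty(TestProcessor.HBASE_CLIENT_SERVICE, "hbaseClient");
    return service;
}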

Example 3 with PutFlowFile

Use of org.apache.nifi.hbase.put.PutFlowFile in the Apache NiFi project.

From class TestHBase_1_1_2_ClientService, method testSinglePut().

@Test
public void testSinglePut() throws InitializationException, IOException {
    final String tableName = "nifi";
    final String row = "row1";
    final String columnFamily = "family1";
    final String columnQualifier = "qualifier1";
    final String content = "content1";
    final Collection<PutColumn> columns = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8), columnQualifier.getBytes(StandardCharsets.UTF_8), content.getBytes(StandardCharsets.UTF_8)));
    final PutFlowFile putFlowFile = new PutFlowFile(tableName, row.getBytes(StandardCharsets.UTF_8), columns, null);
    final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
    // Mock an HBase Table so we can verify the put operations later
    final Table table = Mockito.mock(Table.class);
    when(table.getName()).thenReturn(TableName.valueOf(tableName));
    // create the controller service and link it to the test processor
    final HBaseClientService service = configureHBaseClientService(runner, table);
    runner.assertValid(service);
    // try to put a single cell
    final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
    hBaseClientService.put(tableName, Arrays.asList(putFlowFile));
    // verify only one call to put was made
    ArgumentCaptor<List> capture = ArgumentCaptor.forClass(List.class);
    verify(table, times(1)).put(capture.capture());
    // verify only one put was in the list of puts
    final List<Put> puts = capture.getValue();
    assertEquals(1, puts.size());
    verifyPut(row, columnFamily, columnQualifier, content, puts.get(0));
}
Also used: Table (org.apache.hadoop.hbase.client.Table), TestRunner (org.apache.nifi.util.TestRunner), PutColumn (org.apache.nifi.hbase.put.PutColumn), ArrayList (java.util.ArrayList), List (java.util.List), Put (org.apache.hadoop.hbase.client.Put), PutFlowFile (org.apache.nifi.hbase.put.PutFlowFile), Test (org.junit.Test)
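
The verifyPut helper called on the last line is also not shown on this page. A hedged sketch of the assertions such a helper would make, using the HBase 1.x Cell/CellUtil API (the real NiFi helper may differ in detail):

import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.NavigableMap;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import static org.junit.Assert.assertEquals;

// Hedged sketch: asserts the Put targets the expected row and carries exactly
// one cell with the expected family, qualifier, and value.
private void verifyPut(final String row, final String columnFamily, final String columnQualifier,
                       final String content, final Put put) {
    assertEquals(row, new String(put.getRow(), StandardCharsets.UTF_8));
    final NavigableMap<byte[], List<Cell>> familyCells = put.getFamilyCellMap();
    assertEquals(1, familyCells.size());
    final Cell cell = familyCells.firstEntry().getValue().get(0);
    assertEquals(columnFamily, new String(CellUtil.cloneFamily(cell), StandardCharsets.UTF_8));
    assertEquals(columnQualifier, new String(CellUtil.cloneQualifier(cell), StandardCharsets.UTF_8));
    assertEquals(content, new String(CellUtil.cloneValue(cell), StandardCharsets.UTF_8));
}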

Example 4 with PutFlowFile

Use of org.apache.nifi.hbase.put.PutFlowFile in the Apache NiFi project.

From class AbstractPutHBase, method onTrigger().

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final int batchSize = context.getProperty(BATCH_SIZE).asInteger();
    List<FlowFile> flowFiles = session.get(batchSize);
    if (flowFiles == null || flowFiles.size() == 0) {
        return;
    }
    final Map<String, List<PutFlowFile>> tablePuts = new HashMap<>();
    // Group FlowFiles by HBase Table
    for (final FlowFile flowFile : flowFiles) {
        final PutFlowFile putFlowFile = createPut(session, context, flowFile);
        if (putFlowFile == null) {
            // sub-classes should log appropriate error messages before returning null
            session.transfer(flowFile, REL_FAILURE);
        } else if (!putFlowFile.isValid()) {
            if (StringUtils.isBlank(putFlowFile.getTableName())) {
                getLogger().error("Missing table name for FlowFile {}; routing to failure", new Object[] { flowFile });
            } else if (null == putFlowFile.getRow()) {
                getLogger().error("Missing row id for FlowFile {}; routing to failure", new Object[] { flowFile });
            } else if (putFlowFile.getColumns() == null || putFlowFile.getColumns().isEmpty()) {
                getLogger().error("No columns provided for FlowFile {}; routing to failure", new Object[] { flowFile });
            } else {
                // really shouldn't get here, but just in case
                getLogger().error("Failed to produce a put for FlowFile {}; routing to failure", new Object[] { flowFile });
            }
            session.transfer(flowFile, REL_FAILURE);
        } else {
            List<PutFlowFile> putFlowFiles = tablePuts.get(putFlowFile.getTableName());
            if (putFlowFiles == null) {
                putFlowFiles = new ArrayList<>();
                tablePuts.put(putFlowFile.getTableName(), putFlowFiles);
            }
            putFlowFiles.add(putFlowFile);
        }
    }
    getLogger().debug("Sending {} FlowFiles to HBase in {} put operations", new Object[] { flowFiles.size(), tablePuts.size() });
    final long start = System.nanoTime();
    final List<PutFlowFile> successes = new ArrayList<>();
    for (Map.Entry<String, List<PutFlowFile>> entry : tablePuts.entrySet()) {
        try {
            clientService.put(entry.getKey(), entry.getValue());
            successes.addAll(entry.getValue());
        } catch (Exception e) {
            getLogger().error(e.getMessage(), e);
            for (PutFlowFile putFlowFile : entry.getValue()) {
                getLogger().error("Failed to send {} to HBase due to {}; routing to failure", new Object[] { putFlowFile.getFlowFile(), e });
                final FlowFile failure = session.penalize(putFlowFile.getFlowFile());
                session.transfer(failure, REL_FAILURE);
            }
        }
    }
    final long sendMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
    getLogger().debug("Sent {} FlowFiles to HBase successfully in {} milliseconds", new Object[] { successes.size(), sendMillis });
    for (PutFlowFile putFlowFile : successes) {
        session.transfer(putFlowFile.getFlowFile(), REL_SUCCESS);
        final String details = "Put " + putFlowFile.getColumns().size() + " cells to HBase";
        session.getProvenanceReporter().send(putFlowFile.getFlowFile(), getTransitUri(putFlowFile), details, sendMillis);
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), PutFlowFile (org.apache.nifi.hbase.put.PutFlowFile), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), ProcessException (org.apache.nifi.processor.exception.ProcessException), List (java.util.List), Map (java.util.Map)
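
The table-grouping loop in onTrigger uses the classic get/null-check/put pattern. On Java 8+ the same grouping can be written with Map.computeIfAbsent; a behavior-equivalent sketch (validPuts is a hypothetical stand-in for the already-validated PutFlowFiles):

// Equivalent grouping, Java 8+ idiom; behavior matches the loop above.
final Map<String, List<PutFlowFile>> tablePuts = new HashMap<>();
for (final PutFlowFile putFlowFile : validPuts) { // validPuts: hypothetical pre-filtered list
    tablePuts.computeIfAbsent(putFlowFile.getTableName(), k -> new ArrayList<>())
             .add(putFlowFile);
}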

Example 5 with PutFlowFile

Use of org.apache.nifi.hbase.put.PutFlowFile in the Apache NiFi project.

From class PutHBaseCell, method createPut().

@Override
protected PutFlowFile createPut(final ProcessSession session, final ProcessContext context, final FlowFile flowFile) {
    final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
    final String row = context.getProperty(ROW_ID).evaluateAttributeExpressions(flowFile).getValue();
    final String columnFamily = context.getProperty(COLUMN_FAMILY).evaluateAttributeExpressions(flowFile).getValue();
    final String columnQualifier = context.getProperty(COLUMN_QUALIFIER).evaluateAttributeExpressions(flowFile).getValue();
    final String timestampValue = context.getProperty(TIMESTAMP).evaluateAttributeExpressions(flowFile).getValue();
    final Long timestamp;
    if (!StringUtils.isBlank(timestampValue)) {
        try {
            timestamp = Long.valueOf(timestampValue);
        } catch (Exception e) {
            getLogger().error("Invalid timestamp value: " + timestampValue, e);
            return null;
        }
    } else {
        timestamp = null;
    }
    final byte[] buffer = new byte[(int) flowFile.getSize()];
    session.read(flowFile, new InputStreamCallback() {

        @Override
        public void process(final InputStream in) throws IOException {
            StreamUtils.fillBuffer(in, buffer);
        }
    });
    final Collection<PutColumn> columns = Collections.singletonList(new PutColumn(columnFamily.getBytes(StandardCharsets.UTF_8), columnQualifier.getBytes(StandardCharsets.UTF_8), buffer, timestamp));
    byte[] rowKeyBytes = getRow(row, context.getProperty(ROW_ID_ENCODING_STRATEGY).getValue());
    return new PutFlowFile(tableName, rowKeyBytes, columns, flowFile);
}
Also used: InputStream (java.io.InputStream), PutColumn (org.apache.nifi.hbase.put.PutColumn), InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback), IOException (java.io.IOException), PutFlowFile (org.apache.nifi.hbase.put.PutFlowFile)
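
createPut delegates row-key encoding to a getRow helper on AbstractPutHBase that is not shown here. A hedged sketch of the idea, assuming the encoding strategy toggles between plain UTF-8 and HBase's binary string notation (the "Binary" constant is an assumption, not verbatim NiFi code):

// Hedged sketch of a row-key encoding helper; the strategy name is illustrative.
protected byte[] getRow(final String row, final String encoding) {
    if ("Binary".equals(encoding)) {
        // Interprets \xNN escapes the way org.apache.hadoop.hbase.util.Bytes.toBytesBinary does.
        return org.apache.hadoop.hbase.util.Bytes.toBytesBinary(row);
    }
    return row.getBytes(StandardCharsets.UTF_8);
}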

Aggregations

PutFlowFile (org.apache.nifi.hbase.put.PutFlowFile) 30
TestRunner (org.apache.nifi.util.TestRunner) 22
Test (org.junit.Test) 21
MockFlowFile (org.apache.nifi.util.MockFlowFile) 15
HashMap (java.util.HashMap) 14
PutColumn (org.apache.nifi.hbase.put.PutColumn) 9
ArrayList (java.util.ArrayList) 7
ProvenanceEventRecord (org.apache.nifi.provenance.ProvenanceEventRecord) 5
List (java.util.List) 4
Put (org.apache.hadoop.hbase.client.Put) 4
Table (org.apache.hadoop.hbase.client.Table) 4
IOException (java.io.IOException) 3
InputStream (java.io.InputStream) 3
Map (java.util.Map) 3
ProcessException (org.apache.nifi.processor.exception.ProcessException) 3
FlowFile (org.apache.nifi.flowfile.FlowFile) 2
InputStreamCallback (org.apache.nifi.processor.io.InputStreamCallback) 2
IllegalTypeConversionException (org.apache.nifi.serialization.record.util.IllegalTypeConversionException) 2
JsonNode (com.fasterxml.jackson.databind.JsonNode) 1
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 1