
Example 1 with ResultCell

Use of org.apache.nifi.hbase.scan.ResultCell in project nifi by apache.

In the class HBase_1_1_2_ClientService, method scan (start row / end row overload).

@Override
public void scan(final String tableName, final byte[] startRow, final byte[] endRow, final Collection<Column> columns, final ResultHandler handler) throws IOException {
    try (final Table table = connection.getTable(TableName.valueOf(tableName));
        final ResultScanner scanner = getResults(table, startRow, endRow, columns)) {
        for (final Result result : scanner) {
            final byte[] rowKey = result.getRow();
            final Cell[] cells = result.rawCells();
            if (cells == null) {
                continue;
            }
            // convert HBase cells to NiFi cells
            final ResultCell[] resultCells = new ResultCell[cells.length];
            for (int i = 0; i < cells.length; i++) {
                final Cell cell = cells[i];
                final ResultCell resultCell = getResultCell(cell);
                resultCells[i] = resultCell;
            }
            // delegate to the handler
            handler.handle(rowKey, resultCells);
        }
    }
}
Also used: Table (org.apache.hadoop.hbase.client.Table), ResultScanner (org.apache.hadoop.hbase.client.ResultScanner), ResultCell (org.apache.nifi.hbase.scan.ResultCell), Cell (org.apache.hadoop.hbase.Cell), Result (org.apache.hadoop.hbase.client.Result), ValidationResult (org.apache.nifi.components.ValidationResult)
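
The getResultCell helper called above is not shown in this snippet. Below is a minimal sketch of what the Cell-to-ResultCell conversion could look like, assuming ResultCell is a plain bean whose setters mirror the HBase Cell accessors used elsewhere on this page; the actual method in HBase_1_1_2_ClientService may copy additional fields (for example the type byte or sequence id).

// Hypothetical sketch only: assumes ResultCell exposes setters matching the getters
// (getRowArray, getQualifierArray, getValueArray, getTimestamp, ...) seen in these examples.
private ResultCell getResultCell(final Cell cell) {
    final ResultCell resultCell = new ResultCell();
    // row coordinates
    resultCell.setRowArray(cell.getRowArray());
    resultCell.setRowOffset(cell.getRowOffset());
    resultCell.setRowLength(cell.getRowLength());
    // column family and qualifier
    resultCell.setFamilyArray(cell.getFamilyArray());
    resultCell.setFamilyOffset(cell.getFamilyOffset());
    resultCell.setFamilyLength(cell.getFamilyLength());
    resultCell.setQualifierArray(cell.getQualifierArray());
    resultCell.setQualifierOffset(cell.getQualifierOffset());
    resultCell.setQualifierLength(cell.getQualifierLength());
    // timestamp and value
    resultCell.setTimestamp(cell.getTimestamp());
    resultCell.setValueArray(cell.getValueArray());
    resultCell.setValueOffset(cell.getValueOffset());
    resultCell.setValueLength(cell.getValueLength());
    return resultCell;
}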

Example 2 with ResultCell

Use of org.apache.nifi.hbase.scan.ResultCell in project nifi by apache.

In the class HBase_1_1_2_ClientService, method scan (filter expression overload).

@Override
public void scan(final String tableName, final String startRow, final String endRow, String filterExpression, final Long timerangeMin, final Long timerangeMax, final Integer limitRows, final Boolean isReversed, final Collection<Column> columns, final ResultHandler handler) throws IOException {
    try (final Table table = connection.getTable(TableName.valueOf(tableName));
        final ResultScanner scanner = getResults(table, startRow, endRow, filterExpression, timerangeMin, timerangeMax, limitRows, isReversed, columns)) {
        int cnt = 0;
        final int lim = limitRows != null ? limitRows : 0;
        for (final Result result : scanner) {
            if (lim > 0 && ++cnt > lim) {
                break;
            }
            final byte[] rowKey = result.getRow();
            final Cell[] cells = result.rawCells();
            if (cells == null) {
                continue;
            }
            // convert HBase cells to NiFi cells
            final ResultCell[] resultCells = new ResultCell[cells.length];
            for (int i = 0; i < cells.length; i++) {
                final Cell cell = cells[i];
                final ResultCell resultCell = getResultCell(cell);
                resultCells[i] = resultCell;
            }
            // delegate to the handler
            handler.handle(rowKey, resultCells);
        }
    }
}
Also used: Table (org.apache.hadoop.hbase.client.Table), ResultScanner (org.apache.hadoop.hbase.client.ResultScanner), ResultCell (org.apache.nifi.hbase.scan.ResultCell), Cell (org.apache.hadoop.hbase.Cell), Result (org.apache.hadoop.hbase.client.Result), ValidationResult (org.apache.nifi.components.ValidationResult)
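
For context, here is a hedged sketch of how a caller might invoke this filter-expression overload. The table name, row range, filter expression, and handler body are illustrative placeholders; only the parameter order and types come from the signature above, and like the method itself the call throws IOException.

// Illustrative caller only; "nifi", the row range, and the PrefixFilter expression are made-up values.
final List<Column> columns = new ArrayList<>(); // an empty list scans all column families
hBaseClientService.scan(
        "nifi",                // tableName
        "row0",                // startRow
        "row9",                // endRow
        "PrefixFilter('row')", // filterExpression (HBase filter language)
        null,                  // timerangeMin (no lower bound)
        null,                  // timerangeMax (no upper bound)
        100,                   // limitRows
        false,                 // isReversed
        columns,
        (rowKey, resultCells) -> System.out.println(
                new String(rowKey, StandardCharsets.UTF_8) + ": " + resultCells.length + " cells"));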

Example 3 with ResultCell

Use of org.apache.nifi.hbase.scan.ResultCell in project nifi by apache.

In the class HBase_1_1_2_RecordLookupService, method lookup.

@Override
public Optional<Record> lookup(Map<String, Object> coordinates) throws LookupFailureException {
    if (coordinates.get(ROW_KEY_KEY) == null) {
        return Optional.empty();
    }
    final String rowKey = coordinates.get(ROW_KEY_KEY).toString();
    if (StringUtils.isBlank(rowKey)) {
        return Optional.empty();
    }
    final byte[] rowKeyBytes = rowKey.getBytes(StandardCharsets.UTF_8);
    try {
        final Map<String, Object> values = new HashMap<>();
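        // scan a single row by passing the row key as both the start row and the end row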
        hBaseClientService.scan(tableName, rowKeyBytes, rowKeyBytes, columns, (byte[] row, ResultCell[] resultCells) -> {
            for (final ResultCell cell : resultCells) {
                final byte[] qualifier = Arrays.copyOfRange(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierOffset() + cell.getQualifierLength());
                final byte[] value = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength());
                values.put(new String(qualifier, charset), new String(value, charset));
            }
        });
        if (values.size() > 0) {
            final List<RecordField> fields = new ArrayList<>();
            for (String key : values.keySet()) {
                fields.add(new RecordField(key, RecordFieldType.STRING.getDataType()));
            }
            final RecordSchema schema = new SimpleRecordSchema(fields);
            return Optional.ofNullable(new MapRecord(schema, values));
        } else {
            return Optional.empty();
        }
    } catch (IOException e) {
        getLogger().error("Error occurred loading {}", new Object[] { coordinates.get("rowKey") }, e);
        throw new LookupFailureException(e);
    }
}
Also used: SimpleRecordSchema (org.apache.nifi.serialization.SimpleRecordSchema), MapRecord (org.apache.nifi.serialization.record.MapRecord), RecordField (org.apache.nifi.serialization.record.RecordField), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), ResultCell (org.apache.nifi.hbase.scan.ResultCell), IOException (java.io.IOException), LookupFailureException (org.apache.nifi.lookup.LookupFailureException), RecordSchema (org.apache.nifi.serialization.record.RecordSchema)
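
A minimal usage sketch for this lookup service follows. The coordinate key "rowKey" is inferred from the ROW_KEY_KEY constant and the error log above; lookupService, the row value, and the printing logic are illustrative assumptions.

// Hypothetical caller; assumes ROW_KEY_KEY resolves to "rowKey".
final Map<String, Object> coordinates = new HashMap<>();
coordinates.put("rowKey", "row0");
try {
    final Optional<Record> record = lookupService.lookup(coordinates);
    record.ifPresent(r -> {
        // every field in the schema built above is a STRING
        for (final String fieldName : r.getSchema().getFieldNames()) {
            System.out.println(fieldName + " = " + r.getAsString(fieldName));
        }
    });
} catch (final LookupFailureException e) {
    // wraps the IOException thrown by the underlying scan
    e.printStackTrace();
}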

Example 4 with ResultCell

Use of org.apache.nifi.hbase.scan.ResultCell in project nifi by apache.

In the class TestHBase_1_1_2_ClientService, method testScan.

@Test
public void testScan() throws InitializationException, IOException {
    final String tableName = "nifi";
    final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
    // Mock an HBase Table to back the client service during the scan
    final Table table = Mockito.mock(Table.class);
    when(table.getName()).thenReturn(TableName.valueOf(tableName));
    // create the controller service and link it to the test processor
    final MockHBaseClientService service = configureHBaseClientService(runner, table);
    runner.assertValid(service);
    // stage some results in the mock service...
    final long now = System.currentTimeMillis();
    final Map<String, String> cells = new HashMap<>();
    cells.put("greeting", "hello");
    cells.put("name", "nifi");
    service.addResult("row0", cells, now - 2);
    service.addResult("row1", cells, now - 1);
    service.addResult("row2", cells, now - 1);
    service.addResult("row3", cells, now);
    // perform a scan and verify the four rows were returned
    final CollectingResultHandler handler = new CollectingResultHandler();
    final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
    hBaseClientService.scan(tableName, new ArrayList<Column>(), null, now, handler);
    assertEquals(4, handler.results.size());
    // get row0 using the row id and verify it has 2 cells
    final ResultCell[] results = handler.results.get("row0");
    assertNotNull(results);
    assertEquals(2, results.length);
    verifyResultCell(results[0], COL_FAM, "greeting", "hello");
    verifyResultCell(results[1], COL_FAM, "name", "nifi");
}
Also used: Table (org.apache.hadoop.hbase.client.Table), HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), TestRunner (org.apache.nifi.util.TestRunner), ResultCell (org.apache.nifi.hbase.scan.ResultCell), PutColumn (org.apache.nifi.hbase.put.PutColumn), Column (org.apache.nifi.hbase.scan.Column), Test (org.junit.Test)
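
The CollectingResultHandler used in this test is a helper that is not reproduced on this page. A plausible sketch, assuming it simply keeps each handled row keyed by its string row key (the actual helper in the NiFi test sources may differ):

// Hypothetical reconstruction of the test helper referenced above.
private static class CollectingResultHandler implements ResultHandler {

    // insertion-ordered so assertions against specific rows stay predictable
    final Map<String, ResultCell[]> results = new LinkedHashMap<>();

    @Override
    public void handle(final byte[] row, final ResultCell[] resultCells) {
        results.put(new String(row, StandardCharsets.UTF_8), resultCells);
    }
}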

Example 5 with ResultCell

Use of org.apache.nifi.hbase.scan.ResultCell in project nifi by apache.

In the class GetHBase, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    final String tableName = context.getProperty(TABLE_NAME).getValue();
    final String initialTimeRange = context.getProperty(INITIAL_TIMERANGE).getValue();
    final String filterExpression = context.getProperty(FILTER_EXPRESSION).getValue();
    final HBaseClientService hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
    // if the table was changed then remove any previous state
    if (previousTable != null && !tableName.equals(previousTable)) {
        try {
            context.getStateManager().clear(Scope.CLUSTER);
        } catch (final IOException ioe) {
            getLogger().warn("Failed to clear Cluster State", ioe);
        }
        previousTable = tableName;
    }
    try {
        final Charset charset = Charset.forName(context.getProperty(CHARSET).getValue());
        final RowSerializer serializer = new JsonRowSerializer(charset);
        this.lastResult = getState(context.getStateManager());
        final long defaultMinTime = (initialTimeRange.equals(NONE.getValue()) ? 0L : System.currentTimeMillis());
        final long minTime = (lastResult == null ? defaultMinTime : lastResult.getTimestamp());
        final Map<String, Set<String>> cellsMatchingTimestamp = new HashMap<>();
        final AtomicReference<Long> rowsPulledHolder = new AtomicReference<>(0L);
        final AtomicReference<Long> latestTimestampHolder = new AtomicReference<>(minTime);
        hBaseClientService.scan(tableName, columns, filterExpression, minTime, new ResultHandler() {

            @Override
            public void handle(final byte[] rowKey, final ResultCell[] resultCells) {
                final String rowKeyString = new String(rowKey, StandardCharsets.UTF_8);
                // check if latest cell timestamp is equal to our cutoff.
                // if any of the cells have a timestamp later than our cutoff, then we
                // want the row. But if the cell with the latest timestamp is equal to
                // our cutoff, then we want to check if that's one of the cells that
                // we have already seen.
                long latestCellTimestamp = 0L;
                for (final ResultCell cell : resultCells) {
                    if (cell.getTimestamp() > latestCellTimestamp) {
                        latestCellTimestamp = cell.getTimestamp();
                    }
                }
                // if the latest cell timestamp is before our cutoff, then we have already seen this row.
                if (latestCellTimestamp < minTime) {
                    getLogger().debug("latest cell timestamp for row {} is {}, which is earlier than the minimum time of {}", new Object[] { rowKeyString, latestCellTimestamp, minTime });
                    return;
                }
                if (latestCellTimestamp == minTime) {
                    // latest cell timestamp is equal to our minimum time. Check if all cells that have
                    // that timestamp are in our list of previously seen cells.
                    boolean allSeen = true;
                    for (final ResultCell cell : resultCells) {
                        if (cell.getTimestamp() == latestCellTimestamp) {
                            if (lastResult == null || !lastResult.contains(cell)) {
                                allSeen = false;
                                break;
                            }
                        }
                    }
                    if (allSeen) {
                        // we have already seen all of the cells for this row. We do not want to
                        // include this cell in our output.
                        getLogger().debug("all cells for row {} have already been seen", new Object[] { rowKeyString });
                        return;
                    }
                }
                // only track cells whose timestamp is at least the newest seen so far; older cells can be ignored here.
                if (latestCellTimestamp >= latestTimestampHolder.get()) {
                    // new timestamp, so clear all of the 'matching cells'
                    if (latestCellTimestamp > latestTimestampHolder.get()) {
                        latestTimestampHolder.set(latestCellTimestamp);
                        cellsMatchingTimestamp.clear();
                    }
                    for (final ResultCell cell : resultCells) {
                        final long ts = cell.getTimestamp();
                        if (ts == latestCellTimestamp) {
                            final byte[] rowValue = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength() + cell.getRowOffset());
                            final byte[] cellValue = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength() + cell.getValueOffset());
                            final String rowHash = new String(rowValue, StandardCharsets.UTF_8);
                            Set<String> cellHashes = cellsMatchingTimestamp.get(rowHash);
                            if (cellHashes == null) {
                                cellHashes = new HashSet<>();
                                cellsMatchingTimestamp.put(rowHash, cellHashes);
                            }
                            cellHashes.add(new String(cellValue, StandardCharsets.UTF_8));
                        }
                    }
                }
                // write the row to a new FlowFile.
                FlowFile flowFile = session.create();
                flowFile = session.write(flowFile, new OutputStreamCallback() {

                    @Override
                    public void process(final OutputStream out) throws IOException {
                        serializer.serialize(rowKey, resultCells, out);
                    }
                });
                final Map<String, String> attributes = new HashMap<>();
                attributes.put("hbase.table", tableName);
                attributes.put("mime.type", "application/json");
                flowFile = session.putAllAttributes(flowFile, attributes);
                session.getProvenanceReporter().receive(flowFile, hBaseClientService.toTransitUri(tableName, rowKeyString));
                session.transfer(flowFile, REL_SUCCESS);
                getLogger().debug("Received {} from HBase with row key {}", new Object[] { flowFile, rowKeyString });
                // we could potentially have a huge number of rows. Once we reach the batch size, go ahead and
                // commit the session so that we can avoid buffering tons of FlowFiles without ever sending any out.
                long rowsPulled = rowsPulledHolder.get();
                rowsPulledHolder.set(++rowsPulled);
                if (rowsPulled % getBatchSize() == 0) {
                    session.commit();
                }
            }
        });
        final ScanResult scanResults = new ScanResult(latestTimestampHolder.get(), cellsMatchingTimestamp);
        // Commit session before we replace the lastResult; if session commit fails, we want
        // to pull these records again.
        session.commit();
        if (lastResult == null || scanResults.getTimestamp() > lastResult.getTimestamp()) {
            lastResult = scanResults;
        } else if (scanResults.getTimestamp() == lastResult.getTimestamp()) {
            final Map<String, Set<String>> combinedResults = new HashMap<>(scanResults.getMatchingCells());
            // do a deep copy because the Set may be modified below.
            for (final Map.Entry<String, Set<String>> entry : scanResults.getMatchingCells().entrySet()) {
                combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
            }
            // combine the results from 'lastResult'
            for (final Map.Entry<String, Set<String>> entry : lastResult.getMatchingCells().entrySet()) {
                final Set<String> existing = combinedResults.get(entry.getKey());
                if (existing == null) {
                    combinedResults.put(entry.getKey(), new HashSet<>(entry.getValue()));
                } else {
                    existing.addAll(entry.getValue());
                }
            }
            final ScanResult scanResult = new ScanResult(scanResults.getTimestamp(), combinedResults);
            lastResult = scanResult;
        }
        // save state using the framework's state manager
        storeState(lastResult, context.getStateManager());
    } catch (final IOException e) {
        getLogger().error("Failed to receive data from HBase due to {}", e);
        session.rollback();
    } finally {
        // if we failed, we want to yield so that we don't hammer hbase. If we succeed, then we have
        // pulled all of the records, so we want to wait a bit before hitting hbase again anyway.
        context.yield();
    }
}
Also used: Set (java.util.Set), HashSet (java.util.HashSet), HashMap (java.util.HashMap), OutputStream (java.io.OutputStream), ResultHandler (org.apache.nifi.hbase.scan.ResultHandler), ResultCell (org.apache.nifi.hbase.scan.ResultCell), JsonRowSerializer (org.apache.nifi.hbase.io.JsonRowSerializer), RowSerializer (org.apache.nifi.hbase.io.RowSerializer), OutputStreamCallback (org.apache.nifi.processor.io.OutputStreamCallback), FlowFile (org.apache.nifi.flowfile.FlowFile), Charset (java.nio.charset.Charset), AtomicReference (java.util.concurrent.atomic.AtomicReference), IOException (java.io.IOException), Map (java.util.Map), StateMap (org.apache.nifi.components.state.StateMap)
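
The skip-or-emit decision inside the handler above is easy to lose among the state bookkeeping. The following standalone sketch distills that decision; the method and its parameters are purely illustrative and are not part of GetHBase.

// Hypothetical distillation of the handler's per-row decision:
// emit the row unless (a) its newest cell is older than the cutoff, or
// (b) its newest cell sits exactly at the cutoff and every cell at that
//     timestamp was already seen during the previous scan.
static boolean shouldEmitRow(final long latestCellTimestamp, final long minTime,
        final Set<String> cellsAtCutoff, final Set<String> previouslySeenCells) {
    if (latestCellTimestamp < minTime) {
        return false; // strictly older than the cutoff: covered by an earlier scan
    }
    if (latestCellTimestamp == minTime) {
        // at the cutoff: emit only if at least one cell at this timestamp is new
        return !previouslySeenCells.containsAll(cellsAtCutoff);
    }
    return true; // strictly newer than the cutoff: always emit
}

On a first run, where lastResult is null, the original handler emits every row; a caller of this sketch would model that case by passing an empty previouslySeenCells set.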

Aggregations

ResultCell (org.apache.nifi.hbase.scan.ResultCell): 17
HashMap (java.util.HashMap): 4
Table (org.apache.hadoop.hbase.client.Table): 4
IOException (java.io.IOException): 3
Cell (org.apache.hadoop.hbase.Cell): 3
Result (org.apache.hadoop.hbase.client.Result): 3
ResultScanner (org.apache.hadoop.hbase.client.ResultScanner): 3
ValidationResult (org.apache.nifi.components.ValidationResult): 3
Before (org.junit.Before): 3
ArrayList (java.util.ArrayList): 2
Map (java.util.Map): 2
PutColumn (org.apache.nifi.hbase.put.PutColumn): 2
Column (org.apache.nifi.hbase.scan.Column): 2
OutputStream (java.io.OutputStream): 1
Charset (java.nio.charset.Charset): 1
HashSet (java.util.HashSet): 1
LinkedHashMap (java.util.LinkedHashMap): 1
Set (java.util.Set): 1
AtomicReference (java.util.concurrent.atomic.AtomicReference): 1
Filter (org.apache.hadoop.hbase.filter.Filter): 1