
Example 6 with Column

Use of org.apache.nifi.hbase.scan.Column in project nifi by apache.

In the class ScanHBase, the getColumns method:

/**
 * @param columnsValue a String in the form colFam:colQual,colFam:colQual
 * @return a list of Columns based on parsing the given String
 */
private List<Column> getColumns(final String columnsValue) {
    final String[] columns = (columnsValue == null || columnsValue.isEmpty() ? new String[0] : columnsValue.split(","));
    List<Column> columnsList = new ArrayList<>(columns.length);
    for (final String column : columns) {
        if (column.contains(":")) {
            final String[] parts = column.split(":");
            final byte[] cf = parts[0].getBytes(StandardCharsets.UTF_8);
            final byte[] cq = parts[1].getBytes(StandardCharsets.UTF_8);
            columnsList.add(new Column(cf, cq));
        } else {
            final byte[] cf = column.getBytes(StandardCharsets.UTF_8);
            columnsList.add(new Column(cf, null));
        }
    }
    return columnsList;
}
Also used: Column (org.apache.nifi.hbase.scan.Column), ArrayList (java.util.ArrayList)
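
For reference, a minimal sketch (hypothetical values, not part of ScanHBase) of what a Columns property value such as "cf1:greeting,cf2" turns into; it assumes only the Column constructor and getters already visible in these examples:

import java.nio.charset.StandardCharsets;
import org.apache.nifi.hbase.scan.Column;

public class ColumnsPropertySketch {
    public static void main(String[] args) {
        // "cf1:greeting" -> family plus qualifier; "cf2" -> family only (qualifier stays null)
        final Column singleCell = new Column(
                "cf1".getBytes(StandardCharsets.UTF_8),
                "greeting".getBytes(StandardCharsets.UTF_8));
        final Column wholeFamily = new Column("cf2".getBytes(StandardCharsets.UTF_8), null);

        // getColumns("cf1:greeting,cf2") returns exactly these two entries; a null qualifier
        // later leads to scan.addFamily(...) instead of scan.addColumn(...) (see Examples 8-10).
        System.out.println(new String(singleCell.getQualifier(), StandardCharsets.UTF_8)); // greeting
        System.out.println(wholeFamily.getQualifier() == null);                            // true
    }
}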

Example 7 with Column

Use of org.apache.nifi.hbase.scan.Column in project nifi by apache.

In the class ScanHBase, the onTrigger method:

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    try {
        final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        if (StringUtils.isBlank(tableName)) {
            getLogger().error("Table Name is blank or null for {}, transferring to failure", new Object[] { flowFile });
            session.transfer(session.penalize(flowFile), REL_FAILURE);
            return;
        }
        final String startRow = context.getProperty(START_ROW).evaluateAttributeExpressions(flowFile).getValue();
        final String endRow = context.getProperty(END_ROW).evaluateAttributeExpressions(flowFile).getValue();
        final String filterExpression = context.getProperty(FILTER_EXPRESSION).evaluateAttributeExpressions(flowFile).getValue();
        // Evaluate and validate the time range min and max values. They should either both be empty or both be provided.
        Long timerangeMin = null;
        Long timerangeMax = null;
        try {
            timerangeMin = context.getProperty(TIME_RANGE_MIN).evaluateAttributeExpressions(flowFile).asLong();
        } catch (Exception e) {
            getLogger().error("Time range min value is not a number ({}) for {}, transferring to failure", new Object[] { context.getProperty(TIME_RANGE_MIN).evaluateAttributeExpressions(flowFile).getValue(), flowFile });
            session.transfer(session.penalize(flowFile), REL_FAILURE);
            return;
        }
        try {
            timerangeMax = context.getProperty(TIME_RANGE_MAX).evaluateAttributeExpressions(flowFile).asLong();
        } catch (Exception e) {
            getLogger().error("Time range max value is not a number ({}) for {}, transferring to failure", new Object[] { context.getProperty(TIME_RANGE_MAX).evaluateAttributeExpressions(flowFile).getValue(), flowFile });
            session.transfer(session.penalize(flowFile), REL_FAILURE);
            return;
        }
        if (timerangeMin == null && timerangeMax != null) {
            getLogger().error("Time range min value cannot be blank when max value provided for {}, transferring to failure", new Object[] { flowFile });
            session.transfer(session.penalize(flowFile), REL_FAILURE);
            return;
        } else if (timerangeMin != null && timerangeMax == null) {
            getLogger().error("Time range max value cannot be blank when min value provided for {}, transferring to failure", new Object[] { flowFile });
            session.transfer(session.penalize(flowFile), REL_FAILURE);
            return;
        }
        final Integer limitRows = context.getProperty(LIMIT_ROWS).evaluateAttributeExpressions(flowFile).asInteger();
        final Boolean isReversed = context.getProperty(REVERSED_SCAN).asBoolean();
        final Integer bulkSize = context.getProperty(BULK_SIZE).evaluateAttributeExpressions(flowFile).asInteger();
        final List<Column> columns = getColumns(context.getProperty(COLUMNS).evaluateAttributeExpressions(flowFile).getValue());
        final HBaseClientService hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
        final AtomicReference<Long> rowsPulledHolder = new AtomicReference<>(0L);
        final AtomicReference<Long> ffCountHolder = new AtomicReference<>(0L);
        ScanHBaseResultHandler handler = new ScanHBaseResultHandler(context, session, flowFile, rowsPulledHolder, ffCountHolder, hBaseClientService, tableName, bulkSize);
        try {
            hBaseClientService.scan(tableName, startRow, endRow, filterExpression, timerangeMin, timerangeMax, limitRows, isReversed, columns, handler);
        } catch (Exception e) {
            if (handler.getFlowFile() != null) {
                session.remove(handler.getFlowFile());
            }
            getLogger().error("Unable to fetch rows from HBase table {} due to {}", new Object[] { tableName, e });
            flowFile = session.putAttribute(flowFile, "scanhbase.results.found", Boolean.toString(handler.isHandledAny()));
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        flowFile = session.putAttribute(flowFile, "scanhbase.results.found", Boolean.toString(handler.isHandledAny()));
        FlowFile openedFF = handler.getFlowFile();
        if (openedFF != null) {
            finalizeFlowFile(session, hBaseClientService, openedFF, tableName, handler.getRecordsCount(), null);
        }
        session.transfer(flowFile, REL_ORIGINAL);
        session.commit();
    } catch (final Exception e) {
        getLogger().error("Failed to receive data from HBase due to {}", e);
        session.rollback();
        // if we failed, we want to yield so that we don't hammer HBase.
        context.yield();
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), AtomicReference (java.util.concurrent.atomic.AtomicReference), ProcessException (org.apache.nifi.processor.exception.ProcessException), IOException (java.io.IOException), Column (org.apache.nifi.hbase.scan.Column)
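
To show the shape of the inputs the processor assembles, here is a small sketch of a direct call to HBaseClientService.scan with the same argument order as above. The table name, row keys, and the lambda handler are hypothetical, and the handler assumes the org.apache.nifi.hbase.scan.ResultHandler callback receives a row key and an array of ResultCell objects; treat that callback shape as an assumption rather than a confirmed signature:

import java.util.List;
import org.apache.nifi.hbase.HBaseClientService;
import org.apache.nifi.hbase.scan.Column;
import org.apache.nifi.hbase.scan.ResultHandler;

public class ScanCallSketch {
    void runScan(final HBaseClientService hBaseClientService, final List<Column> columns) throws Exception {
        // Assumed callback shape: handle(byte[] rowKey, ResultCell[] cells)
        final ResultHandler handler = (rowKey, cells) ->
                System.out.println("row with " + cells.length + " cells");

        hBaseClientService.scan(
                "my-table",   // tableName (hypothetical)
                "row-0000",   // startRow
                "row-9999",   // endRow
                null,         // filterExpression: none
                null, null,   // timerangeMin / timerangeMax: both empty, per the both-or-neither rule above
                100,          // limitRows
                false,        // isReversed
                columns,
                handler);
    }
}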

Example 8 with Column

Use of org.apache.nifi.hbase.scan.Column in project nifi by apache.

In the class HBase_1_1_2_ClientService, the getResults method (overload taking String row keys, a filter expression, and a time range):

protected ResultScanner getResults(final Table table, final String startRow, final String endRow, final String filterExpression, final Long timerangeMin, final Long timerangeMax, final Integer limitRows, final Boolean isReversed, final Collection<Column> columns) throws IOException {
    final Scan scan = new Scan();
    if (!StringUtils.isBlank(startRow)) {
        scan.setStartRow(startRow.getBytes(StandardCharsets.UTF_8));
    }
    if (!StringUtils.isBlank(endRow)) {
        scan.setStopRow(endRow.getBytes(StandardCharsets.UTF_8));
    }
    Filter filter = null;
    if (columns != null) {
        for (Column col : columns) {
            if (col.getQualifier() == null) {
                scan.addFamily(col.getFamily());
            } else {
                scan.addColumn(col.getFamily(), col.getQualifier());
            }
        }
    }
    if (!StringUtils.isBlank(filterExpression)) {
        ParseFilter parseFilter = new ParseFilter();
        filter = parseFilter.parseFilterString(filterExpression);
    }
    if (filter != null) {
        scan.setFilter(filter);
    }
    if (timerangeMin != null && timerangeMax != null) {
        scan.setTimeRange(timerangeMin, timerangeMax);
    }
    if (isReversed != null) {
        scan.setReversed(isReversed);
    }
    return table.getScanner(scan);
}
Also used: ParseFilter (org.apache.hadoop.hbase.filter.ParseFilter), Filter (org.apache.hadoop.hbase.filter.Filter), PutColumn (org.apache.nifi.hbase.put.PutColumn), Column (org.apache.nifi.hbase.scan.Column), Scan (org.apache.hadoop.hbase.client.Scan)
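
The filterExpression string is written in HBase's filter language and handed to ParseFilter exactly as above. A couple of hypothetical expressions, sketched under the assumption that standard HBase filters such as PrefixFilter, KeyOnlyFilter, and FirstKeyOnlyFilter are on the classpath:

import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.ParseFilter;

public class FilterExpressionSketch {
    Filter parse(final String expression) throws Exception {
        // Same parsing step as in getResults(...) above
        return new ParseFilter().parseFilterString(expression);
    }

    void examples() throws Exception {
        final Filter byRowPrefix = parse("PrefixFilter('row-')");                     // rows starting with "row-"
        final Filter keysOnly    = parse("KeyOnlyFilter() AND FirstKeyOnlyFilter()"); // row keys only, one cell per row
    }
}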

Example 9 with Column

Use of org.apache.nifi.hbase.scan.Column in project nifi by apache.

In the class HBase_1_1_2_ClientService, the getResults method (overload taking a pre-built Filter and a minimum timestamp):

// protected and extracted into a separate method for testing
protected ResultScanner getResults(final Table table, final Collection<Column> columns, final Filter filter, final long minTime) throws IOException {
    // Create a new scan. We will set the min timerange as the latest timestamp that
    // we have seen so far. The minimum timestamp is inclusive, so we will get duplicates.
    // We will record any cells that have the latest timestamp, so that when we scan again,
    // we know to throw away those duplicates.
    final Scan scan = new Scan();
    scan.setTimeRange(minTime, Long.MAX_VALUE);
    if (filter != null) {
        scan.setFilter(filter);
    }
    if (columns != null) {
        for (Column col : columns) {
            if (col.getQualifier() == null) {
                scan.addFamily(col.getFamily());
            } else {
                scan.addColumn(col.getFamily(), col.getQualifier());
            }
        }
    }
    return table.getScanner(scan);
}
Also used: PutColumn (org.apache.nifi.hbase.put.PutColumn), Column (org.apache.nifi.hbase.scan.Column), Scan (org.apache.hadoop.hbase.client.Scan)
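
The comment in that method describes an incremental-scan pattern: because the minimum timestamp is inclusive, the newest cells from the previous pass come back again and have to be skipped. A minimal, hypothetical sketch of that caller-side bookkeeping (class and method names are illustrative, not the NiFi implementation):

import java.util.HashSet;
import java.util.Set;

public class LatestTimestampTracker {
    private long boundaryTimestamp = 0L;                  // max timestamp of the previous pass
    private Set<String> boundaryCells = new HashSet<>();  // cell ids already emitted at that timestamp

    private long currentMax = 0L;
    private final Set<String> currentMaxCells = new HashSet<>();

    /** @return true if the cell should be emitted, false if it repeats one from the previous pass. */
    public boolean offer(final long timestamp, final String cellId) {
        if (timestamp > currentMax) {                      // track the newest cells of this pass
            currentMax = timestamp;
            currentMaxCells.clear();
        }
        if (timestamp == currentMax) {
            currentMaxCells.add(cellId);
        }
        return timestamp != boundaryTimestamp || !boundaryCells.contains(cellId);
    }

    /** Call once a pass finishes; the returned value is the minTime for the next getResults(...) call. */
    public long completePass() {
        if (currentMax > boundaryTimestamp) {
            boundaryTimestamp = currentMax;
            boundaryCells = new HashSet<>(currentMaxCells);
        } else {
            boundaryCells.addAll(currentMaxCells);         // no newer cells; keep extending the same boundary
        }
        currentMax = 0L;
        currentMaxCells.clear();
        return boundaryTimestamp;
    }
}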

Example 10 with Column

Use of org.apache.nifi.hbase.scan.Column in project nifi by apache.

In the class HBase_1_1_2_ClientService, the getResults method (overload taking byte[] row keys):

// protected and extracted into a separate method for testing
protected ResultScanner getResults(final Table table, final byte[] startRow, final byte[] endRow, final Collection<Column> columns) throws IOException {
    final Scan scan = new Scan();
    scan.setStartRow(startRow);
    scan.setStopRow(endRow);
    if (columns != null) {
        for (Column col : columns) {
            if (col.getQualifier() == null) {
                scan.addFamily(col.getFamily());
            } else {
                scan.addColumn(col.getFamily(), col.getQualifier());
            }
        }
    }
    return table.getScanner(scan);
}
Also used: PutColumn (org.apache.nifi.hbase.put.PutColumn), Column (org.apache.nifi.hbase.scan.Column), Scan (org.apache.hadoop.hbase.client.Scan)
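
Since the method is protected specifically so tests can reach it, a hypothetical test-style subclass can call this byte[]-based overload directly. The class name, row keys, and column family below are illustrative, and the package of HBase_1_1_2_ClientService is assumed to be org.apache.nifi.hbase:

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Table;
import org.apache.nifi.hbase.HBase_1_1_2_ClientService;
import org.apache.nifi.hbase.scan.Column;

public class RowRangeScanSketch extends HBase_1_1_2_ClientService {
    ResultScanner scanWholeFamily(final Table table) throws IOException {
        final byte[] startRow = "row-0000".getBytes(StandardCharsets.UTF_8);
        final byte[] endRow = "row-9999".getBytes(StandardCharsets.UTF_8); // stop row is exclusive in HBase
        // A family-only Column (null qualifier) becomes scan.addFamily(...) in the method above
        return getResults(table, startRow, endRow,
                Collections.singletonList(new Column("cf1".getBytes(StandardCharsets.UTF_8), null)));
    }
}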

Aggregations

Column (org.apache.nifi.hbase.scan.Column): 14
PutColumn (org.apache.nifi.hbase.put.PutColumn): 7
ArrayList (java.util.ArrayList): 5
Test (org.junit.Test): 4
HashMap (java.util.HashMap): 3
Scan (org.apache.hadoop.hbase.client.Scan): 3
Table (org.apache.hadoop.hbase.client.Table): 3
TestRunner (org.apache.nifi.util.TestRunner): 3
IOException (java.io.IOException): 2
FlowFile (org.apache.nifi.flowfile.FlowFile): 2
ResultCell (org.apache.nifi.hbase.scan.ResultCell): 2
ProcessException (org.apache.nifi.processor.exception.ProcessException): 2
LinkedHashMap (java.util.LinkedHashMap): 1
Map (java.util.Map): 1
AtomicReference (java.util.concurrent.atomic.AtomicReference): 1
Filter (org.apache.hadoop.hbase.filter.Filter): 1
ParseFilter (org.apache.hadoop.hbase.filter.ParseFilter): 1
OnScheduled (org.apache.nifi.annotation.lifecycle.OnScheduled): 1
StateMap (org.apache.nifi.components.state.StateMap): 1
DistributedMapCacheClient (org.apache.nifi.distributed.cache.client.DistributedMapCacheClient): 1