Use of org.apache.nifi.hbase.scan.Column in project nifi by apache.
The class ScanHBase, method getColumns.
/**
* @param columnsValue a String in the form colFam:colQual,colFam:colQual
* @return a list of Columns based on parsing the given String
*/
private List<Column> getColumns(final String columnsValue) {
    final String[] columns = (columnsValue == null || columnsValue.isEmpty() ? new String[0] : columnsValue.split(","));
    List<Column> columnsList = new ArrayList<>(columns.length);
    for (final String column : columns) {
        if (column.contains(":")) {
            final String[] parts = column.split(":");
            final byte[] cf = parts[0].getBytes(StandardCharsets.UTF_8);
            final byte[] cq = parts[1].getBytes(StandardCharsets.UTF_8);
            columnsList.add(new Column(cf, cq));
        } else {
            final byte[] cf = column.getBytes(StandardCharsets.UTF_8);
            columnsList.add(new Column(cf, null));
        }
    }
    return columnsList;
}
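To make the parsing contract concrete, here is a minimal standalone sketch; the input value and printed labels are illustrative, not from the NiFi source.

// Illustrative only: shows what a typical Columns property value maps to.
public class ColumnsParseDemo {
    public static void main(String[] args) {
        final String columnsValue = "cf1:greeting,cf2"; // hypothetical input
        for (final String column : columnsValue.split(",")) {
            if (column.contains(":")) {
                // becomes new Column(family, qualifier)
                final String[] parts = column.split(":");
                System.out.println("family=" + parts[0] + ", qualifier=" + parts[1]);
            } else {
                // becomes new Column(family, null), i.e. the entire column family
                System.out.println("family=" + column + " (entire family)");
            }
        }
    }
}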
Use of org.apache.nifi.hbase.scan.Column in project nifi by apache.
The class ScanHBase, method onTrigger.
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    try {
        final String tableName = context.getProperty(TABLE_NAME).evaluateAttributeExpressions(flowFile).getValue();
        if (StringUtils.isBlank(tableName)) {
            getLogger().error("Table Name is blank or null for {}, transferring to failure", new Object[] { flowFile });
            session.transfer(session.penalize(flowFile), REL_FAILURE);
            return;
        }
        final String startRow = context.getProperty(START_ROW).evaluateAttributeExpressions(flowFile).getValue();
        final String endRow = context.getProperty(END_ROW).evaluateAttributeExpressions(flowFile).getValue();
        final String filterExpression = context.getProperty(FILTER_EXPRESSION).evaluateAttributeExpressions(flowFile).getValue();
        // Evaluate and validate the time range min and max values; both must be either empty or provided.
        Long timerangeMin = null;
        Long timerangeMax = null;
        try {
            timerangeMin = context.getProperty(TIME_RANGE_MIN).evaluateAttributeExpressions(flowFile).asLong();
        } catch (Exception e) {
            getLogger().error("Time range min value is not a number ({}) for {}, transferring to failure",
                    new Object[] { context.getProperty(TIME_RANGE_MIN).evaluateAttributeExpressions(flowFile).getValue(), flowFile });
            session.transfer(session.penalize(flowFile), REL_FAILURE);
            return;
        }
        try {
            timerangeMax = context.getProperty(TIME_RANGE_MAX).evaluateAttributeExpressions(flowFile).asLong();
        } catch (Exception e) {
            getLogger().error("Time range max value is not a number ({}) for {}, transferring to failure",
                    new Object[] { context.getProperty(TIME_RANGE_MAX).evaluateAttributeExpressions(flowFile).getValue(), flowFile });
            session.transfer(session.penalize(flowFile), REL_FAILURE);
            return;
        }
        if (timerangeMin == null && timerangeMax != null) {
            getLogger().error("Time range min value cannot be blank when max value provided for {}, transferring to failure", new Object[] { flowFile });
            session.transfer(session.penalize(flowFile), REL_FAILURE);
            return;
        } else if (timerangeMin != null && timerangeMax == null) {
            getLogger().error("Time range max value cannot be blank when min value provided for {}, transferring to failure", new Object[] { flowFile });
            session.transfer(session.penalize(flowFile), REL_FAILURE);
            return;
        }
        final Integer limitRows = context.getProperty(LIMIT_ROWS).evaluateAttributeExpressions(flowFile).asInteger();
        final Boolean isReversed = context.getProperty(REVERSED_SCAN).asBoolean();
        final Integer bulkSize = context.getProperty(BULK_SIZE).evaluateAttributeExpressions(flowFile).asInteger();
        final List<Column> columns = getColumns(context.getProperty(COLUMNS).evaluateAttributeExpressions(flowFile).getValue());
        final HBaseClientService hBaseClientService = context.getProperty(HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
        final AtomicReference<Long> rowsPulledHolder = new AtomicReference<>(0L);
        final AtomicReference<Long> ffCountHolder = new AtomicReference<>(0L);
        ScanHBaseResultHandler handler = new ScanHBaseResultHandler(context, session, flowFile, rowsPulledHolder, ffCountHolder, hBaseClientService, tableName, bulkSize);
        try {
            hBaseClientService.scan(tableName, startRow, endRow, filterExpression, timerangeMin, timerangeMax, limitRows, isReversed, columns, handler);
        } catch (Exception e) {
            if (handler.getFlowFile() != null) {
                session.remove(handler.getFlowFile());
            }
            getLogger().error("Unable to fetch rows from HBase table {} due to {}", new Object[] { tableName, e });
            flowFile = session.putAttribute(flowFile, "scanhbase.results.found", Boolean.toString(handler.isHandledAny()));
            session.transfer(flowFile, REL_FAILURE);
            return;
        }
        flowFile = session.putAttribute(flowFile, "scanhbase.results.found", Boolean.toString(handler.isHandledAny()));
        FlowFile openedFF = handler.getFlowFile();
        if (openedFF != null) {
            finalizeFlowFile(session, hBaseClientService, openedFF, tableName, handler.getRecordsCount(), null);
        }
        session.transfer(flowFile, REL_ORIGINAL);
        session.commit();
    } catch (final Exception e) {
        getLogger().error("Failed to receive data from HBase due to {}", e);
        session.rollback();
        // If we failed, yield so that we don't hammer HBase.
        context.yield();
    }
}
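A hedged sketch of how this onTrigger flow might be exercised with NiFi's TestRunner; MockHBaseClientService is a hypothetical stand-in for a real HBaseClientService implementation, and the property values are illustrative.

import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;

// Sketch only: MockHBaseClientService is assumed, not a real NiFi class.
public class ScanHBaseTriggerSketch {
    public void run() throws Exception {
        TestRunner runner = TestRunners.newTestRunner(ScanHBase.class);
        MockHBaseClientService hBaseClient = new MockHBaseClientService();
        runner.addControllerService("hbaseClient", hBaseClient);
        runner.enableControllerService(hBaseClient);
        runner.setProperty(ScanHBase.HBASE_CLIENT_SERVICE, "hbaseClient");
        runner.setProperty(ScanHBase.TABLE_NAME, "nifi");
        runner.setProperty(ScanHBase.START_ROW, "row-00");
        runner.setProperty(ScanHBase.END_ROW, "row-99");
        runner.enqueue(new byte[0]); // the incoming FlowFile that triggers the scan
        runner.run();
        // On success the incoming FlowFile lands on REL_ORIGINAL with scanhbase.results.found set.
        runner.assertTransferCount(ScanHBase.REL_ORIGINAL, 1);
    }
}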
Use of org.apache.nifi.hbase.scan.Column in project nifi by apache.
The class HBase_1_1_2_ClientService, method getResults.
// protected and extracted into separate method for testing
protected ResultScanner getResults(final Table table, final String startRow, final String endRow, final String filterExpression,
        final Long timerangeMin, final Long timerangeMax, final Integer limitRows, final Boolean isReversed,
        final Collection<Column> columns) throws IOException {
    final Scan scan = new Scan();
    if (!StringUtils.isBlank(startRow)) {
        scan.setStartRow(startRow.getBytes(StandardCharsets.UTF_8));
    }
    if (!StringUtils.isBlank(endRow)) {
        scan.setStopRow(endRow.getBytes(StandardCharsets.UTF_8));
    }
    Filter filter = null;
    if (columns != null) {
        for (Column col : columns) {
            if (col.getQualifier() == null) {
                scan.addFamily(col.getFamily());
            } else {
                scan.addColumn(col.getFamily(), col.getQualifier());
            }
        }
    }
    if (!StringUtils.isBlank(filterExpression)) {
        ParseFilter parseFilter = new ParseFilter();
        filter = parseFilter.parseFilterString(filterExpression);
    }
    if (filter != null) {
        scan.setFilter(filter);
    }
    if (timerangeMin != null && timerangeMax != null) {
        scan.setTimeRange(timerangeMin, timerangeMax);
    }
    if (isReversed != null) {
        scan.setReversed(isReversed);
    }
    return table.getScanner(scan);
}
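For reference, a standalone sketch of the Scan this method assembles for sample arguments; the row keys and filter string are illustrative (ParseFilter accepts HBase filter-language strings such as PrefixFilter('row')).

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.ParseFilter;

// Illustrative only: mirrors the Scan that getResults(...) builds for sample arguments.
public class ScanBuildSketch {
    Scan buildScan() throws IOException {
        final Scan scan = new Scan();
        scan.setStartRow("row-00".getBytes(StandardCharsets.UTF_8));
        scan.setStopRow("row-99".getBytes(StandardCharsets.UTF_8)); // the stop row is exclusive
        final Filter filter = new ParseFilter().parseFilterString("PrefixFilter('row')");
        scan.setFilter(filter);
        scan.setTimeRange(0L, System.currentTimeMillis()); // min inclusive, max exclusive
        scan.setReversed(false);
        return scan;
    }
}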
Use of org.apache.nifi.hbase.scan.Column in project nifi by apache.
The class HBase_1_1_2_ClientService, method getResults.
// protected and extracted into separate method for testing
protected ResultScanner getResults(final Table table, final Collection<Column> columns, final Filter filter, final long minTime) throws IOException {
    // Create a new scan. We will set the min timerange as the latest timestamp that
    // we have seen so far. The minimum timestamp is inclusive, so we will get duplicates.
    // We will record any cells that have the latest timestamp, so that when we scan again,
    // we know to throw away those duplicates.
    final Scan scan = new Scan();
    scan.setTimeRange(minTime, Long.MAX_VALUE);
    if (filter != null) {
        scan.setFilter(filter);
    }
    if (columns != null) {
        for (Column col : columns) {
            if (col.getQualifier() == null) {
                scan.addFamily(col.getFamily());
            } else {
                scan.addColumn(col.getFamily(), col.getQualifier());
            }
        }
    }
    return table.getScanner(scan);
}
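A hedged sketch of the polling pattern the comment describes, written as a hypothetical caller inside a subclass of the service; lastSeenTimestamp, seenCellKeys, emit, and the table/columns/filter fields are all illustrative names, not NiFi source.

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.nifi.hbase.scan.Column;

// Sketch only. Because setTimeRange's lower bound is inclusive, cells stamped exactly
// lastSeenTimestamp come back on the next scan and must be skipped.
public class IncrementalScanSketch extends HBase_1_1_2_ClientService {
    private Table table;                  // assumed to be initialized elsewhere
    private Collection<Column> columns;   // may be null for all columns
    private Filter filter;                // may be null
    private long lastSeenTimestamp = 0L;
    private Set<String> seenCellKeys = new HashSet<>();

    void pollOnce() throws IOException {
        long maxTs = lastSeenTimestamp;
        final Set<String> keysAtMaxTs = new HashSet<>();
        try (ResultScanner scanner = getResults(table, columns, filter, lastSeenTimestamp)) {
            for (final Result result : scanner) {
                for (final Cell cell : result.rawCells()) {
                    final long ts = cell.getTimestamp();
                    // Simplified dedupe key: row + timestamp (a real key would also include family/qualifier).
                    final String key = Bytes.toString(CellUtil.cloneRow(cell)) + ":" + ts;
                    if (ts == lastSeenTimestamp && seenCellKeys.contains(key)) {
                        continue; // duplicate from the previous pass
                    }
                    emit(cell); // hypothetical downstream handling
                    if (ts > maxTs) {
                        maxTs = ts;
                        keysAtMaxTs.clear();
                    }
                    if (ts == maxTs) {
                        keysAtMaxTs.add(key);
                    }
                }
            }
        }
        lastSeenTimestamp = maxTs;
        seenCellKeys = keysAtMaxTs;
    }

    private void emit(Cell cell) {
        // placeholder for real processing
    }
}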
Use of org.apache.nifi.hbase.scan.Column in project nifi by apache.
The class HBase_1_1_2_ClientService, method getResults.
// protected and extracted into separate method for testing
protected ResultScanner getResults(final Table table, final byte[] startRow, final byte[] endRow, final Collection<Column> columns) throws IOException {
    final Scan scan = new Scan();
    scan.setStartRow(startRow);
    scan.setStopRow(endRow);
    if (columns != null) {
        for (Column col : columns) {
            if (col.getQualifier() == null) {
                scan.addFamily(col.getFamily());
            } else {
                scan.addColumn(col.getFamily(), col.getQualifier());
            }
        }
    }
    return table.getScanner(scan);
}
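Finally, a short usage sketch for this row-range variant, assuming it runs inside the service (or a subclass) with an open Table handle named table; the row keys and family name are illustrative, and recall that HBase treats the stop row as exclusive.

import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.nifi.hbase.scan.Column;

// Sketch: scan the key range [user-0001, user-0100) restricted to one whole column family.
final byte[] startRow = "user-0001".getBytes(StandardCharsets.UTF_8);
final byte[] endRow = "user-0100".getBytes(StandardCharsets.UTF_8); // exclusive
final List<Column> columns = Collections.singletonList(
        new Column("cf".getBytes(StandardCharsets.UTF_8), null)); // null qualifier = entire family
try (ResultScanner scanner = getResults(table, startRow, endRow, columns)) {
    for (final Result result : scanner) {
        // process each Result ...
    }
}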