Use of org.apache.hadoop.hbase.filter.PageFilter in project hadoop by apache.
Class ApplicationEntityReader, method getResults:
@Override
protected ResultScanner getResults(Configuration hbaseConf, Connection conn,
    FilterList filterList) throws IOException {
  Scan scan = new Scan();
  TimelineReaderContext context = getContext();
  // Whether or not flowRunID is null doesn't matter, the
  // ApplicationRowKeyPrefix will do the right thing.
  RowKeyPrefix<ApplicationRowKey> applicationRowKeyPrefix =
      new ApplicationRowKeyPrefix(context.getClusterId(), context.getUserId(),
          context.getFlowName(), context.getFlowRunId());
  scan.setRowPrefixFilter(applicationRowKeyPrefix.getRowKeyPrefix());
  FilterList newList = new FilterList();
  newList.addFilter(new PageFilter(getFilters().getLimit()));
  if (filterList != null && !filterList.getFilters().isEmpty()) {
    newList.addFilter(filterList);
  }
  scan.setFilter(newList);
  scan.setMaxVersions(getDataToRetrieve().getMetricsLimit());
  return getTable().getResultScanner(hbaseConf, conn, scan);
}
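A note on the FilterList used above: created with its no-argument constructor, it combines member filters with Operator.MUST_PASS_ALL, so a row must pass both the PageFilter and the caller's filters. Below is a minimal sketch with the operator spelled out; the page size and the extra FirstKeyOnlyFilter are placeholders, not part of the code above.

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.filter.PageFilter;

// Equivalent, more explicit construction: every member filter must accept the row.
FilterList combined = new FilterList(FilterList.Operator.MUST_PASS_ALL);
combined.addFilter(new PageFilter(100L));      // placeholder page size
combined.addFilter(new FirstKeyOnlyFilter());  // placeholder second filter
Scan scan = new Scan();
scan.setFilter(combined);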
Use of org.apache.hadoop.hbase.filter.PageFilter in project YCSB by brianfrankcooper.
Class HBaseClient10, method scan:
/**
 * Perform a range scan for a set of records in the database. Each field/value
 * pair from the result will be stored in a HashMap.
 *
 * @param table
 *          The name of the table
 * @param startkey
 *          The record key of the first record to read.
 * @param recordcount
 *          The number of records to read
 * @param fields
 *          The list of fields to read, or null for all of them
 * @param result
 *          A Vector of HashMaps, where each HashMap is a set of field/value
 *          pairs for one record
 * @return Zero on success, a non-zero error code on error
 */
@Override
public Status scan(String table, String startkey, int recordcount,
    Set<String> fields, Vector<HashMap<String, ByteIterator>> result) {
  // if this is a "new" table, init HTable object. Else, use existing one
  if (!tableName.equals(table)) {
    currentTable = null;
    try {
      getHTable(table);
      tableName = table;
    } catch (IOException e) {
      System.err.println("Error accessing HBase table: " + e);
      return Status.ERROR;
    }
  }
  Scan s = new Scan(Bytes.toBytes(startkey));
  // HBase has no record limit. Here, assume recordcount is small enough to
  // bring back in one call.
  // We get back recordcount records
  s.setCaching(recordcount);
  if (this.usePageFilter) {
    s.setFilter(new PageFilter(recordcount));
  }
  // add specified fields or else all fields
  if (fields == null) {
    s.addFamily(columnFamilyBytes);
  } else {
    for (String field : fields) {
      s.addColumn(columnFamilyBytes, Bytes.toBytes(field));
    }
  }
  // get results
  ResultScanner scanner = null;
  try {
    scanner = currentTable.getScanner(s);
    int numResults = 0;
    for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
      // get row key
      String key = Bytes.toString(rr.getRow());
      if (debug) {
        System.out.println("Got scan result for key: " + key);
      }
      HashMap<String, ByteIterator> rowResult = new HashMap<String, ByteIterator>();
      while (rr.advance()) {
        final Cell cell = rr.current();
        rowResult.put(Bytes.toString(CellUtil.cloneQualifier(cell)),
            new ByteArrayByteIterator(CellUtil.cloneValue(cell)));
      }
      // add rowResult to result vector
      result.add(rowResult);
      numResults++;
      // break is required: the scan itself is unbounded and PageFilter (when
      // used) only limits rows per region, so stop once recordcount rows have
      // been read.
      if (numResults >= recordcount) {
        // if hit recordcount, bail out
        break;
      }
    }
    // done with row
  } catch (IOException e) {
    if (debug) {
      System.out.println("Error in getting/parsing scan result: " + e);
    }
    return Status.ERROR;
  } finally {
    if (scanner != null) {
      scanner.close();
    }
  }
  return Status.OK;
}
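The manual break above is needed because PageFilter is evaluated independently on each region server, so a scan can still return more than recordcount rows in total. The following is a minimal sketch of the same pattern in isolation; the helper name and values are illustrative only. On HBase 2.x clients, Scan#setLimit(int) can enforce the cap instead of the counter.

import java.io.IOException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical helper: read at most 'limit' rows starting at 'startKey'.
// PageFilter short-circuits each region early, while the counter enforces
// the overall cap client-side.
static int scanAtMost(Connection conn, String tableName, String startKey,
    int limit) throws IOException {
  Scan scan = new Scan(Bytes.toBytes(startKey));
  scan.setFilter(new PageFilter(limit));
  int count = 0;
  try (Table table = conn.getTable(TableName.valueOf(tableName));
       ResultScanner scanner = table.getScanner(scan)) {
    for (Result r = scanner.next(); r != null && count < limit; r = scanner.next()) {
      count++;
    }
  }
  return count;
}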
Use of org.apache.hadoop.hbase.filter.PageFilter in project camel by apache.
Class HBaseConsumer, method poll:
@Override
protected int poll() throws Exception {
  try (Table table = endpoint.getTable()) {
    shutdownRunningTask = null;
    pendingExchanges = 0;
    Queue<Exchange> queue = new LinkedList<>();
    Scan scan = new Scan();
    List<Filter> filters = new LinkedList<>();
    if (endpoint.getFilters() != null) {
      filters.addAll(endpoint.getFilters());
    }
    if (maxMessagesPerPoll > 0) {
      filters.add(new PageFilter(maxMessagesPerPoll));
    }
    if (!filters.isEmpty()) {
      Filter compoundFilter = new FilterList(filters);
      scan.setFilter(compoundFilter);
    }
    if (rowModel != null && rowModel.getCells() != null) {
      Set<HBaseCell> cellModels = rowModel.getCells();
      for (HBaseCell cellModel : cellModels) {
        scan.addColumn(HBaseHelper.getHBaseFieldAsBytes(cellModel.getFamily()),
            HBaseHelper.getHBaseFieldAsBytes(cellModel.getQualifier()));
      }
    }
    ResultScanner scanner = table.getScanner(scan);
    int exchangeCount = 0;
    // The next three statements are used just to get a reference to the
    // BodyCellMappingStrategy instance.
    Exchange exchange = endpoint.createExchange();
    exchange.getIn().setHeader(CellMappingStrategyFactory.STRATEGY, CellMappingStrategyFactory.BODY);
    CellMappingStrategy mappingStrategy = endpoint.getCellMappingStrategyFactory().getStrategy(exchange.getIn());
    for (Result result = scanner.next();
         (exchangeCount < maxMessagesPerPoll || maxMessagesPerPoll <= 0) && result != null;
         result = scanner.next()) {
      HBaseData data = new HBaseData();
      HBaseRow resultRow = new HBaseRow();
      resultRow.apply(rowModel);
      byte[] row = result.getRow();
      resultRow.setId(endpoint.getCamelContext().getTypeConverter().convertTo(rowModel.getRowType(), row));
      List<Cell> cells = result.listCells();
      if (cells != null) {
        Set<HBaseCell> cellModels = rowModel.getCells();
        if (cellModels.size() > 0) {
          for (HBaseCell modelCell : cellModels) {
            HBaseCell resultCell = new HBaseCell();
            String family = modelCell.getFamily();
            String column = modelCell.getQualifier();
            resultCell.setValue(endpoint.getCamelContext().getTypeConverter().convertTo(
                modelCell.getValueType(),
                result.getValue(HBaseHelper.getHBaseFieldAsBytes(family),
                    HBaseHelper.getHBaseFieldAsBytes(column))));
            resultCell.setFamily(modelCell.getFamily());
            resultCell.setQualifier(modelCell.getQualifier());
            resultRow.getCells().add(resultCell);
          }
        } else {
          // just need to put every key value into the result Cells
          for (Cell cell : cells) {
            String qualifier = new String(CellUtil.cloneQualifier(cell));
            String family = new String(CellUtil.cloneFamily(cell));
            HBaseCell resultCell = new HBaseCell();
            resultCell.setFamily(family);
            resultCell.setQualifier(qualifier);
            resultCell.setValue(endpoint.getCamelContext().getTypeConverter().convertTo(String.class, CellUtil.cloneValue(cell)));
            resultRow.getCells().add(resultCell);
          }
        }
        data.getRows().add(resultRow);
        exchange = endpoint.createExchange();
        // Probably overkill but kept it here for consistency.
        exchange.getIn().setHeader(CellMappingStrategyFactory.STRATEGY, CellMappingStrategyFactory.BODY);
        mappingStrategy.applyScanResults(exchange.getIn(), data);
        // Make sure that there is a header containing the marked row ids, so that they can be deleted.
        exchange.getIn().setHeader(HBaseAttribute.HBASE_MARKED_ROW_ID.asHeader(), result.getRow());
        queue.add(exchange);
        exchangeCount++;
      }
    }
    scanner.close();
    return queue.isEmpty() ? 0 : processBatch(CastUtils.cast(queue));
  }
}
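As in the YCSB example, the exchangeCount guard in the loop is what actually enforces maxMessagesPerPoll, since the PageFilter only trims each region's contribution. The sketch below is a hedged alternative, with a hypothetical helper name and assuming maxMessagesPerPoll > 0, that caps the batch with ResultScanner.next(int) instead of counting inside the loop.

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.PageFilter;

// Hypothetical variant: PageFilter trims the scan on each region server, and
// ResultScanner.next(n) caps the returned batch client-side in a single call.
static List<Result> pollBatch(Table table, int maxMessagesPerPoll) throws IOException {
  Scan scan = new Scan();
  scan.setFilter(new PageFilter(maxMessagesPerPoll));
  try (ResultScanner scanner = table.getScanner(scan)) {
    return Arrays.asList(scanner.next(maxMessagesPerPoll));
  }
}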
Use of org.apache.hadoop.hbase.filter.PageFilter in project phoenix by apache.
Class BaseResultIterators, method initializeScan:
private static void initializeScan(QueryPlan plan, Integer perScanLimit, Integer offset, Scan scan) throws SQLException {
  StatementContext context = plan.getContext();
  TableRef tableRef = plan.getTableRef();
  PTable table = tableRef.getTable();
  Map<byte[], NavigableSet<byte[]>> familyMap = scan.getFamilyMap();
  // Hack for PHOENIX-2067 to force raw scan over all KeyValues to fix their row keys
  if (context.getConnection().isDescVarLengthRowKeyUpgrade()) {
    // We project *all* KeyValues across all column families as we make a pass over
    // a physical table and we want to make sure we catch all KeyValues that may be
    // dynamic or part of an updatable view.
    familyMap.clear();
    scan.setMaxVersions();
    // Remove any filter
    scan.setFilter(null);
    // Traverse (and subsequently clone) all KeyValues
    scan.setRaw(true);
    // Pass over PTable so we can re-write rows according to the row key schema
    scan.setAttribute(BaseScannerRegionObserver.UPGRADE_DESC_ROW_KEY,
        UngroupedAggregateRegionObserver.serialize(table));
  } else {
    FilterableStatement statement = plan.getStatement();
    RowProjector projector = plan.getProjector();
    boolean optimizeProjection = false;
    boolean keyOnlyFilter = familyMap.isEmpty() && context.getWhereConditionColumns().isEmpty();
    if (!projector.projectEverything()) {
      // not match the actual column families of the table (which is bad).
      if (keyOnlyFilter && table.getColumnFamilies().size() == 1) {
        // Project the one column family. We must project a column family since it's possible
        // that there are other non declared column families that we need to ignore.
        scan.addFamily(table.getColumnFamilies().get(0).getName().getBytes());
      } else {
        optimizeProjection = true;
        if (projector.projectEveryRow()) {
          if (table.getViewType() == ViewType.MAPPED) {
            // Since we don't have the empty key value in MAPPED tables,
            // we must project all CFs in HRS. However, only the
            // selected column values are returned back to client.
            context.getWhereConditionColumns().clear();
            for (PColumnFamily family : table.getColumnFamilies()) {
              context.addWhereConditionColumn(family.getName().getBytes(), null);
            }
          } else {
            byte[] ecf = SchemaUtil.getEmptyColumnFamily(table);
            // Add the empty key value column, unless the empty column family has already
            // been projected in its entirety.
            if (!familyMap.containsKey(ecf) || familyMap.get(ecf) != null) {
              scan.addColumn(ecf, EncodedColumnsUtil.getEmptyKeyValueInfo(table).getFirst());
            }
          }
        }
      }
    }
    // Add FirstKeyOnlyFilter if there are no references to key value columns
    if (keyOnlyFilter) {
      ScanUtil.andFilterAtBeginning(scan, new FirstKeyOnlyFilter());
    }
    if (perScanLimit != null) {
      ScanUtil.andFilterAtEnd(scan, new PageFilter(perScanLimit));
    }
    if (offset != null) {
      ScanUtil.addOffsetAttribute(scan, offset);
    }
    int cols = plan.getGroupBy().getOrderPreservingColumnCount();
    if (cols > 0 && keyOnlyFilter
        && !plan.getStatement().getHint().hasHint(HintNode.Hint.RANGE_SCAN)
        && cols < plan.getTableRef().getTable().getRowKeySchema().getFieldCount()
        && plan.getGroupBy().isOrderPreserving()
        && (context.getAggregationManager().isEmpty() || plan.getGroupBy().isUngroupedAggregate())) {
      ScanUtil.andFilterAtEnd(scan,
          new DistinctPrefixFilter(plan.getTableRef().getTable().getRowKeySchema(), cols));
      if (plan.getLimit() != null) {
        // We can push the limit to the server
        ScanUtil.andFilterAtEnd(scan, new PageFilter(plan.getLimit()));
      }
    }
    scan.setAttribute(BaseScannerRegionObserver.QUALIFIER_ENCODING_SCHEME,
        new byte[] { table.getEncodingScheme().getSerializedMetadataValue() });
    scan.setAttribute(BaseScannerRegionObserver.IMMUTABLE_STORAGE_ENCODING_SCHEME,
        new byte[] { table.getImmutableStorageScheme().getSerializedMetadataValue() });
    // we use this flag on the server side to determine which value column qualifier to use in the key value we return from server.
    scan.setAttribute(BaseScannerRegionObserver.USE_NEW_VALUE_COLUMN_QUALIFIER, Bytes.toBytes(true));
    // For an analyze-table scan the qualifier ranges are not needed, so there is no point setting the range.
    if (!ScanUtil.isAnalyzeTable(scan)) {
      setQualifierRanges(keyOnlyFilter, table, scan, context);
    }
    if (optimizeProjection) {
      optimizeProjection(context, scan, table, statement);
    }
  }
}
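Phoenix attaches the FirstKeyOnlyFilter, PageFilter and DistinctPrefixFilter through its internal ScanUtil helpers (andFilterAtBeginning, andFilterAtEnd). As a rough approximation only, and not Phoenix's actual implementation, combining a new filter with whatever filter a Scan already carries can look like this in plain HBase terms:

import java.util.Arrays;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;

// Approximate sketch: AND a new filter onto the scan's existing filter.
static void andFilter(Scan scan, Filter toAdd) {
  Filter existing = scan.getFilter();
  if (existing == null) {
    scan.setFilter(toAdd);
  } else if (existing instanceof FilterList
      && ((FilterList) existing).getOperator() == FilterList.Operator.MUST_PASS_ALL) {
    ((FilterList) existing).addFilter(toAdd);
  } else {
    scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL,
        Arrays.asList(existing, toAdd)));
  }
}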
Use of org.apache.hadoop.hbase.filter.PageFilter in project atlas by apache.
Class HBaseBasedAuditRepository, method listEventsV1:
/**
 * List events for the given entity id in decreasing order of timestamp, from the given startKey. Returns n results.
 * @param entityId entity id
 * @param startKey key for the first event to be returned, used for pagination
 * @param n number of events to be returned
 * @return list of events
 * @throws AtlasException
 */
public List<EntityAuditEvent> listEventsV1(String entityId, String startKey, short n) throws AtlasException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Listing events for entity id {}, starting timestamp {}, #records {}", entityId, startKey, n);
  }
  Table table = null;
  ResultScanner scanner = null;
  try {
    table = connection.getTable(tableName);
    /**
     * Scan Details:
     * In hbase, the events are stored in increasing order of timestamp. So, doing reverse scan to get the latest event first.
     * Page filter is set to limit the number of results returned.
     * Stop row is set to the entity id to avoid going past the current entity while scanning.
     * small is set to true to optimise RPC calls as the scanner is created per request.
     */
    Scan scan = new Scan().setReversed(true).setFilter(new PageFilter(n))
        .setStopRow(Bytes.toBytes(entityId)).setCaching(n).setSmall(true);
    if (StringUtils.isEmpty(startKey)) {
      // Set start row to entity id + max long value
      byte[] entityBytes = getKey(entityId, Long.MAX_VALUE);
      scan = scan.setStartRow(entityBytes);
    } else {
      scan = scan.setStartRow(Bytes.toBytes(startKey));
    }
    scanner = table.getScanner(scan);
    Result result;
    List<EntityAuditEvent> events = new ArrayList<>();
    // PageFilter is applied per region server, so more than n results can be
    // returned; adding an extra check on n here
    while ((result = scanner.next()) != null && events.size() < n) {
      EntityAuditEvent event = fromKey(result.getRow());
      // In case the user sets random start key, guarding against random events
      if (!event.getEntityId().equals(entityId)) {
        continue;
      }
      event.setUser(getResultString(result, COLUMN_USER));
      event.setAction(EntityAuditEvent.EntityAuditAction.fromString(getResultString(result, COLUMN_ACTION)));
      event.setDetails(getResultString(result, COLUMN_DETAIL));
      if (persistEntityDefinition) {
        String colDef = getResultString(result, COLUMN_DEFINITION);
        if (colDef != null) {
          event.setEntityDefinition(colDef);
        }
      }
      events.add(event);
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Got events for entity id {}, starting timestamp {}, #records {}", entityId, startKey, events.size());
    }
    return events;
  } catch (IOException e) {
    throw new AtlasException(e);
  } finally {
    close(scanner);
    close(table);
  }
}
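The sketch below reproduces the same "latest n rows for a prefix" idea using only the stock HBase client API. The row-key layout (<entityId>:<timestamp>) is a simplifying assumption for illustration; Atlas builds its real keys with the internal getKey helper shown above.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical standalone helper: reverse scan walks from the newest assumed
// key (<entityId>:<Long.MAX_VALUE>) down toward the entityId prefix; the
// PageFilter bounds each region's contribution and the size check bounds the
// final list.
static List<Result> latestN(Table table, String entityId, int n) throws IOException {
  Scan scan = new Scan();
  scan.setReversed(true);
  scan.setFilter(new PageFilter(n));
  scan.setStartRow(Bytes.toBytes(entityId + ":" + Long.MAX_VALUE));
  scan.setStopRow(Bytes.toBytes(entityId));
  scan.setCaching(n);
  List<Result> results = new ArrayList<>();
  try (ResultScanner scanner = table.getScanner(scan)) {
    for (Result r = scanner.next(); r != null && results.size() < n; r = scanner.next()) {
      results.add(r);
    }
  }
  return results;
}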