Search in sources :

Example 1 with PageFilter

use of org.apache.hadoop.hbase.filter.PageFilter in project hadoop by apache.

the class ApplicationEntityReader method getResults.

protected ResultScanner getResults(Configuration hbaseConf, Connection conn, FilterList filterList) throws IOException {
    Scan scan = new Scan();
    TimelineReaderContext context = getContext();
    // Whether or not flowRunID is null doesn't matter, the
    // ApplicationRowKeyPrefix will do the right thing.
    RowKeyPrefix<ApplicationRowKey> applicationRowKeyPrefix = new ApplicationRowKeyPrefix(context.getClusterId(), context.getUserId(), context.getFlowName(), context.getFlowRunId());
    FilterList newList = new FilterList();
    newList.addFilter(new PageFilter(getFilters().getLimit()));
    if (filterList != null && !filterList.getFilters().isEmpty()) {
    return getTable().getResultScanner(hbaseConf, conn, scan);
Also used : ApplicationRowKeyPrefix(org.apache.hadoop.yarn.server.timelineservice.storage.application.ApplicationRowKeyPrefix) TimelineReaderContext(org.apache.hadoop.yarn.server.timelineservice.reader.TimelineReaderContext) ApplicationRowKey(org.apache.hadoop.yarn.server.timelineservice.storage.application.ApplicationRowKey) Scan(org.apache.hadoop.hbase.client.Scan) FilterList(org.apache.hadoop.hbase.filter.FilterList) TimelineFilterList(org.apache.hadoop.yarn.server.timelineservice.reader.filter.TimelineFilterList) PageFilter(org.apache.hadoop.hbase.filter.PageFilter)

Example 2 with PageFilter

use of org.apache.hadoop.hbase.filter.PageFilter in project YCSB by brianfrankcooper.

the class HBaseClient10 method scan.

  /**
   * Perform a range scan for a set of records in the database. Each field/value
   * pair from the result will be stored in a HashMap.
   * @param table
   *          The name of the table
   * @param startkey
   *          The record key of the first record to read.
   * @param recordcount
   *          The number of records to read
   * @param fields
   *          The list of fields to read, or null for all of them
   * @param result
   *          A Vector of HashMaps, where each HashMap is a set field/value
   *          pairs for one record
   * @return Zero on success, a non-zero error code on error
   */
public Status scan(String table, String startkey, int recordcount, Set<String> fields, Vector<HashMap<String, ByteIterator>> result) {
    // if this is a "new" table, init HTable object. Else, use existing one
    if (!tableName.equals(table)) {
        currentTable = null;
        try {
            tableName = table;
        } catch (IOException e) {
            System.err.println("Error accessing HBase table: " + e);
            return Status.ERROR;
    Scan s = new Scan(Bytes.toBytes(startkey));
    // HBase has no record limit. Here, assume recordcount is small enough to
    // bring back in one call.
    // We get back recordcount records
    if (this.usePageFilter) {
        s.setFilter(new PageFilter(recordcount));
    // add specified fields or else all fields
    if (fields == null) {
    } else {
        for (String field : fields) {
            s.addColumn(columnFamilyBytes, Bytes.toBytes(field));
    // get results
    ResultScanner scanner = null;
    try {
        scanner = currentTable.getScanner(s);
        int numResults = 0;
        for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
            // get row key
            String key = Bytes.toString(rr.getRow());
            if (debug) {
                System.out.println("Got scan result for key: " + key);
            HashMap<String, ByteIterator> rowResult = new HashMap<String, ByteIterator>();
            while (rr.advance()) {
                final Cell cell = rr.current();
                rowResult.put(Bytes.toString(CellUtil.cloneQualifier(cell)), new ByteArrayByteIterator(CellUtil.cloneValue(cell)));
            // add rowResult to result vector
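            // PageFilter is applied per region server and does not guarantee that
            // at most recordcount rows come back overall, so this explicit
            // break is required.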
            if (numResults >= recordcount) {
                // if hit recordcount, bail out
    // done with row
    } catch (IOException e) {
        if (debug) {
            System.out.println("Error in getting/parsing scan result: " + e);
        return Status.ERROR;
    } finally {
        if (scanner != null) {
    return Status.OK;
Also used : ByteArrayByteIterator(com.yahoo.ycsb.ByteArrayByteIterator) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) ByteArrayByteIterator(com.yahoo.ycsb.ByteArrayByteIterator) ByteIterator(com.yahoo.ycsb.ByteIterator) HashMap(java.util.HashMap) Scan(org.apache.hadoop.hbase.client.Scan) PageFilter(org.apache.hadoop.hbase.filter.PageFilter) IOException(java.io.IOException) Cell(org.apache.hadoop.hbase.Cell) Result(org.apache.hadoop.hbase.client.Result)

Example 3 with PageFilter

use of org.apache.hadoop.hbase.filter.PageFilter in project camel by apache.

the class HBaseConsumer method poll.

protected int poll() throws Exception {
    try (Table table = endpoint.getTable()) {
        shutdownRunningTask = null;
        pendingExchanges = 0;
        Queue<Exchange> queue = new LinkedList<>();
        Scan scan = new Scan();
        List<Filter> filters = new LinkedList<>();
        if (endpoint.getFilters() != null) {
        if (maxMessagesPerPoll > 0) {
            filters.add(new PageFilter(maxMessagesPerPoll));
        if (!filters.isEmpty()) {
            Filter compoundFilter = new FilterList(filters);
        if (rowModel != null && rowModel.getCells() != null) {
            Set<HBaseCell> cellModels = rowModel.getCells();
            for (HBaseCell cellModel : cellModels) {
                scan.addColumn(HBaseHelper.getHBaseFieldAsBytes(cellModel.getFamily()), HBaseHelper.getHBaseFieldAsBytes(cellModel.getQualifier()));
        ResultScanner scanner = table.getScanner(scan);
        int exchangeCount = 0;
        // The next three statements are used just to get a reference to the BodyCellMappingStrategy instance.
        Exchange exchange = endpoint.createExchange();
        exchange.getIn().setHeader(CellMappingStrategyFactory.STRATEGY, CellMappingStrategyFactory.BODY);
        CellMappingStrategy mappingStrategy = endpoint.getCellMappingStrategyFactory().getStrategy(exchange.getIn());
        for (Result result = scanner.next(); (exchangeCount < maxMessagesPerPoll || maxMessagesPerPoll <= 0) && result != null; result = scanner.next()) {
            HBaseData data = new HBaseData();
            HBaseRow resultRow = new HBaseRow();
            byte[] row = result.getRow();
            resultRow.setId(endpoint.getCamelContext().getTypeConverter().convertTo(rowModel.getRowType(), row));
            List<Cell> cells = result.listCells();
            if (cells != null) {
                Set<HBaseCell> cellModels = rowModel.getCells();
                if (cellModels.size() > 0) {
                    for (HBaseCell modelCell : cellModels) {
                        HBaseCell resultCell = new HBaseCell();
                        String family = modelCell.getFamily();
                        String column = modelCell.getQualifier();
                        resultCell.setValue(endpoint.getCamelContext().getTypeConverter().convertTo(modelCell.getValueType(), result.getValue(HBaseHelper.getHBaseFieldAsBytes(family), HBaseHelper.getHBaseFieldAsBytes(column))));
                } else {
                    // just need to put every key value into the result Cells
                    for (Cell cell : cells) {
                        String qualifier = new String(CellUtil.cloneQualifier(cell));
                        String family = new String(CellUtil.cloneFamily(cell));
                        HBaseCell resultCell = new HBaseCell();
                        resultCell.setValue(endpoint.getCamelContext().getTypeConverter().convertTo(String.class, CellUtil.cloneValue(cell)));
                exchange = endpoint.createExchange();
                // Probably overkill but kept it here for consistency.
                exchange.getIn().setHeader(CellMappingStrategyFactory.STRATEGY, CellMappingStrategyFactory.BODY);
                mappingStrategy.applyScanResults(exchange.getIn(), data);
                //Make sure that there is a header containing the marked row ids, so that they can be deleted.
                exchange.getIn().setHeader(HBaseAttribute.HBASE_MARKED_ROW_ID.asHeader(), result.getRow());
        return queue.isEmpty() ? 0 : processBatch(CastUtils.cast(queue));
Also used : Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) CellMappingStrategy(org.apache.camel.component.hbase.mapping.CellMappingStrategy) FilterList(org.apache.hadoop.hbase.filter.FilterList) LinkedList(java.util.LinkedList) HBaseCell(org.apache.camel.component.hbase.model.HBaseCell) Result(org.apache.hadoop.hbase.client.Result) Exchange(org.apache.camel.Exchange) PageFilter(org.apache.hadoop.hbase.filter.PageFilter) Filter(org.apache.hadoop.hbase.filter.Filter) HBaseData(org.apache.camel.component.hbase.model.HBaseData) HBaseRow(org.apache.camel.component.hbase.model.HBaseRow) Scan(org.apache.hadoop.hbase.client.Scan) PageFilter(org.apache.hadoop.hbase.filter.PageFilter) HBaseCell(org.apache.camel.component.hbase.model.HBaseCell) Cell(org.apache.hadoop.hbase.Cell)

Example 4 with PageFilter

use of org.apache.hadoop.hbase.filter.PageFilter in project phoenix by apache.

the class BaseResultIterators method initializeScan.

private static void initializeScan(QueryPlan plan, Integer perScanLimit, Integer offset, Scan scan) throws SQLException {
    StatementContext context = plan.getContext();
    TableRef tableRef = plan.getTableRef();
    PTable table = tableRef.getTable();
    Map<byte[], NavigableSet<byte[]>> familyMap = scan.getFamilyMap();
    // Hack for PHOENIX-2067 to force raw scan over all KeyValues to fix their row keys
    if (context.getConnection().isDescVarLengthRowKeyUpgrade()) {
        // We project *all* KeyValues across all column families as we make a pass over
        // a physical table and we want to make sure we catch all KeyValues that may be
        // dynamic or part of an updatable view.
        // Remove any filter
        // Traverse (and subsequently clone) all KeyValues
        // Pass over PTable so we can re-write rows according to the row key schema
        scan.setAttribute(BaseScannerRegionObserver.UPGRADE_DESC_ROW_KEY, UngroupedAggregateRegionObserver.serialize(table));
    } else {
        FilterableStatement statement = plan.getStatement();
        RowProjector projector = plan.getProjector();
        boolean optimizeProjection = false;
        boolean keyOnlyFilter = familyMap.isEmpty() && context.getWhereConditionColumns().isEmpty();
        if (!projector.projectEverything()) {
            // not match the actual column families of the table (which is bad).
            if (keyOnlyFilter && table.getColumnFamilies().size() == 1) {
                // Project the one column family. We must project a column family since it's possible
                // that there are other non declared column families that we need to ignore.
            } else {
                optimizeProjection = true;
                if (projector.projectEveryRow()) {
                    if (table.getViewType() == ViewType.MAPPED) {
                        // Since we don't have the empty key value in MAPPED tables,
                        // we must project all CFs in HRS. However, only the
                        // selected column values are returned back to client.
                        for (PColumnFamily family : table.getColumnFamilies()) {
                            context.addWhereConditionColumn(family.getName().getBytes(), null);
                    } else {
                        byte[] ecf = SchemaUtil.getEmptyColumnFamily(table);
                        // Project the empty key value unless the column family containing it has
                        // been projected in its entirety.
                        if (!familyMap.containsKey(ecf) || familyMap.get(ecf) != null) {
                            scan.addColumn(ecf, EncodedColumnsUtil.getEmptyKeyValueInfo(table).getFirst());
        // Add FirstKeyOnlyFilter if there are no references to key value columns
        if (keyOnlyFilter) {
            ScanUtil.andFilterAtBeginning(scan, new FirstKeyOnlyFilter());
        if (perScanLimit != null) {
            ScanUtil.andFilterAtEnd(scan, new PageFilter(perScanLimit));
        if (offset != null) {
            ScanUtil.addOffsetAttribute(scan, offset);
        int cols = plan.getGroupBy().getOrderPreservingColumnCount();
        if (cols > 0 && keyOnlyFilter && !plan.getStatement().getHint().hasHint(HintNode.Hint.RANGE_SCAN) && cols < plan.getTableRef().getTable().getRowKeySchema().getFieldCount() && plan.getGroupBy().isOrderPreserving() && (context.getAggregationManager().isEmpty() || plan.getGroupBy().isUngroupedAggregate())) {
            ScanUtil.andFilterAtEnd(scan, new DistinctPrefixFilter(plan.getTableRef().getTable().getRowKeySchema(), cols));
            if (plan.getLimit() != null) {
                // We can push the limit to the server
                ScanUtil.andFilterAtEnd(scan, new PageFilter(plan.getLimit()));
        scan.setAttribute(BaseScannerRegionObserver.QUALIFIER_ENCODING_SCHEME, new byte[] { table.getEncodingScheme().getSerializedMetadataValue() });
        scan.setAttribute(BaseScannerRegionObserver.IMMUTABLE_STORAGE_ENCODING_SCHEME, new byte[] { table.getImmutableStorageScheme().getSerializedMetadataValue() });
        // we use this flag on the server side to determine which value column qualifier to use in the key value we return from server.
        scan.setAttribute(BaseScannerRegionObserver.USE_NEW_VALUE_COLUMN_QUALIFIER, Bytes.toBytes(true));
        // So there is no point setting the range.
        if (!ScanUtil.isAnalyzeTable(scan)) {
            setQualifierRanges(keyOnlyFilter, table, scan, context);
        if (optimizeProjection) {
            optimizeProjection(context, scan, table, statement);
Also used : NavigableSet(java.util.NavigableSet) FirstKeyOnlyFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter) DistinctPrefixFilter(org.apache.phoenix.filter.DistinctPrefixFilter) PColumnFamily(org.apache.phoenix.schema.PColumnFamily) PTable(org.apache.phoenix.schema.PTable) Hint(org.apache.phoenix.parse.HintNode.Hint) StatementContext(org.apache.phoenix.compile.StatementContext) RowProjector(org.apache.phoenix.compile.RowProjector) FilterableStatement(org.apache.phoenix.parse.FilterableStatement) PageFilter(org.apache.hadoop.hbase.filter.PageFilter) TableRef(org.apache.phoenix.schema.TableRef)

Example 5 with PageFilter

use of org.apache.hadoop.hbase.filter.PageFilter in project atlas by apache.

the class HBaseBasedAuditRepository method listEventsV1.

/**
 * List events for the given entity id in decreasing order of timestamp, from the given startKey. Returns n results
 * @param entityId entity id
 * @param startKey key for the first event to be returned, used for pagination
 * @param n number of events to be returned
 * @return list of events
 * @throws AtlasException
 */
public List<EntityAuditEvent> listEventsV1(String entityId, String startKey, short n) throws AtlasException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Listing events for entity id {}, starting timestamp {}, #records {}", entityId, startKey, n);
    Table table = null;
    ResultScanner scanner = null;
    try {
        table = connection.getTable(tableName);
        /**
         * Scan Details:
         * In hbase, the events are stored in increasing order of timestamp. So, doing reverse scan to get the latest event first
         * Page filter is set to limit the number of results returned.
         * Stop row is set to the entity id to avoid going past the current entity while scanning
         * small is set to true to optimise RPC calls as the scanner is created per request
         */
        Scan scan = new Scan().setReversed(true).setFilter(new PageFilter(n)).setStopRow(Bytes.toBytes(entityId)).setCaching(n).setSmall(true);
        if (StringUtils.isEmpty(startKey)) {
            // Set start row to entity id + max long value
            byte[] entityBytes = getKey(entityId, Long.MAX_VALUE);
            scan = scan.setStartRow(entityBytes);
        } else {
            scan = scan.setStartRow(Bytes.toBytes(startKey));
        scanner = table.getScanner(scan);
        Result result;
        List<EntityAuditEvent> events = new ArrayList<>();
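        // PageFilter doesn't ensure that only n results are returned: the filter is applied per region server.
        // So, adding extra check on n here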
        while ((result = scanner.next()) != null && events.size() < n) {
            EntityAuditEvent event = fromKey(result.getRow());
            // In case the user sets random start key, guarding against random events
            if (!event.getEntityId().equals(entityId)) {
            event.setUser(getResultString(result, COLUMN_USER));
            event.setAction(EntityAuditEvent.EntityAuditAction.fromString(getResultString(result, COLUMN_ACTION)));
            event.setDetails(getResultString(result, COLUMN_DETAIL));
            if (persistEntityDefinition) {
                String colDef = getResultString(result, COLUMN_DEFINITION);
                if (colDef != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Got events for entity id {}, starting timestamp {}, #records {}", entityId, startKey, events.size());
        return events;
    } catch (IOException e) {
        throw new AtlasException(e);
    } finally {
Also used : EntityAuditEvent(org.apache.atlas.EntityAuditEvent) Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) ArrayList(java.util.ArrayList) Scan(org.apache.hadoop.hbase.client.Scan) PageFilter(org.apache.hadoop.hbase.filter.PageFilter) IOException(java.io.IOException) AtlasException(org.apache.atlas.AtlasException) Result(org.apache.hadoop.hbase.client.Result)


PageFilter (org.apache.hadoop.hbase.filter.PageFilter)18 Scan (org.apache.hadoop.hbase.client.Scan)14 IOException ( Result (org.apache.hadoop.hbase.client.Result)8 ResultScanner (org.apache.hadoop.hbase.client.ResultScanner)8 FilterList (org.apache.hadoop.hbase.filter.FilterList)6 Table (org.apache.hadoop.hbase.client.Table)5 ArrayList (java.util.ArrayList)4 Cell (org.apache.hadoop.hbase.Cell)4 HashMap (java.util.HashMap)3 AtlasException (org.apache.atlas.AtlasException)3 Filter (org.apache.hadoop.hbase.filter.Filter)3 ByteArrayByteIterator ( ByteIterator ( EntityAuditEvent (org.apache.atlas.EntityAuditEvent)2 KeyValue (org.apache.hadoop.hbase.KeyValue)2 FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter)2 TimelineReaderContext (org.apache.hadoop.yarn.server.timelineservice.reader.TimelineReaderContext)2 TimelineFilterList (org.apache.hadoop.yarn.server.timelineservice.reader.filter.TimelineFilterList)2 DistinctPrefixFilter (org.apache.phoenix.filter.DistinctPrefixFilter)2