Search in sources :

Example 6 with PageFilter

use of org.apache.hadoop.hbase.filter.PageFilter in project hadoop by apache.

the class FlowActivityEntityReader method getResults.

protected ResultScanner getResults(Configuration hbaseConf, Connection conn, FilterList filterList) throws IOException {
    Scan scan = new Scan();
    String clusterId = getContext().getClusterId();
    if (getFilters().getCreatedTimeBegin() == 0L && getFilters().getCreatedTimeEnd() == Long.MAX_VALUE) {
        // All records have to be chosen.
        scan.setRowPrefixFilter(new FlowActivityRowKeyPrefix(clusterId).getRowKeyPrefix());
    } else {
        scan.setStartRow(new FlowActivityRowKeyPrefix(clusterId, getFilters().getCreatedTimeEnd()).getRowKeyPrefix());
        scan.setStopRow(new FlowActivityRowKeyPrefix(clusterId, (getFilters().getCreatedTimeBegin() <= 0 ? 0 : (getFilters().getCreatedTimeBegin() - 1))).getRowKeyPrefix());
    // use the page filter to limit the result to the page size
    // the scanner may still return more than the limit; therefore we need to
    // read the right number as we iterate
    scan.setFilter(new PageFilter(getFilters().getLimit()));
    return getTable().getResultScanner(hbaseConf, conn, scan);
Also used : FlowActivityRowKeyPrefix(org.apache.hadoop.yarn.server.timelineservice.storage.flow.FlowActivityRowKeyPrefix) Scan(org.apache.hadoop.hbase.client.Scan) PageFilter(org.apache.hadoop.hbase.filter.PageFilter)

Example 7 with PageFilter

use of org.apache.hadoop.hbase.filter.PageFilter in project hadoop by apache.

the class FlowRunEntityReader method getResults.

protected ResultScanner getResults(Configuration hbaseConf, Connection conn, FilterList filterList) throws IOException {
    Scan scan = new Scan();
    TimelineReaderContext context = getContext();
    RowKeyPrefix<FlowRunRowKey> flowRunRowKeyPrefix = new FlowRunRowKeyPrefix(context.getClusterId(), context.getUserId(), context.getFlowName());
    FilterList newList = new FilterList();
    newList.addFilter(new PageFilter(getFilters().getLimit()));
    if (filterList != null && !filterList.getFilters().isEmpty()) {
    return getTable().getResultScanner(hbaseConf, conn, scan);
Also used : TimelineReaderContext(org.apache.hadoop.yarn.server.timelineservice.reader.TimelineReaderContext) FlowRunRowKey(org.apache.hadoop.yarn.server.timelineservice.storage.flow.FlowRunRowKey) FlowRunRowKeyPrefix(org.apache.hadoop.yarn.server.timelineservice.storage.flow.FlowRunRowKeyPrefix) Scan(org.apache.hadoop.hbase.client.Scan) FilterList(org.apache.hadoop.hbase.filter.FilterList) TimelineFilterList(org.apache.hadoop.yarn.server.timelineservice.reader.filter.TimelineFilterList) PageFilter(org.apache.hadoop.hbase.filter.PageFilter)

Example 8 with PageFilter

use of org.apache.hadoop.hbase.filter.PageFilter in project hbase by apache.

the class TestReversibleScanners method testReversibleRegionScanner.

/**
 * Loads a two-family region and checks, case by case, the counts and ordering that
 * {@code verifyCountAndOrder} observes for differently configured scans.
 *
 * <p>NOTE(review): this listing appears to have lost statements during extraction (see the inline
 * notes below), and its closing braces are not visible. Compare with the upstream HBase test before
 * reusing it.
 *
 * @throws IOException declared by the signature; presumably raised by the region/scanner calls
 */
public void testReversibleRegionScanner() throws IOException {
    byte[] FAMILYNAME2 = Bytes.toBytes("testCf2");
    // Table descriptor with two column families: FAMILYNAME and the local FAMILYNAME2.
    HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(name.getMethodName())).addFamily(new HColumnDescriptor(FAMILYNAME)).addFamily(new HColumnDescriptor(FAMILYNAME2));
    Region region = TEST_UTIL.createLocalHRegion(htd, null, null);
    loadDataToRegion(region, FAMILYNAME2);
    // verify row count with forward scan
    Scan scan = new Scan();
    InternalScanner scanner = region.getScanner(scan);
    verifyCountAndOrder(scanner, ROWSIZE * QUALSIZE * 2, ROWSIZE, true);
    // Case1:Full reversed scan
    // NOTE(review): nothing visible switches `scan` to reversed mode, yet the last argument flips
    // from true to false; a call such as scan.setReversed(true) is presumably missing — confirm.
    scanner = region.getScanner(scan);
    verifyCountAndOrder(scanner, ROWSIZE * QUALSIZE * 2, ROWSIZE, false);
    // Case2:Full reversed scan with one family
    // NOTE(review): the fresh Scan is neither reversed nor limited to one family in the visible
    // lines, although the expected count drops to ROWSIZE * QUALSIZE; statements appear lost.
    scan = new Scan();
    scanner = region.getScanner(scan);
    verifyCountAndOrder(scanner, ROWSIZE * QUALSIZE, ROWSIZE, false);
    // Case3:Specify qualifiers + One family
    byte[][] specifiedQualifiers = { QUALS[1], QUALS[2] };
    for (byte[] specifiedQualifier : specifiedQualifiers) scan.addColumn(FAMILYNAME, specifiedQualifier);
    scanner = region.getScanner(scan);
    verifyCountAndOrder(scanner, ROWSIZE * 2, ROWSIZE, false);
    // Case4:Specify qualifiers + Two families
    for (byte[] specifiedQualifier : specifiedQualifiers) scan.addColumn(FAMILYNAME2, specifiedQualifier);
    scanner = region.getScanner(scan);
    verifyCountAndOrder(scanner, ROWSIZE * 2 * 2, ROWSIZE, false);
    // Case5: Case4 + specify start row
    int startRowNum = ROWSIZE * 3 / 4;
    // NOTE(review): no start row is set on `scan` in the visible lines; presumably a setStartRow
    // call using row index startRowNum was lost — confirm.
    scanner = region.getScanner(scan);
    verifyCountAndOrder(scanner, (startRowNum + 1) * 2 * 2, (startRowNum + 1), false);
    // Case6: Case4 + specify stop row
    int stopRowNum = ROWSIZE / 4;
    // NOTE(review): no stop row is set (and no start row cleared) in the visible lines; the
    // corresponding calls appear lost — confirm.
    scanner = region.getScanner(scan);
    verifyCountAndOrder(scanner, (ROWSIZE - stopRowNum - 1) * 2 * 2, (ROWSIZE - stopRowNum - 1), false);
    // Case7: Case4 + specify start row + specify stop row
    // NOTE(review): start/stop row assignments are likewise not visible here.
    scanner = region.getScanner(scan);
    verifyCountAndOrder(scanner, (startRowNum - stopRowNum) * 2 * 2, (startRowNum - stopRowNum), false);
    // Case8: Case7 + SingleColumnValueFilter
    int valueNum = startRowNum % VALUESIZE;
    Filter filter = new SingleColumnValueFilter(FAMILYNAME, specifiedQualifiers[0], CompareOp.EQUAL, VALUES[valueNum]);
    // NOTE(review): `filter` is created but never attached to `scan` in the visible lines, here
    // and in the cases below; presumably scan.setFilter(filter) was lost each time — confirm.
    scanner = region.getScanner(scan);
    int unfilteredRowNum = (startRowNum - stopRowNum) / VALUESIZE + (stopRowNum / VALUESIZE == valueNum ? 0 : 1);
    verifyCountAndOrder(scanner, unfilteredRowNum * 2 * 2, unfilteredRowNum, false);
    // Case9: Case7 + PageFilter
    int pageSize = 10;
    filter = new PageFilter(pageSize);
    scanner = region.getScanner(scan);
    int expectedRowNum = pageSize;
    verifyCountAndOrder(scanner, expectedRowNum * 2 * 2, expectedRowNum, false);
    // Case10: Case7 + FilterList+MUST_PASS_ONE
    SingleColumnValueFilter scvFilter1 = new SingleColumnValueFilter(FAMILYNAME, specifiedQualifiers[0], CompareOp.EQUAL, VALUES[0]);
    SingleColumnValueFilter scvFilter2 = new SingleColumnValueFilter(FAMILYNAME, specifiedQualifiers[0], CompareOp.EQUAL, VALUES[1]);
    expectedRowNum = 0;
    // Walks the row indices from startRowNum down to stopRowNum + 1, testing i % VALUESIZE against 0 and 1.
    for (int i = startRowNum; i > stopRowNum; i--) {
        if (i % VALUESIZE == 0 || i % VALUESIZE == 1) {
    // NOTE(review): the body of the `if` and the closing braces of the `if` and `for` are missing
    // from this listing; presumably expectedRowNum is incremented for each matching index — confirm.
    filter = new FilterList(Operator.MUST_PASS_ONE, scvFilter1, scvFilter2);
    scanner = region.getScanner(scan);
    verifyCountAndOrder(scanner, expectedRowNum * 2 * 2, expectedRowNum, false);
    // Case11: Case7 + FilterList+MUST_PASS_ALL (the original comment repeated the label "Case10")
    filter = new FilterList(Operator.MUST_PASS_ALL, scvFilter1, scvFilter2);
    // Zero rows expected: both filters test the same column for equality with different VALUES entries.
    expectedRowNum = 0;
    scanner = region.getScanner(scan);
    verifyCountAndOrder(scanner, expectedRowNum * 2 * 2, expectedRowNum, false);
    // NOTE(review): the method's closing brace is not visible in this listing.
Also used : SingleColumnValueFilter(org.apache.hadoop.hbase.filter.SingleColumnValueFilter) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) PageFilter(org.apache.hadoop.hbase.filter.PageFilter) Filter(org.apache.hadoop.hbase.filter.Filter) SingleColumnValueFilter(org.apache.hadoop.hbase.filter.SingleColumnValueFilter) Scan(org.apache.hadoop.hbase.client.Scan) PageFilter(org.apache.hadoop.hbase.filter.PageFilter) FilterList(org.apache.hadoop.hbase.filter.FilterList) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) Test(org.junit.Test)

Example 9 with PageFilter

use of org.apache.hadoop.hbase.filter.PageFilter in project YCSB by brianfrankcooper.

the class HBaseClient method scan.

  /**
   * Perform a range scan for a set of records in the database. Each field/value pair from the result will be stored
   * in a HashMap.
   * @param table       The name of the table
   * @param startkey    The record key of the first record to read.
   * @param recordcount The number of records to read
   * @param fields      The list of fields to read, or null for all of them
   * @param result      A Vector of HashMaps, where each HashMap is a set of field/value pairs for one record
   * @return Status.OK on success, Status.ERROR on error
   */
public Status scan(String table, String startkey, int recordcount, Set<String> fields, Vector<HashMap<String, ByteIterator>> result) {
    //if this is a "new" tableName, init HTable object.  Else, use existing one
    if (!this.tableName.equals(table)) {
        hTable = null;
        try {
            this.tableName = table;
        } catch (IOException e) {
            System.err.println("Error accessing HBase tableName: " + e);
            return Status.ERROR;
    Scan s = new Scan(Bytes.toBytes(startkey));
    //HBase has no record limit.  Here, assume recordcount is small enough to bring back in one call.
    //We get back recordcount records
    if (this.usePageFilter) {
        s.setFilter(new PageFilter(recordcount));
    //add specified fields or else all fields
    if (fields == null) {
    } else {
        for (String field : fields) {
            s.addColumn(columnFamilyBytes, Bytes.toBytes(field));
    //get results
    try (ResultScanner scanner = hTable.getScanner(s)) {
        int numResults = 0;
        for (Result rr =; rr != null; rr = {
            //get row key
            String key = Bytes.toString(rr.getRow());
            if (debug) {
                System.out.println("Got scan result for key: " + key);
            HashMap<String, ByteIterator> rowResult = new HashMap<>();
            for (KeyValue kv : rr.raw()) {
                rowResult.put(Bytes.toString(kv.getQualifier()), new ByteArrayByteIterator(kv.getValue()));
            //add rowResult to result vector
            //if hit recordcount, bail out
            if (numResults >= recordcount) {
    //done with row
    } catch (IOException e) {
        if (debug) {
            System.out.println("Error in getting/parsing scan result: " + e);
        return Status.ERROR;
    return Status.OK;
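Also used : ByteArrayByteIterator(com.yahoo.ycsb.ByteArrayByteIterator) KeyValue(org.apache.hadoop.hbase.KeyValue) ByteArrayByteIterator(com.yahoo.ycsb.ByteArrayByteIterator) ByteIterator(com.yahoo.ycsb.ByteIterator) PageFilter(org.apache.hadoop.hbase.filter.PageFilter) IOException(java.io.IOException)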

Example 10 with PageFilter

use of org.apache.hadoop.hbase.filter.PageFilter in project incubator-atlas by apache.

the class HBaseBasedAuditRepository method listEvents.

    /**
     * List events for the given entity id in decreasing order of timestamp, from the given startKey. Returns at most n results
     * @param entityId entity id
     * @param startKey key for the first event to be returned, used for pagination
     * @param n number of events to be returned
     * @return list of events
     * @throws AtlasException if an IOException occurs while reading from the table
     */
public List<EntityAuditEvent> listEvents(String entityId, String startKey, short n) throws AtlasException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Listing events for entity id {}, starting timestamp {}, #records {}", entityId, startKey, n);
    Table table = null;
    ResultScanner scanner = null;
    try {
        table = connection.getTable(tableName);
             * Scan Details:
             * In hbase, the events are stored in increasing order of timestamp. So, doing reverse scan to get the latest event first
             * Page filter is set to limit the number of results returned.
             * Stop row is set to the entity id to avoid going past the current entity while scanning
             * small is set to true to optimise RPC calls as the scanner is created per request
        Scan scan = new Scan().setReversed(true).setFilter(new PageFilter(n)).setStopRow(Bytes.toBytes(entityId)).setCaching(n).setSmall(true);
        if (StringUtils.isEmpty(startKey)) {
            //Set start row to entity id + max long value
            byte[] entityBytes = getKey(entityId, Long.MAX_VALUE);
            scan = scan.setStartRow(entityBytes);
        } else {
            scan = scan.setStartRow(Bytes.toBytes(startKey));
        scanner = table.getScanner(scan);
        Result result;
        List<EntityAuditEvent> events = new ArrayList<>();
        //So, adding extra check on n here
        while ((result = != null && events.size() < n) {
            EntityAuditEvent event = fromKey(result.getRow());
            //In case the user sets random start key, guarding against random events
            if (!event.getEntityId().equals(entityId)) {
            event.setUser(getResultString(result, COLUMN_USER));
            event.setAction(EntityAuditEvent.EntityAuditAction.valueOf(getResultString(result, COLUMN_ACTION)));
            event.setDetails(getResultString(result, COLUMN_DETAIL));
            if (persistEntityDefinition) {
                String colDef = getResultString(result, COLUMN_DEFINITION);
                if (colDef != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Got events for entity id {}, starting timestamp {}, #records {}", entityId, startKey, events.size());
        return events;
    } catch (IOException e) {
        throw new AtlasException(e);
    } finally {
Also used : EntityAuditEvent(org.apache.atlas.EntityAuditEvent) Table(org.apache.hadoop.hbase.client.Table) ResultScanner(org.apache.hadoop.hbase.client.ResultScanner) ArrayList(java.util.ArrayList) Scan(org.apache.hadoop.hbase.client.Scan) PageFilter(org.apache.hadoop.hbase.filter.PageFilter) IOException(java.io.IOException) AtlasException(org.apache.atlas.AtlasException) Result(org.apache.hadoop.hbase.client.Result)


PageFilter (org.apache.hadoop.hbase.filter.PageFilter)11 Scan (org.apache.hadoop.hbase.client.Scan)9 FilterList (org.apache.hadoop.hbase.filter.FilterList)4 IOException (java.io.IOException) Result (org.apache.hadoop.hbase.client.Result)3 ResultScanner (org.apache.hadoop.hbase.client.ResultScanner)3 Filter (org.apache.hadoop.hbase.filter.Filter)3 ByteArrayByteIterator (com.yahoo.ycsb.ByteArrayByteIterator) ByteIterator (com.yahoo.ycsb.ByteIterator) Cell (org.apache.hadoop.hbase.Cell)2 KeyValue (org.apache.hadoop.hbase.KeyValue)2 Table (org.apache.hadoop.hbase.client.Table)2 FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter)2 TimelineReaderContext (org.apache.hadoop.yarn.server.timelineservice.reader.TimelineReaderContext)2 TimelineFilterList (org.apache.hadoop.yarn.server.timelineservice.reader.filter.TimelineFilterList)2 DistinctPrefixFilter (org.apache.phoenix.filter.DistinctPrefixFilter)2 Test (org.junit.Test)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedList (java.util.LinkedList)1