
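A quick note on semantics that every example below works around: PageFilter caps the number of rows returned per region server, not per scan, so a scan that spans several regions can return more rows than the page size. Callers therefore usually enforce the limit on the client side as well, as Examples 2 and 5 do. A minimal sketch of the pattern, assuming an existing Table handle and a column family; the scanPage helper is hypothetical:

import java.io.IOException;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.PageFilter;

// Hypothetical helper: read at most pageSize rows from the given table.
static int scanPage(Table table, byte[] family, int pageSize) throws IOException {
    Scan scan = new Scan();
    scan.addFamily(family);
    // Asks each region server to stop after pageSize rows...
    scan.setFilter(new PageFilter(pageSize));
    int count = 0;
    try (ResultScanner scanner = table.getScanner(scan)) {
        // ...but the client must still cap the total, since several region
        // servers may each contribute up to pageSize rows.
        for (Result r = scanner.next(); r != null && count < pageSize; r = scanner.next()) {
            count++;
        }
    }
    return count;
}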
Example 1 with PageFilter

Use of org.apache.hadoop.hbase.filter.PageFilter in project hadoop by apache.

From the class ApplicationEntityReader, method getResults:

@Override
protected ResultScanner getResults(Configuration hbaseConf, Connection conn, FilterList filterList) throws IOException {
    Scan scan = new Scan();
    TimelineReaderContext context = getContext();
    // Whether flowRunId is null or not doesn't matter; the
    // ApplicationRowKeyPrefix will do the right thing.
    RowKeyPrefix<ApplicationRowKey> applicationRowKeyPrefix = new ApplicationRowKeyPrefix(context.getClusterId(), context.getUserId(), context.getFlowName(), context.getFlowRunId());
    scan.setRowPrefixFilter(applicationRowKeyPrefix.getRowKeyPrefix());
    FilterList newList = new FilterList();
    newList.addFilter(new PageFilter(getFilters().getLimit()));
    if (filterList != null && !filterList.getFilters().isEmpty()) {
        newList.addFilter(filterList);
    }
    scan.setFilter(newList);
    scan.setMaxVersions(getDataToRetrieve().getMetricsLimit());
    return getTable().getResultScanner(hbaseConf, conn, scan);
}
Also used: ApplicationRowKeyPrefix (org.apache.hadoop.yarn.server.timelineservice.storage.application.ApplicationRowKeyPrefix), TimelineReaderContext (org.apache.hadoop.yarn.server.timelineservice.reader.TimelineReaderContext), ApplicationRowKey (org.apache.hadoop.yarn.server.timelineservice.storage.application.ApplicationRowKey), Scan (org.apache.hadoop.hbase.client.Scan), FilterList (org.apache.hadoop.hbase.filter.FilterList), TimelineFilterList (org.apache.hadoop.yarn.server.timelineservice.reader.filter.TimelineFilterList), PageFilter (org.apache.hadoop.hbase.filter.PageFilter)
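
One detail worth noting in this example: FilterList defaults to Operator.MUST_PASS_ALL, so rows must satisfy both the PageFilter and the caller-supplied filters. Spelling the operator out is equivalent, as a sketch:

FilterList newList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
newList.addFilter(new PageFilter(getFilters().getLimit()));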

Example 2 with PageFilter

Use of org.apache.hadoop.hbase.filter.PageFilter in project YCSB by brianfrankcooper.

From the class HBaseClient10, method scan:

/**
   * Perform a range scan for a set of records in the database. Each field/value
   * pair from the result will be stored in a HashMap.
   *
   * @param table
   *          The name of the table
   * @param startkey
   *          The record key of the first record to read.
   * @param recordcount
   *          The number of records to read
   * @param fields
   *          The list of fields to read, or null for all of them
   * @param result
   *          A Vector of HashMaps, where each HashMap is a set of field/value
   *          pairs for one record
   * @return Zero on success, a non-zero error code on error
   */
@Override
public Status scan(String table, String startkey, int recordcount, Set<String> fields, Vector<HashMap<String, ByteIterator>> result) {
    // If this is a "new" table, initialize the HTable object; otherwise reuse the existing one.
    if (!tableName.equals(table)) {
        currentTable = null;
        try {
            getHTable(table);
            tableName = table;
        } catch (IOException e) {
            System.err.println("Error accessing HBase table: " + e);
            return Status.ERROR;
        }
    }
    Scan s = new Scan(Bytes.toBytes(startkey));
    // HBase has no built-in record limit, so assume recordcount is small enough
    // to bring back in one call; scanner caching is set so that recordcount rows
    // come back per RPC.
    s.setCaching(recordcount);
    if (this.usePageFilter) {
        s.setFilter(new PageFilter(recordcount));
    }
    // add specified fields or else all fields
    if (fields == null) {
        s.addFamily(columnFamilyBytes);
    } else {
        for (String field : fields) {
            s.addColumn(columnFamilyBytes, Bytes.toBytes(field));
        }
    }
    // get results
    ResultScanner scanner = null;
    try {
        scanner = currentTable.getScanner(s);
        int numResults = 0;
        for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
            // get row key
            String key = Bytes.toString(rr.getRow());
            if (debug) {
                System.out.println("Got scan result for key: " + key);
            }
            HashMap<String, ByteIterator> rowResult = new HashMap<String, ByteIterator>();
            while (rr.advance()) {
                final Cell cell = rr.current();
                rowResult.put(Bytes.toString(CellUtil.cloneQualifier(cell)), new ByteArrayByteIterator(CellUtil.cloneValue(cell)));
            }
            // add rowResult to result vector
            result.add(rowResult);
            numResults++;
            // Bail out once recordcount rows have been read. The explicit break
            // is required: the scan has no stop row, and PageFilter (when enabled)
            // only caps rows per region server, not for the scan as a whole.
            if (numResults >= recordcount) {
                break;
            }
        }
    // done with row
    } catch (IOException e) {
        if (debug) {
            System.out.println("Error in getting/parsing scan result: " + e);
        }
        return Status.ERROR;
    } finally {
        if (scanner != null) {
            scanner.close();
        }
    }
    return Status.OK;
}
Also used: ByteArrayByteIterator (com.yahoo.ycsb.ByteArrayByteIterator), ResultScanner (org.apache.hadoop.hbase.client.ResultScanner), ByteIterator (com.yahoo.ycsb.ByteIterator), HashMap (java.util.HashMap), Scan (org.apache.hadoop.hbase.client.Scan), PageFilter (org.apache.hadoop.hbase.filter.PageFilter), IOException (java.io.IOException), Cell (org.apache.hadoop.hbase.Cell), Result (org.apache.hadoop.hbase.client.Result)
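
As the comments above note, the client-side break at recordcount is needed even when the page filter is enabled, because PageFilter caps rows per region server rather than per scan. On an HBase 2.x client, Scan#setLimit gives a true per-scan limit; a sketch assuming that client version, with startkey and recordcount as in the method above:

// HBase 2.x sketch: setLimit is enforced for the scan as a whole, unlike
// PageFilter, which is applied independently on each region server.
Scan s = new Scan().withStartRow(Bytes.toBytes(startkey));
s.setCaching(recordcount);
s.setLimit(recordcount);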

Example 3 with PageFilter

Use of org.apache.hadoop.hbase.filter.PageFilter in project camel by apache.

From the class HBaseConsumer, method poll:

@Override
protected int poll() throws Exception {
    try (Table table = endpoint.getTable()) {
        shutdownRunningTask = null;
        pendingExchanges = 0;
        Queue<Exchange> queue = new LinkedList<>();
        Scan scan = new Scan();
        List<Filter> filters = new LinkedList<>();
        if (endpoint.getFilters() != null) {
            filters.addAll(endpoint.getFilters());
        }
        if (maxMessagesPerPoll > 0) {
            filters.add(new PageFilter(maxMessagesPerPoll));
        }
        if (!filters.isEmpty()) {
            Filter compoundFilter = new FilterList(filters);
            scan.setFilter(compoundFilter);
        }
        if (rowModel != null && rowModel.getCells() != null) {
            Set<HBaseCell> cellModels = rowModel.getCells();
            for (HBaseCell cellModel : cellModels) {
                scan.addColumn(HBaseHelper.getHBaseFieldAsBytes(cellModel.getFamily()), HBaseHelper.getHBaseFieldAsBytes(cellModel.getQualifier()));
            }
        }
        ResultScanner scanner = table.getScanner(scan);
        int exchangeCount = 0;
        // The next three statements are used just to get a reference to the BodyCellMappingStrategy instance.
        Exchange exchange = endpoint.createExchange();
        exchange.getIn().setHeader(CellMappingStrategyFactory.STRATEGY, CellMappingStrategyFactory.BODY);
        CellMappingStrategy mappingStrategy = endpoint.getCellMappingStrategyFactory().getStrategy(exchange.getIn());
        for (Result result = scanner.next(); (exchangeCount < maxMessagesPerPoll || maxMessagesPerPoll <= 0) && result != null; result = scanner.next()) {
            HBaseData data = new HBaseData();
            HBaseRow resultRow = new HBaseRow();
            resultRow.apply(rowModel);
            byte[] row = result.getRow();
            resultRow.setId(endpoint.getCamelContext().getTypeConverter().convertTo(rowModel.getRowType(), row));
            List<Cell> cells = result.listCells();
            if (cells != null) {
                Set<HBaseCell> cellModels = rowModel.getCells();
                if (cellModels.size() > 0) {
                    for (HBaseCell modelCell : cellModels) {
                        HBaseCell resultCell = new HBaseCell();
                        String family = modelCell.getFamily();
                        String column = modelCell.getQualifier();
                        resultCell.setValue(endpoint.getCamelContext().getTypeConverter().convertTo(modelCell.getValueType(), result.getValue(HBaseHelper.getHBaseFieldAsBytes(family), HBaseHelper.getHBaseFieldAsBytes(column))));
                        resultCell.setFamily(modelCell.getFamily());
                        resultCell.setQualifier(modelCell.getQualifier());
                        resultRow.getCells().add(resultCell);
                    }
                } else {
                    // No cell models were given, so put every key value into the result cells.
                    for (Cell cell : cells) {
                        String qualifier = new String(CellUtil.cloneQualifier(cell));
                        String family = new String(CellUtil.cloneFamily(cell));
                        HBaseCell resultCell = new HBaseCell();
                        resultCell.setFamily(family);
                        resultCell.setQualifier(qualifier);
                        resultCell.setValue(endpoint.getCamelContext().getTypeConverter().convertTo(String.class, CellUtil.cloneValue(cell)));
                        resultRow.getCells().add(resultCell);
                    }
                }
                data.getRows().add(resultRow);
                exchange = endpoint.createExchange();
                // Probably overkill but kept it here for consistency.
                exchange.getIn().setHeader(CellMappingStrategyFactory.STRATEGY, CellMappingStrategyFactory.BODY);
                mappingStrategy.applyScanResults(exchange.getIn(), data);
                // Make sure that there is a header containing the marked row ids, so that they can be deleted.
                exchange.getIn().setHeader(HBaseAttribute.HBASE_MARKED_ROW_ID.asHeader(), result.getRow());
                queue.add(exchange);
                exchangeCount++;
            }
        }
        scanner.close();
        return queue.isEmpty() ? 0 : processBatch(CastUtils.cast(queue));
    }
}
Also used: Table (org.apache.hadoop.hbase.client.Table), ResultScanner (org.apache.hadoop.hbase.client.ResultScanner), CellMappingStrategy (org.apache.camel.component.hbase.mapping.CellMappingStrategy), FilterList (org.apache.hadoop.hbase.filter.FilterList), LinkedList (java.util.LinkedList), HBaseCell (org.apache.camel.component.hbase.model.HBaseCell), Result (org.apache.hadoop.hbase.client.Result), Exchange (org.apache.camel.Exchange), PageFilter (org.apache.hadoop.hbase.filter.PageFilter), Filter (org.apache.hadoop.hbase.filter.Filter), HBaseData (org.apache.camel.component.hbase.model.HBaseData), HBaseRow (org.apache.camel.component.hbase.model.HBaseRow), Scan (org.apache.hadoop.hbase.client.Scan), Cell (org.apache.hadoop.hbase.Cell)
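
One robustness note on this consumer: scanner.close() runs only on the happy path, so an exception thrown while building exchanges would leak the scanner. ResultScanner is Closeable, so a try-with-resources block closes it on every path; a minimal sketch of the same loop structure:

try (ResultScanner scanner = table.getScanner(scan)) {
    for (Result result = scanner.next(); result != null; result = scanner.next()) {
        // ... build and queue the exchange as above ...
    }
}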

Example 4 with PageFilter

Use of org.apache.hadoop.hbase.filter.PageFilter in project phoenix by apache.

From the class BaseResultIterators, method initializeScan:

private static void initializeScan(QueryPlan plan, Integer perScanLimit, Integer offset, Scan scan) throws SQLException {
    StatementContext context = plan.getContext();
    TableRef tableRef = plan.getTableRef();
    PTable table = tableRef.getTable();
    Map<byte[], NavigableSet<byte[]>> familyMap = scan.getFamilyMap();
    // Hack for PHOENIX-2067 to force raw scan over all KeyValues to fix their row keys
    if (context.getConnection().isDescVarLengthRowKeyUpgrade()) {
        // We project *all* KeyValues across all column families as we make a pass over
        // a physical table and we want to make sure we catch all KeyValues that may be
        // dynamic or part of an updatable view.
        familyMap.clear();
        scan.setMaxVersions();
        // Remove any filter
        scan.setFilter(null);
        // Traverse (and subsequently clone) all KeyValues
        scan.setRaw(true);
        // Pass over PTable so we can re-write rows according to the row key schema
        scan.setAttribute(BaseScannerRegionObserver.UPGRADE_DESC_ROW_KEY, UngroupedAggregateRegionObserver.serialize(table));
    } else {
        FilterableStatement statement = plan.getStatement();
        RowProjector projector = plan.getProjector();
        boolean optimizeProjection = false;
        boolean keyOnlyFilter = familyMap.isEmpty() && context.getWhereConditionColumns().isEmpty();
        if (!projector.projectEverything()) {
            // FIXME: we only take this branch when there is a single column family,
            // because otherwise local indexes break: the column families in the PTable
            // appear to not match the actual column families of the table (which is bad).
            if (keyOnlyFilter && table.getColumnFamilies().size() == 1) {
                // Project the one column family. We must project a column family since it's possible
                // that there are other non declared column families that we need to ignore.
                scan.addFamily(table.getColumnFamilies().get(0).getName().getBytes());
            } else {
                optimizeProjection = true;
                if (projector.projectEveryRow()) {
                    if (table.getViewType() == ViewType.MAPPED) {
                        // Since we don't have the empty key value in MAPPED tables,
                        // we must project all CFs in HRS. However, only the
                        // selected column values are returned back to client.
                        context.getWhereConditionColumns().clear();
                        for (PColumnFamily family : table.getColumnFamilies()) {
                            context.addWhereConditionColumn(family.getName().getBytes(), null);
                        }
                    } else {
                        byte[] ecf = SchemaUtil.getEmptyColumnFamily(table);
                        // Project the empty key value unless the column family
                        // containing it has been projected in its entirety.
                        if (!familyMap.containsKey(ecf) || familyMap.get(ecf) != null) {
                            scan.addColumn(ecf, EncodedColumnsUtil.getEmptyKeyValueInfo(table).getFirst());
                        }
                    }
                }
            }
        }
        // Add FirstKeyOnlyFilter if there are no references to key value columns
        if (keyOnlyFilter) {
            ScanUtil.andFilterAtBeginning(scan, new FirstKeyOnlyFilter());
        }
        if (perScanLimit != null) {
            ScanUtil.andFilterAtEnd(scan, new PageFilter(perScanLimit));
        }
        if (offset != null) {
            ScanUtil.addOffsetAttribute(scan, offset);
        }
        int cols = plan.getGroupBy().getOrderPreservingColumnCount();
        if (cols > 0 && keyOnlyFilter && !plan.getStatement().getHint().hasHint(HintNode.Hint.RANGE_SCAN) && cols < plan.getTableRef().getTable().getRowKeySchema().getFieldCount() && plan.getGroupBy().isOrderPreserving() && (context.getAggregationManager().isEmpty() || plan.getGroupBy().isUngroupedAggregate())) {
            ScanUtil.andFilterAtEnd(scan, new DistinctPrefixFilter(plan.getTableRef().getTable().getRowKeySchema(), cols));
            if (plan.getLimit() != null) {
                // We can push the limit to the server
                ScanUtil.andFilterAtEnd(scan, new PageFilter(plan.getLimit()));
            }
        }
        scan.setAttribute(BaseScannerRegionObserver.QUALIFIER_ENCODING_SCHEME, new byte[] { table.getEncodingScheme().getSerializedMetadataValue() });
        scan.setAttribute(BaseScannerRegionObserver.IMMUTABLE_STORAGE_ENCODING_SCHEME, new byte[] { table.getImmutableStorageScheme().getSerializedMetadataValue() });
        // we use this flag on the server side to determine which value column qualifier to use in the key value we return from server.
        scan.setAttribute(BaseScannerRegionObserver.USE_NEW_VALUE_COLUMN_QUALIFIER, Bytes.toBytes(true));
        // When analyzing the table, no key value look-ups are done,
        // so there is no point setting the qualifier range.
        if (!ScanUtil.isAnalyzeTable(scan)) {
            setQualifierRanges(keyOnlyFilter, table, scan, context);
        }
        if (optimizeProjection) {
            optimizeProjection(context, scan, table, statement);
        }
    }
}
Also used: NavigableSet (java.util.NavigableSet), FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter), DistinctPrefixFilter (org.apache.phoenix.filter.DistinctPrefixFilter), PColumnFamily (org.apache.phoenix.schema.PColumnFamily), PTable (org.apache.phoenix.schema.PTable), Hint (org.apache.phoenix.parse.HintNode.Hint), StatementContext (org.apache.phoenix.compile.StatementContext), RowProjector (org.apache.phoenix.compile.RowProjector), FilterableStatement (org.apache.phoenix.parse.FilterableStatement), PageFilter (org.apache.hadoop.hbase.filter.PageFilter), TableRef (org.apache.phoenix.schema.TableRef)
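
Phoenix pushes the SQL LIMIT down to the region servers by appending a PageFilter to whatever filter the scan already carries. ScanUtil.andFilterAtEnd is Phoenix-internal; the sketch below is an assumption about what "and a filter at the end" amounts to, not the real implementation:

// Sketch only: combine an existing scan filter with a new one so both must pass.
static void andFilter(Scan scan, Filter filter) {
    Filter existing = scan.getFilter();
    if (existing == null) {
        scan.setFilter(filter);
    } else {
        scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, existing, filter));
    }
}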

Example 5 with PageFilter

Use of org.apache.hadoop.hbase.filter.PageFilter in project atlas by apache.

From the class HBaseBasedAuditRepository, method listEventsV1:

/**
 * List events for the given entity id in decreasing order of timestamp, starting from the given startKey. Returns at most n results.
 * @param entityId entity id
 * @param startKey key of the first event to be returned, used for pagination
 * @param n maximum number of events to be returned
 * @return list of events
 * @throws AtlasException if the events cannot be read from HBase
 */
public List<EntityAuditEvent> listEventsV1(String entityId, String startKey, short n) throws AtlasException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Listing events for entity id {}, starting timestamp {}, #records {}", entityId, startKey, n);
    }
    Table table = null;
    ResultScanner scanner = null;
    try {
        table = connection.getTable(tableName);
        /**
         * Scan details:
         * In HBase, the events are stored in increasing order of timestamp, so a reverse scan returns the latest event first.
         * A page filter is set to limit the number of results returned.
         * The stop row is set to the entity id to avoid going past the current entity while scanning.
         * small is set to true to optimise RPC calls, as the scanner is created per request.
         */
        Scan scan = new Scan().setReversed(true).setFilter(new PageFilter(n)).setStopRow(Bytes.toBytes(entityId)).setCaching(n).setSmall(true);
        if (StringUtils.isEmpty(startKey)) {
            // Set start row to entity id + max long value
            byte[] entityBytes = getKey(entityId, Long.MAX_VALUE);
            scan = scan.setStartRow(entityBytes);
        } else {
            scan = scan.setStartRow(Bytes.toBytes(startKey));
        }
        scanner = table.getScanner(scan);
        Result result;
        List<EntityAuditEvent> events = new ArrayList<>();
        // PageFilter doesn't guarantee that exactly n results come back, since the
        // filter is applied per region server; so, adding an extra check on n here.
        while ((result = scanner.next()) != null && events.size() < n) {
            EntityAuditEvent event = fromKey(result.getRow());
            // Guard against events from other entities, in case the user passes an arbitrary start key.
            if (!event.getEntityId().equals(entityId)) {
                continue;
            }
            event.setUser(getResultString(result, COLUMN_USER));
            event.setAction(EntityAuditEvent.EntityAuditAction.fromString(getResultString(result, COLUMN_ACTION)));
            event.setDetails(getResultString(result, COLUMN_DETAIL));
            if (persistEntityDefinition) {
                String colDef = getResultString(result, COLUMN_DEFINITION);
                if (colDef != null) {
                    event.setEntityDefinition(colDef);
                }
            }
            events.add(event);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Got events for entity id {}, starting timestamp {}, #records {}", entityId, startKey, events.size());
        }
        return events;
    } catch (IOException e) {
        throw new AtlasException(e);
    } finally {
        close(scanner);
        close(table);
    }
}
Also used: EntityAuditEvent (org.apache.atlas.EntityAuditEvent), Table (org.apache.hadoop.hbase.client.Table), ResultScanner (org.apache.hadoop.hbase.client.ResultScanner), ArrayList (java.util.ArrayList), Scan (org.apache.hadoop.hbase.client.Scan), PageFilter (org.apache.hadoop.hbase.filter.PageFilter), IOException (java.io.IOException), AtlasException (org.apache.atlas.AtlasException), Result (org.apache.hadoop.hbase.client.Result)
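
Scan.setSmall was deprecated in HBase 2.x; the same reversed, paged scan can be expressed with setLimit and a pread read type. A sketch assuming an HBase 2.x client, with startRow and stopRow built as in the method above:

// HBase 2.x sketch of the reversed, paged audit scan.
Scan scan = new Scan()
        .setReversed(true)
        .setFilter(new PageFilter(n))
        .withStartRow(startRow)
        .withStopRow(stopRow)
        .setCaching(n)
        .setLimit(n)                        // enforced per scan, unlike PageFilter
        .setReadType(Scan.ReadType.PREAD);  // replaces setSmall(true)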

Aggregations

PageFilter (org.apache.hadoop.hbase.filter.PageFilter): 18
Scan (org.apache.hadoop.hbase.client.Scan): 14
IOException (java.io.IOException): 9
Result (org.apache.hadoop.hbase.client.Result): 8
ResultScanner (org.apache.hadoop.hbase.client.ResultScanner): 8
FilterList (org.apache.hadoop.hbase.filter.FilterList): 6
Table (org.apache.hadoop.hbase.client.Table): 5
ArrayList (java.util.ArrayList): 4
Cell (org.apache.hadoop.hbase.Cell): 4
HashMap (java.util.HashMap): 3
AtlasException (org.apache.atlas.AtlasException): 3
Filter (org.apache.hadoop.hbase.filter.Filter): 3
ByteArrayByteIterator (com.yahoo.ycsb.ByteArrayByteIterator): 2
ByteIterator (com.yahoo.ycsb.ByteIterator): 2
EntityAuditEvent (org.apache.atlas.EntityAuditEvent): 2
KeyValue (org.apache.hadoop.hbase.KeyValue): 2
FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter): 2
TimelineReaderContext (org.apache.hadoop.yarn.server.timelineservice.reader.TimelineReaderContext): 2
TimelineFilterList (org.apache.hadoop.yarn.server.timelineservice.reader.filter.TimelineFilterList): 2
DistinctPrefixFilter (org.apache.phoenix.filter.DistinctPrefixFilter): 2