Example 6 with FirstKeyOnlyFilter

use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project hbase by apache.

the class TestFromClientSide method testFilterAllRecords.

@Test
public void testFilterAllRecords() throws IOException {
    Scan scan = new Scan();
    scan.setBatch(1);
    scan.setCaching(1);
    // Filter out all records: the stop row is the empty byte array, so no row passes the FilterList
    scan.setFilter(new FilterList(new FirstKeyOnlyFilter(), new InclusiveStopFilter(new byte[0])));
    try (Table table = TEST_UTIL.getConnection().getTable(TableName.NAMESPACE_TABLE_NAME)) {
        try (ResultScanner s = table.getScanner(scan)) {
            assertNull(s.next());
        }
    }
}
Also used : FirstKeyOnlyFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter) InclusiveStopFilter(org.apache.hadoop.hbase.filter.InclusiveStopFilter) FilterList(org.apache.hadoop.hbase.filter.FilterList) Test(org.junit.Test)
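
In the example above, FirstKeyOnlyFilter is combined with an InclusiveStopFilter whose stop row is the empty byte array, so the FilterList deliberately matches nothing. Used on its own, FirstKeyOnlyFilter is most commonly a cheap row-counting device, since only the first cell of each row travels back to the client. A minimal sketch, assuming an open org.apache.hadoop.hbase.client.Connection; the method name and counting logic are illustrative, not taken from the indexed sources:

public long countRows(Connection connection, TableName tableName) throws IOException {
    // Only the first cell of each row is returned, which keeps the scan payload small
    // when all we need is the number of rows.
    Scan scan = new Scan();
    scan.setFilter(new FirstKeyOnlyFilter());
    try (Table table = connection.getTable(tableName);
         ResultScanner scanner = table.getScanner(scan)) {
        long count = 0;
        for (Result result : scanner) {
            count++;
        }
        return count;
    }
}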

Example 7 with FirstKeyOnlyFilter

use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project hbase by apache.

the class TestSeekBeforeWithReverseScan method testReverseScanWithoutPadding.

@Test
public void testReverseScanWithoutPadding() throws Exception {
    byte[] row1 = Bytes.toBytes("a");
    byte[] row2 = Bytes.toBytes("ab");
    byte[] row3 = Bytes.toBytes("b");
    Put put1 = new Put(row1);
    put1.addColumn(cfName, cqName, HConstants.EMPTY_BYTE_ARRAY);
    Put put2 = new Put(row2);
    put2.addColumn(cfName, cqName, HConstants.EMPTY_BYTE_ARRAY);
    Put put3 = new Put(row3);
    put3.addColumn(cfName, cqName, HConstants.EMPTY_BYTE_ARRAY);
    region.put(put1);
    region.put(put2);
    region.put(put3);
    region.flush(true);
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    scan.setReversed(true);
    scan.setFilter(new FirstKeyOnlyFilter());
    scan.addFamily(cfName);
    RegionScanner scanner = region.getScanner(scan);
    List<Cell> res = new ArrayList<>();
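    // RegionScanner.next(List) returns false on the last row even though it still adds that
    // row's cells to res, so the count starts at 1 to account for that final call.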
    int count = 1;
    while (scanner.next(res)) {
        count++;
    }
    assertEquals("b", Bytes.toString(res.get(0).getRowArray(), res.get(0).getRowOffset(), res.get(0).getRowLength()));
    assertEquals("ab", Bytes.toString(res.get(1).getRowArray(), res.get(1).getRowOffset(), res.get(1).getRowLength()));
    assertEquals("a", Bytes.toString(res.get(2).getRowArray(), res.get(2).getRowOffset(), res.get(2).getRowLength()));
    assertEquals(3, count);
}
Also used : RegionScanner(org.apache.hadoop.hbase.regionserver.RegionScanner) FirstKeyOnlyFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter) ArrayList(java.util.ArrayList) Scan(org.apache.hadoop.hbase.client.Scan) Cell(org.apache.hadoop.hbase.Cell) Put(org.apache.hadoop.hbase.client.Put) Test(org.junit.Test)

Example 8 with FirstKeyOnlyFilter

use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project phoenix by apache.

the class PhoenixIndexBuilder method executeAtomicOp.

@Override
public List<Mutation> executeAtomicOp(Increment inc) throws IOException {
    byte[] opBytes = inc.getAttribute(ATOMIC_OP_ATTRIB);
    if (opBytes == null) {
        // Unexpected
        return null;
    }
    inc.setAttribute(ATOMIC_OP_ATTRIB, null);
    Put put = null;
    Delete delete = null;
    // We can neither use the time stamp in the Increment to set the Get time range
    // nor set the Put/Delete time stamp and have this be atomic, as HBase does not
    // handle that. Though we disallow using the ON DUPLICATE KEY clause when
    // CURRENT_SCN is set, we may still have a time stamp set as of when the table
    // was resolved on the client side. We need to ignore this as well due to limitations
    // in HBase, but this isn't too bad as the time will be very close to the current
    // time anyway.
    long ts = HConstants.LATEST_TIMESTAMP;
    byte[] rowKey = inc.getRow();
    final Get get = new Get(rowKey);
    if (isDupKeyIgnore(opBytes)) {
        get.setFilter(new FirstKeyOnlyFilter());
        Result result = this.env.getRegion().get(get);
        return result.isEmpty() ? convertIncrementToPutInSingletonList(inc) : Collections.<Mutation>emptyList();
    }
    ByteArrayInputStream stream = new ByteArrayInputStream(opBytes);
    DataInputStream input = new DataInputStream(stream);
    boolean skipFirstOp = input.readBoolean();
    short repeat = input.readShort();
    final int[] estimatedSizeHolder = { 0 };
    List<Pair<PTable, List<Expression>>> operations = Lists.newArrayListWithExpectedSize(3);
    while (true) {
        ExpressionVisitor<Void> visitor = new StatelessTraverseAllExpressionVisitor<Void>() {

            @Override
            public Void visit(KeyValueColumnExpression expression) {
                get.addColumn(expression.getColumnFamily(), expression.getColumnQualifier());
                estimatedSizeHolder[0]++;
                return null;
            }
        };
        try {
            int nExpressions = WritableUtils.readVInt(input);
            List<Expression> expressions = Lists.newArrayListWithExpectedSize(nExpressions);
            for (int i = 0; i < nExpressions; i++) {
                Expression expression = ExpressionType.values()[WritableUtils.readVInt(input)].newInstance();
                expression.readFields(input);
                expressions.add(expression);
                expression.accept(visitor);
            }
            PTableProtos.PTable tableProto = PTableProtos.PTable.parseDelimitedFrom(input);
            PTable table = PTableImpl.createFromProto(tableProto);
            operations.add(new Pair<>(table, expressions));
        } catch (EOFException e) {
            break;
        }
    }
    int estimatedSize = estimatedSizeHolder[0];
    if (get.getFamilyMap().isEmpty()) {
        get.setFilter(new FirstKeyOnlyFilter());
    }
    MultiKeyValueTuple tuple;
    List<Cell> flattenedCells = null;
    List<Cell> cells = ((HRegion) this.env.getRegion()).get(get, false);
    if (cells.isEmpty()) {
        if (skipFirstOp) {
            if (operations.size() <= 1 && repeat <= 1) {
                return convertIncrementToPutInSingletonList(inc);
            }
            // Skip first operation (if first wasn't ON DUPLICATE KEY IGNORE)
            repeat--;
        }
        // Base current state off of new row
        flattenedCells = flattenCells(inc, estimatedSize);
        tuple = new MultiKeyValueTuple(flattenedCells);
    } else {
        // Base current state off of existing row
        tuple = new MultiKeyValueTuple(cells);
    }
    ImmutableBytesWritable ptr = new ImmutableBytesWritable();
    for (int opIndex = 0; opIndex < operations.size(); opIndex++) {
        Pair<PTable, List<Expression>> operation = operations.get(opIndex);
        PTable table = operation.getFirst();
        List<Expression> expressions = operation.getSecond();
        for (int j = 0; j < repeat; j++) {
            // repeater loop
            ptr.set(rowKey);
            // The flattened cells only need to be sorted when the expressions below are actually
            // executed, not when the outer loop is exited. Hence we do it here, at the top of the loop.
            if (flattenedCells != null) {
                Collections.sort(flattenedCells, KeyValue.COMPARATOR);
            }
            PRow row = table.newRow(GenericKeyValueBuilder.INSTANCE, ts, ptr, false);
            for (int i = 0; i < expressions.size(); i++) {
                Expression expression = expressions.get(i);
                ptr.set(ByteUtil.EMPTY_BYTE_ARRAY);
                expression.evaluate(tuple, ptr);
                PColumn column = table.getColumns().get(i + 1);
                Object value = expression.getDataType().toObject(ptr, column.getSortOrder());
                // The expression and column are expected to share the same type; verify the value
                // fits the column's max length and scale before coercing it.
                if (!column.getDataType().isSizeCompatible(ptr, value, column.getDataType(), expression.getSortOrder(), expression.getMaxLength(), expression.getScale(), column.getMaxLength(), column.getScale())) {
                    throw new DataExceedsCapacityException(column.getDataType(), column.getMaxLength(), column.getScale());
                }
                column.getDataType().coerceBytes(ptr, value, expression.getDataType(), expression.getMaxLength(), expression.getScale(), expression.getSortOrder(), column.getMaxLength(), column.getScale(), column.getSortOrder(), table.rowKeyOrderOptimizable());
                byte[] bytes = ByteUtil.copyKeyBytesIfNecessary(ptr);
                row.setValue(column, bytes);
            }
            flattenedCells = Lists.newArrayListWithExpectedSize(estimatedSize);
            List<Mutation> mutations = row.toRowMutations();
            for (Mutation source : mutations) {
                flattenCells(source, flattenedCells);
            }
            tuple.setKeyValues(flattenedCells);
        }
        // Repeat only applies to first statement
        repeat = 1;
    }
    List<Mutation> mutations = Lists.newArrayListWithExpectedSize(2);
    for (int i = 0; i < tuple.size(); i++) {
        Cell cell = tuple.getValue(i);
        if (Type.codeToType(cell.getTypeByte()) == Type.Put) {
            if (put == null) {
                put = new Put(rowKey);
                transferAttributes(inc, put);
                mutations.add(put);
            }
            put.add(cell);
        } else {
            if (delete == null) {
                delete = new Delete(rowKey);
                transferAttributes(inc, delete);
                mutations.add(delete);
            }
            delete.addDeleteMarker(cell);
        }
    }
    return mutations;
}
Also used : Delete(org.apache.hadoop.hbase.client.Delete) PTable(org.apache.phoenix.schema.PTable) Result(org.apache.hadoop.hbase.client.Result) PRow(org.apache.phoenix.schema.PRow) PColumn(org.apache.phoenix.schema.PColumn) StatelessTraverseAllExpressionVisitor(org.apache.phoenix.expression.visitor.StatelessTraverseAllExpressionVisitor) EOFException(java.io.EOFException) List(java.util.List) KeyValueColumnExpression(org.apache.phoenix.expression.KeyValueColumnExpression) Cell(org.apache.hadoop.hbase.Cell) Pair(org.apache.hadoop.hbase.util.Pair) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) FirstKeyOnlyFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter) DataInputStream(java.io.DataInputStream) Put(org.apache.hadoop.hbase.client.Put) PTableProtos(org.apache.phoenix.coprocessor.generated.PTableProtos) HRegion(org.apache.hadoop.hbase.regionserver.HRegion) DataExceedsCapacityException(org.apache.phoenix.exception.DataExceedsCapacityException) ByteArrayInputStream(java.io.ByteArrayInputStream) KeyValueColumnExpression(org.apache.phoenix.expression.KeyValueColumnExpression) Expression(org.apache.phoenix.expression.Expression) Get(org.apache.hadoop.hbase.client.Get) MultiKeyValueTuple(org.apache.phoenix.schema.tuple.MultiKeyValueTuple) Mutation(org.apache.hadoop.hbase.client.Mutation)
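
The ON DUPLICATE KEY IGNORE branch above only needs to know whether the row already exists, which is why its Get carries a FirstKeyOnlyFilter and why an empty Result means the Increment can be converted to a Put. A condensed sketch of that existence check on its own; the helper name and Region parameter are illustrative, not Phoenix internals:

// Illustrative helper, not part of Phoenix: FirstKeyOnlyFilter limits the Get to the
// first cell of the row, which is enough to decide whether the row already exists.
private static boolean rowExists(Region region, byte[] rowKey) throws IOException {
    Get get = new Get(rowKey);
    get.setFilter(new FirstKeyOnlyFilter());
    Result result = region.get(get);
    return !result.isEmpty();
}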

Example 9 with FirstKeyOnlyFilter

use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project phoenix by apache.

the class BaseResultIterators method initializeScan.

private static void initializeScan(QueryPlan plan, Integer perScanLimit, Integer offset, Scan scan) throws SQLException {
    StatementContext context = plan.getContext();
    TableRef tableRef = plan.getTableRef();
    PTable table = tableRef.getTable();
    Map<byte[], NavigableSet<byte[]>> familyMap = scan.getFamilyMap();
    // Hack for PHOENIX-2067 to force raw scan over all KeyValues to fix their row keys
    if (context.getConnection().isDescVarLengthRowKeyUpgrade()) {
        // We project *all* KeyValues across all column families as we make a pass over
        // a physical table and we want to make sure we catch all KeyValues that may be
        // dynamic or part of an updatable view.
        familyMap.clear();
        scan.setMaxVersions();
        // Remove any filter
        scan.setFilter(null);
        // Traverse (and subsequently clone) all KeyValues
        scan.setRaw(true);
        // Pass over PTable so we can re-write rows according to the row key schema
        scan.setAttribute(BaseScannerRegionObserver.UPGRADE_DESC_ROW_KEY, UngroupedAggregateRegionObserver.serialize(table));
    } else {
        FilterableStatement statement = plan.getStatement();
        RowProjector projector = plan.getProjector();
        boolean optimizeProjection = false;
        boolean keyOnlyFilter = familyMap.isEmpty() && context.getWhereConditionColumns().isEmpty();
        if (!projector.projectEverything()) {
            // Only add to the scan what we know exists; otherwise what we project here may
            // not match the actual column families of the table (which is bad).
            if (keyOnlyFilter && table.getColumnFamilies().size() == 1) {
                // Project the one column family. We must project a column family since it's possible
                // that there are other non declared column families that we need to ignore.
                scan.addFamily(table.getColumnFamilies().get(0).getName().getBytes());
            } else {
                optimizeProjection = true;
                if (projector.projectEveryRow()) {
                    if (table.getViewType() == ViewType.MAPPED) {
                        // Since we don't have the empty key value in MAPPED tables, 
                        // we must project all CFs in HRS. However, only the
                        // selected column values are returned back to client.
                        context.getWhereConditionColumns().clear();
                        for (PColumnFamily family : table.getColumnFamilies()) {
                            context.addWhereConditionColumn(family.getName().getBytes(), null);
                        }
                    } else {
                        byte[] ecf = SchemaUtil.getEmptyColumnFamily(table);
                        // Project the empty key value unless its column family has already
                        // been projected in its entirety.
                        if (!familyMap.containsKey(ecf) || familyMap.get(ecf) != null) {
                            scan.addColumn(ecf, EncodedColumnsUtil.getEmptyKeyValueInfo(table).getFirst());
                        }
                    }
                }
            }
        }
        // Add FirstKeyOnlyFilter if there are no references to key value columns
        if (keyOnlyFilter) {
            ScanUtil.andFilterAtBeginning(scan, new FirstKeyOnlyFilter());
        }
        if (perScanLimit != null) {
            ScanUtil.andFilterAtEnd(scan, new PageFilter(perScanLimit));
        }
        if (offset != null) {
            ScanUtil.addOffsetAttribute(scan, offset);
        }
        int cols = plan.getGroupBy().getOrderPreservingColumnCount();
        if (cols > 0 && keyOnlyFilter && !plan.getStatement().getHint().hasHint(HintNode.Hint.RANGE_SCAN) && cols < plan.getTableRef().getTable().getRowKeySchema().getFieldCount() && plan.getGroupBy().isOrderPreserving() && (context.getAggregationManager().isEmpty() || plan.getGroupBy().isUngroupedAggregate())) {
            ScanUtil.andFilterAtEnd(scan, new DistinctPrefixFilter(plan.getTableRef().getTable().getRowKeySchema(), cols));
            if (plan.getLimit() != null) {
                // We can push the limit to the server
                ScanUtil.andFilterAtEnd(scan, new PageFilter(plan.getLimit()));
            }
        }
        scan.setAttribute(BaseScannerRegionObserver.QUALIFIER_ENCODING_SCHEME, new byte[] { table.getEncodingScheme().getSerializedMetadataValue() });
        scan.setAttribute(BaseScannerRegionObserver.IMMUTABLE_STORAGE_ENCODING_SCHEME, new byte[] { table.getImmutableStorageScheme().getSerializedMetadataValue() });
        // we use this flag on the server side to determine which value column qualifier to use in the key value we return from the server.
        scan.setAttribute(BaseScannerRegionObserver.USE_NEW_VALUE_COLUMN_QUALIFIER, Bytes.toBytes(true));
        // When analyzing the table, no key value lookups are done on the server,
        // so there is no point setting the qualifier ranges.
        if (!ScanUtil.isAnalyzeTable(scan)) {
            setQualifierRanges(keyOnlyFilter, table, scan, context);
        }
        if (optimizeProjection) {
            optimizeProjection(context, scan, table, statement);
        }
    }
}
Also used : NavigableSet(java.util.NavigableSet) FirstKeyOnlyFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter) DistinctPrefixFilter(org.apache.phoenix.filter.DistinctPrefixFilter) PColumnFamily(org.apache.phoenix.schema.PColumnFamily) PTable(org.apache.phoenix.schema.PTable) Hint(org.apache.phoenix.parse.HintNode.Hint) StatementContext(org.apache.phoenix.compile.StatementContext) RowProjector(org.apache.phoenix.compile.RowProjector) FilterableStatement(org.apache.phoenix.parse.FilterableStatement) PageFilter(org.apache.hadoop.hbase.filter.PageFilter) TableRef(org.apache.phoenix.schema.TableRef)
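
ScanUtil.andFilterAtBeginning and andFilterAtEnd are Phoenix helpers. In plain HBase terms, ANDing an extra filter into a Scan that may already carry one looks roughly like the sketch below; the real helpers additionally merge into an existing MUST_PASS_ALL FilterList and control where the new filter sits in the evaluation order:

// Rough sketch of ANDing a new filter into a Scan (illustrative, not the Phoenix code).
static void andFilter(Scan scan, Filter newFilter) {
    Filter existing = scan.getFilter();
    if (existing == null) {
        scan.setFilter(newFilter);
    } else {
        // MUST_PASS_ALL means a cell must pass both filters to be returned.
        scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, newFilter, existing));
    }
}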

Example 10 with FirstKeyOnlyFilter

use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project hbase by apache.

the class TestPartialResultsFromClientSide method testPartialResultsWithColumnFilter.

/**
   * Test partial Result re-assembly in the presence of different filters. The Results from the
   * partial scanner should match the Results returned from a scanner that receives all of the
   * results in one RPC to the server. The partial scanner is tested with a variety of different
   * result sizes (all of which are less than the size necessary to fetch an entire row)
   * @throws Exception
   */
@Test
public void testPartialResultsWithColumnFilter() throws Exception {
    testPartialResultsWithColumnFilter(new FirstKeyOnlyFilter());
    testPartialResultsWithColumnFilter(new ColumnPrefixFilter(Bytes.toBytes("testQualifier5")));
    testPartialResultsWithColumnFilter(new ColumnRangeFilter(Bytes.toBytes("testQualifer1"), true, Bytes.toBytes("testQualifier7"), true));
    Set<byte[]> qualifiers = new LinkedHashSet<>();
    qualifiers.add(Bytes.toBytes("testQualifier5"));
    testPartialResultsWithColumnFilter(new FirstKeyValueMatchingQualifiersFilter(qualifiers));
}
Also used : ColumnPrefixFilter(org.apache.hadoop.hbase.filter.ColumnPrefixFilter) LinkedHashSet(java.util.LinkedHashSet) FirstKeyOnlyFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter) ColumnRangeFilter(org.apache.hadoop.hbase.filter.ColumnRangeFilter) FirstKeyValueMatchingQualifiersFilter(org.apache.hadoop.hbase.filter.FirstKeyValueMatchingQualifiersFilter) Test(org.junit.Test)
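
The testPartialResultsWithColumnFilter helper is defined elsewhere in the test class. The client-side settings such a partial-results test depends on look roughly like the sketch below; the max result size value is an illustrative assumption, chosen small so that a single row arrives as several partial Results which the client then re-assembles:

Scan partialScan = new Scan();
partialScan.setFilter(new FirstKeyOnlyFilter());
// Permit the server to split a row across Results, and cap the bytes per RPC so that it has to.
partialScan.setAllowPartialResults(true);
partialScan.setMaxResultSize(1024);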

Aggregations

FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter): 28
Scan (org.apache.hadoop.hbase.client.Scan): 17
Cell (org.apache.hadoop.hbase.Cell): 9
ArrayList (java.util.ArrayList): 8
Test (org.junit.Test): 8
IOException (java.io.IOException): 7
Result (org.apache.hadoop.hbase.client.Result): 7
RegionScanner (org.apache.hadoop.hbase.regionserver.RegionScanner): 6
Filter (org.apache.hadoop.hbase.filter.Filter): 5
RowFilter (org.apache.hadoop.hbase.filter.RowFilter): 4
Connection (java.sql.Connection): 3
Put (org.apache.hadoop.hbase.client.Put): 3
ResultScanner (org.apache.hadoop.hbase.client.ResultScanner): 3
CompareFilter (org.apache.hadoop.hbase.filter.CompareFilter): 3
FilterList (org.apache.hadoop.hbase.filter.FilterList): 3
BloomFilter (org.apache.hive.common.util.BloomFilter): 3
ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr): 3
PhoenixConnection (org.apache.phoenix.jdbc.PhoenixConnection): 3
PMetaDataEntity (org.apache.phoenix.schema.PMetaDataEntity): 3
List (java.util.List): 2