
Example 6 with RowFilter

Use of org.apache.hadoop.hbase.filter.RowFilter in project hbase by apache.

From the class TestFromClientSide, method testJira6912.

@Test
public void testJira6912() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    Table foo = TEST_UTIL.createTable(tableName, new byte[][] { FAMILY }, 10);
    List<Put> puts = new ArrayList<Put>();
    for (int i = 0; i != 100; i++) {
        Put put = new Put(Bytes.toBytes(i));
        put.addColumn(FAMILY, FAMILY, Bytes.toBytes(i));
        puts.add(put);
    }
    foo.put(puts);
    // If this flush is commented out, the test passes; the bug only reproduced against flushed data
    TEST_UTIL.flush();
    Scan scan = new Scan();
    scan.setStartRow(Bytes.toBytes(1));
    scan.setStopRow(Bytes.toBytes(3));
    scan.addColumn(FAMILY, FAMILY);
    scan.setFilter(new RowFilter(CompareFilter.CompareOp.NOT_EQUAL, new BinaryComparator(Bytes.toBytes(1))));
    ResultScanner scanner = foo.getScanner(scan);
    Result[] bar = scanner.next(100);
    assertEquals(1, bar.length);
}
Also used: TableName (org.apache.hadoop.hbase.TableName), RowFilter (org.apache.hadoop.hbase.filter.RowFilter), ArrayList (java.util.ArrayList), MultiRowMutationEndpoint (org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint), BinaryComparator (org.apache.hadoop.hbase.filter.BinaryComparator), Test (org.junit.Test)
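
The flush call is the crux of the test: per the comment, the scan only misbehaved once the data had been flushed, which is the scenario HBASE-6912 tracked. The test is written against the HBase 1.x client API; setStartRow, setStopRow, and CompareFilter.CompareOp are deprecated in the 2.x client. A minimal sketch of the same scan against the 2.x API (an assumption here, not part of the original test; the family parameter stands in for the test's FAMILY constant):

import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.util.Bytes;

class Jira6912ScanSketch {
    // Builds the testJira6912 scan with the non-deprecated HBase 2.x API.
    static Scan buildScan(byte[] family) {
        return new Scan()
            // withStartRow/withStopRow replace setStartRow/setStopRow;
            // start is inclusive and stop exclusive, so this covers rows 1 and 2.
            .withStartRow(Bytes.toBytes(1))
            .withStopRow(Bytes.toBytes(3))
            .addColumn(family, family)
            // CompareOperator replaces the deprecated CompareFilter.CompareOp;
            // excluding row 1 leaves one row, matching assertEquals(1, bar.length).
            .setFilter(new RowFilter(CompareOperator.NOT_EQUAL,
                new BinaryComparator(Bytes.toBytes(1))));
    }
}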

Example 7 with RowFilter

Use of org.apache.hadoop.hbase.filter.RowFilter in project hbase by apache.

From the class Export, method getConfiguredScanForJob.

private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
    Scan s = new Scan();
    // Optional arguments.
    // Set Scan Versions
    int versions = args.length > 2 ? Integer.parseInt(args[2]) : 1;
    s.setMaxVersions(versions);
    // Set scan time range
    long startTime = args.length > 3 ? Long.parseLong(args[3]) : 0L;
    long endTime = args.length > 4 ? Long.parseLong(args[4]) : Long.MAX_VALUE;
    s.setTimeRange(startTime, endTime);
    // Set cache blocks
    s.setCacheBlocks(false);
    // Set start and stop row
    if (conf.get(TableInputFormat.SCAN_ROW_START) != null) {
        s.setStartRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_START)));
    }
    if (conf.get(TableInputFormat.SCAN_ROW_STOP) != null) {
        s.setStopRow(Bytes.toBytesBinary(conf.get(TableInputFormat.SCAN_ROW_STOP)));
    }
    // Set raw scan if requested (keeps delete markers and deleted cells)
    boolean raw = Boolean.parseBoolean(conf.get(RAW_SCAN));
    if (raw) {
        s.setRaw(raw);
    }
    // Set scan column families
    for (String columnFamily : conf.getTrimmedStrings(TableInputFormat.SCAN_COLUMN_FAMILY)) {
        s.addFamily(Bytes.toBytes(columnFamily));
    }
    // Set RowFilter or Prefix Filter if applicable.
    Filter exportFilter = getExportFilter(args);
    if (exportFilter != null) {
        LOG.info("Setting Scan Filter for Export.");
        s.setFilter(exportFilter);
    }
    int batching = conf.getInt(EXPORT_BATCHING, -1);
    if (batching != -1) {
        try {
            s.setBatch(batching);
        } catch (IncompatibleFilterException e) {
            LOG.error("Batching could not be set", e);
        }
    }
    LOG.info("versions=" + versions + ", starttime=" + startTime + ", endtime=" + endTime + ", keepDeletedCells=" + raw);
    return s;
}
Also used: PrefixFilter (org.apache.hadoop.hbase.filter.PrefixFilter), RowFilter (org.apache.hadoop.hbase.filter.RowFilter), Filter (org.apache.hadoop.hbase.filter.Filter), Scan (org.apache.hadoop.hbase.client.Scan), IncompatibleFilterException (org.apache.hadoop.hbase.filter.IncompatibleFilterException)
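
The "Set RowFilter or Prefix Filter" comment points at getExportFilter, whose body is not shown here. A plausible sketch consistent with the imports listed above (RowFilter, PrefixFilter, RegexStringComparator), not the verbatim Export source: since args[2..4] hold the versions and time-range slots, the row criterion is assumed to sit at args[5], with a leading '^' marking a row-key regex and anything else a literal row-key prefix.

import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.util.Bytes;

class ExportFilterSketch {
    // Sketch of a getExportFilter-style helper; the args[5] slot is an assumption.
    static Filter getExportFilter(String[] args) {
        if (args.length <= 5 || args[5] == null) {
            return null; // no row criterion supplied, scan everything
        }
        String criteria = args[5];
        if (criteria.startsWith("^")) {
            // Regex over the row key: keep rows whose key matches the pattern.
            return new RowFilter(CompareOp.EQUAL,
                new RegexStringComparator(criteria.substring(1)));
        }
        // Otherwise treat the criterion as a literal row-key prefix.
        return new PrefixFilter(Bytes.toBytesBinary(criteria));
    }
}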

Example 8 with RowFilter

Use of org.apache.hadoop.hbase.filter.RowFilter in project hive by apache.

From the class TestHBaseFilterPlanUtil, method testPartitionKeyScannerMixedType.

@Test
public void testPartitionKeyScannerMixedType() throws Exception {
    List<FieldSchema> parts = new ArrayList<FieldSchema>();
    parts.add(new FieldSchema("year", "int", null));
    parts.add(new FieldSchema("month", "int", null));
    parts.add(new FieldSchema("state", "string", null));
    // One prefix key and one minor key range
    ExpressionTree exprTree = PartFilterExprUtil.getFilterParser("year = 2015 and state = 'CA'").tree;
    PlanResult planRes = HBaseFilterPlanUtil.getFilterPlan(exprTree, parts);
    Assert.assertEquals(planRes.plan.getPlans().size(), 1);
    ScanPlan sp = planRes.plan.getPlans().get(0);
    byte[] startRowSuffix = sp.getStartRowSuffix("testdb", "testtb", parts);
    byte[] endRowSuffix = sp.getEndRowSuffix("testdb", "testtb", parts);
    RowFilter filter = (RowFilter) sp.getFilter(parts);
    // The scan range contains the major key (year); the RowFilter carries the minor key (state)
    Assert.assertTrue(Bytes.contains(startRowSuffix, Shorts.toByteArray((short) 2015)));
    Assert.assertTrue(Bytes.contains(endRowSuffix, Shorts.toByteArray((short) 2016)));
    PartitionKeyComparator comparator = (PartitionKeyComparator) filter.getComparator();
    Assert.assertEquals(comparator.ranges.size(), 1);
    Assert.assertEquals(comparator.ranges.get(0).keyName, "state");
}
Also used: PlanResult (org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.PlanResult), MultiScanPlan (org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.MultiScanPlan), ScanPlan (org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.ScanPlan), RowFilter (org.apache.hadoop.hbase.filter.RowFilter), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), ArrayList (java.util.ArrayList), ExpressionTree (org.apache.hadoop.hive.metastore.parser.ExpressionTree), Test (org.junit.Test)
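
PartitionKeyComparator is a Hive-specific ByteArrayComparable: the RowFilter delegates the row-key decision to it, which is how the minor key (state) gets evaluated even though the scan range only pins down the major key (year). To show the mechanism, here is a minimal, hypothetical comparator in the same mold; it matches rows whose key contains a given byte sequence. A production comparator would also need protobuf serialization (toByteArray/parseFrom) so region servers can rebuild it; returning the raw bytes below is only a placeholder.

import org.apache.hadoop.hbase.filter.ByteArrayComparable;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical comparator: matches when the compared value contains `needle`.
class ContainsComparator extends ByteArrayComparable {
    ContainsComparator(byte[] needle) {
        super(needle);
    }

    @Override
    public int compareTo(byte[] value, int offset, int length) {
        // RowFilter with CompareOp.EQUAL keeps the row when this returns 0.
        return Bytes.contains(Bytes.copy(value, offset, length), getValue()) ? 0 : 1;
    }

    @Override
    public byte[] toByteArray() {
        // Placeholder only; real comparators serialize via protobuf.
        return getValue();
    }
}

It would be installed just like the filter under test: new RowFilter(CompareOp.EQUAL, new ContainsComparator(Bytes.toBytes("CA"))).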

Example 9 with RowFilter

Use of org.apache.hadoop.hbase.filter.RowFilter in project hive by apache.

From the class TestHBaseFilterPlanUtil, method testPartitionKeyScannerAllString.

@Test
public void testPartitionKeyScannerAllString() throws Exception {
    List<FieldSchema> parts = new ArrayList<FieldSchema>();
    parts.add(new FieldSchema("year", "string", null));
    parts.add(new FieldSchema("month", "string", null));
    parts.add(new FieldSchema("state", "string", null));
    // One prefix key and one minor key range
    ExpressionTree exprTree = PartFilterExprUtil.getFilterParser("year = 2015 and state = 'CA'").tree;
    PlanResult planRes = HBaseFilterPlanUtil.getFilterPlan(exprTree, parts);
    Assert.assertEquals(planRes.plan.getPlans().size(), 1);
    ScanPlan sp = planRes.plan.getPlans().get(0);
    byte[] startRowSuffix = sp.getStartRowSuffix("testdb", "testtb", parts);
    byte[] endRowSuffix = sp.getEndRowSuffix("testdb", "testtb", parts);
    RowFilter filter = (RowFilter) sp.getFilter(parts);
    // The scan range contains the major key (year); the RowFilter carries the minor key (state)
    Assert.assertTrue(Bytes.contains(startRowSuffix, "2015".getBytes()));
    Assert.assertTrue(Bytes.contains(endRowSuffix, "2015".getBytes()));
    Assert.assertFalse(Bytes.contains(startRowSuffix, "CA".getBytes()));
    Assert.assertFalse(Bytes.contains(endRowSuffix, "CA".getBytes()));
    PartitionKeyComparator comparator = (PartitionKeyComparator) filter.getComparator();
    Assert.assertEquals(comparator.ranges.size(), 1);
    Assert.assertEquals(comparator.ranges.get(0).keyName, "state");
    // Two prefix keys and one LIKE operator
    exprTree = PartFilterExprUtil.getFilterParser("year = 2015 and month > 10 " + "and month <= 11 and state like 'C%'").tree;
    planRes = HBaseFilterPlanUtil.getFilterPlan(exprTree, parts);
    Assert.assertEquals(planRes.plan.getPlans().size(), 1);
    sp = planRes.plan.getPlans().get(0);
    startRowSuffix = sp.getStartRowSuffix("testdb", "testtb", parts);
    endRowSuffix = sp.getEndRowSuffix("testdb", "testtb", parts);
    filter = (RowFilter) sp.getFilter(parts);
    // The scan range contains the major key values (year/month); the RowFilter carries the LIKE operator
    Assert.assertTrue(Bytes.contains(startRowSuffix, "2015".getBytes()));
    Assert.assertTrue(Bytes.contains(endRowSuffix, "2015".getBytes()));
    Assert.assertTrue(Bytes.contains(startRowSuffix, "10".getBytes()));
    Assert.assertTrue(Bytes.contains(endRowSuffix, "11".getBytes()));
    comparator = (PartitionKeyComparator) filter.getComparator();
    Assert.assertEquals(comparator.ops.size(), 1);
    Assert.assertEquals(comparator.ops.get(0).keyName, "state");
    // One prefix key, one minor key range and one LIKE operator
    exprTree = PartFilterExprUtil.getFilterParser("year >= 2014 and month > 10 " + "and month <= 11 and state like 'C%'").tree;
    planRes = HBaseFilterPlanUtil.getFilterPlan(exprTree, parts);
    Assert.assertEquals(planRes.plan.getPlans().size(), 1);
    sp = planRes.plan.getPlans().get(0);
    startRowSuffix = sp.getStartRowSuffix("testdb", "testtb", parts);
    endRowSuffix = sp.getEndRowSuffix("testdb", "testtb", parts);
    filter = (RowFilter) sp.getFilter(parts);
    // The scan range contains the major key year (lower bound only); the RowFilter carries
    // the minor key range on month and the LIKE operator on state
    Assert.assertTrue(Bytes.contains(startRowSuffix, "2014".getBytes()));
    comparator = (PartitionKeyComparator) filter.getComparator();
    Assert.assertEquals(comparator.ranges.size(), 1);
    Assert.assertEquals(comparator.ranges.get(0).keyName, "month");
    Assert.assertEquals(comparator.ops.size(), 1);
    Assert.assertEquals(comparator.ops.get(0).keyName, "state");
    // Condition contains an OR
    exprTree = PartFilterExprUtil.getFilterParser("year = 2014 and (month > 10 " + "or month < 3)").tree;
    planRes = HBaseFilterPlanUtil.getFilterPlan(exprTree, parts);
    sp = planRes.plan.getPlans().get(0);
    startRowSuffix = sp.getStartRowSuffix("testdb", "testtb", parts);
    endRowSuffix = sp.getEndRowSuffix("testdb", "testtb", parts);
    filter = (RowFilter) sp.getFilter(parts);
    // The first ScanPlan contains year = 2014 and month > 10
    Assert.assertTrue(Bytes.contains(startRowSuffix, "2014".getBytes()));
    Assert.assertTrue(Bytes.contains(endRowSuffix, "2014".getBytes()));
    Assert.assertTrue(Bytes.contains(startRowSuffix, "10".getBytes()));
    sp = planRes.plan.getPlans().get(1);
    startRowSuffix = sp.getStartRowSuffix("testdb", "testtb", parts);
    endRowSuffix = sp.getEndRowSuffix("testdb", "testtb", parts);
    filter = (RowFilter) sp.getFilter(parts);
    // The second ScanPlan contains year = 2014 and month < 3
    Assert.assertTrue(Bytes.contains(startRowSuffix, "2014".getBytes()));
    Assert.assertTrue(Bytes.contains(endRowSuffix, "2014".getBytes()));
    Assert.assertTrue(Bytes.contains(endRowSuffix, "3".getBytes()));
}
Also used: PlanResult (org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.PlanResult), MultiScanPlan (org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.MultiScanPlan), ScanPlan (org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.ScanPlan), RowFilter (org.apache.hadoop.hbase.filter.RowFilter), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), ArrayList (java.util.ArrayList), ExpressionTree (org.apache.hadoop.hive.metastore.parser.ExpressionTree), Test (org.junit.Test)
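
The OR case is the one worth pausing on: the planner splits "month > 10 or month < 3" into two range-bounded ScanPlans rather than one scan over the whole table. The single-scan alternative at the HBase level would be a FilterList in MUST_PASS_ONE mode over two RowFilters; the sketch below assumes, purely for illustration, row keys that are just the month strings, and is not how the Hive planner works:

import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.util.Bytes;

class OrFilterSketch {
    // month > 10 OR month < 3 as one server-side filter. Unlike the two
    // range-bounded ScanPlans above, this walks the entire key range and
    // rejects rows one by one, which is why the planner prefers ranges.
    // (BinaryComparator orders lexicographically, so "2" sorts after "11";
    // real keys would be fixed-width encoded.)
    static Filter monthOutsideRange() {
        return new FilterList(FilterList.Operator.MUST_PASS_ONE,
            new RowFilter(CompareOp.GREATER, new BinaryComparator(Bytes.toBytes("10"))),
            new RowFilter(CompareOp.LESS, new BinaryComparator(Bytes.toBytes("3"))));
    }
}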

Example 10 with RowFilter

Use of org.apache.hadoop.hbase.filter.RowFilter in project drill by axbaretto.

From the class HBaseFilterBuilder, method createHBaseScanSpec.

private HBaseScanSpec createHBaseScanSpec(FunctionCall call, CompareFunctionsProcessor processor) {
    String functionName = processor.getFunctionName();
    SchemaPath field = processor.getPath();
    byte[] fieldValue = processor.getValue();
    boolean sortOrderAscending = processor.isSortOrderAscending();
    boolean isRowKey = field.getRootSegmentPath().equals(ROW_KEY);
    if (!(isRowKey || (!field.getRootSegment().isLastPath() && field.getRootSegment().getChild().isLastPath() && field.getRootSegment().getChild().isNamed()))) {
        /*
         * If the field in this function is neither the row_key nor a qualified HBase column, return null.
         */
        return null;
    }
    if (processor.isRowKeyPrefixComparison()) {
        return createRowKeyPrefixScanSpec(call, processor);
    }
    CompareOp compareOp = null;
    boolean isNullTest = false;
    ByteArrayComparable comparator = new BinaryComparator(fieldValue);
    byte[] startRow = HConstants.EMPTY_START_ROW;
    byte[] stopRow = HConstants.EMPTY_END_ROW;
    switch(functionName) {
        case "equal":
            compareOp = CompareOp.EQUAL;
            if (isRowKey) {
                startRow = fieldValue;
                /* stopRow should be just greater than 'value' */
                stopRow = Arrays.copyOf(fieldValue, fieldValue.length + 1);
            }
            break;
        case "not_equal":
            compareOp = CompareOp.NOT_EQUAL;
            break;
        case "greater_than_or_equal_to":
            if (sortOrderAscending) {
                compareOp = CompareOp.GREATER_OR_EQUAL;
                if (isRowKey) {
                    startRow = fieldValue;
                }
            } else {
                compareOp = CompareOp.LESS_OR_EQUAL;
                if (isRowKey) {
                    // stopRow should be just greater than 'value'
                    stopRow = Arrays.copyOf(fieldValue, fieldValue.length + 1);
                }
            }
            break;
        case "greater_than":
            if (sortOrderAscending) {
                compareOp = CompareOp.GREATER;
                if (isRowKey) {
                    // startRow should be just greater than 'value'
                    startRow = Arrays.copyOf(fieldValue, fieldValue.length + 1);
                }
            } else {
                compareOp = CompareOp.LESS;
                if (isRowKey) {
                    stopRow = fieldValue;
                }
            }
            break;
        case "less_than_or_equal_to":
            if (sortOrderAscending) {
                compareOp = CompareOp.LESS_OR_EQUAL;
                if (isRowKey) {
                    // stopRow should be just greater than 'value'
                    stopRow = Arrays.copyOf(fieldValue, fieldValue.length + 1);
                }
            } else {
                compareOp = CompareOp.GREATER_OR_EQUAL;
                if (isRowKey) {
                    startRow = fieldValue;
                }
            }
            break;
        case "less_than":
            if (sortOrderAscending) {
                compareOp = CompareOp.LESS;
                if (isRowKey) {
                    stopRow = fieldValue;
                }
            } else {
                compareOp = CompareOp.GREATER;
                if (isRowKey) {
                    // startRow should be just greater than 'value'
                    startRow = Arrays.copyOf(fieldValue, fieldValue.length + 1);
                }
            }
            break;
        case "isnull":
        case "isNull":
        case "is null":
            if (isRowKey) {
                return null;
            }
            isNullTest = true;
            compareOp = CompareOp.EQUAL;
            comparator = new NullComparator();
            break;
        case "isnotnull":
        case "isNotNull":
        case "is not null":
            if (isRowKey) {
                return null;
            }
            compareOp = CompareOp.NOT_EQUAL;
            comparator = new NullComparator();
            break;
        case "like":
            /*
             * Convert the LIKE operand to a regular-expression pattern so that
             * we can apply RegexStringComparator().
             */
            HBaseRegexParser parser = new HBaseRegexParser(call).parse();
            compareOp = CompareOp.EQUAL;
            comparator = new RegexStringComparator(parser.getRegexString());
            /*
             * We can possibly do better if the LIKE operator is on the row_key.
             */
            if (isRowKey) {
                String prefix = parser.getPrefixString();
                if (prefix != null) {
                    /*
                     * If there is a literal prefix, it can help us prune the scan to a sub-range.
                     */
                    if (prefix.equals(parser.getLikeString())) {
                        /* The operand value is literal. This turns the LIKE operator to EQUAL operator */
                        startRow = stopRow = fieldValue;
                        compareOp = null;
                    } else {
                        startRow = prefix.getBytes(Charsets.UTF_8);
                        stopRow = startRow.clone();
                        // Compute an exclusive stop row by incrementing the prefix,
                        // carrying left past bytes that would overflow.
                        boolean isMaxVal = true;
                        for (int i = stopRow.length - 1; i >= 0; --i) {
                            int nextByteValue = (0xff & stopRow[i]) + 1;
                            if (nextByteValue < 0xff) {
                                stopRow[i] = (byte) nextByteValue;
                                isMaxVal = false;
                                break;
                            } else {
                                stopRow[i] = 0;
                            }
                        }
                        if (isMaxVal) {
                            // Prefix was already at its maximum: no finite upper bound exists.
                            stopRow = HConstants.EMPTY_END_ROW;
                        }
                    }
                }
            }
            break;
    }
    if (compareOp != null || startRow != HConstants.EMPTY_START_ROW || stopRow != HConstants.EMPTY_END_ROW) {
        Filter filter = null;
        if (isRowKey) {
            if (compareOp != null) {
                filter = new RowFilter(compareOp, comparator);
            }
        } else {
            byte[] family = HBaseUtils.getBytes(field.getRootSegment().getPath());
            byte[] qualifier = HBaseUtils.getBytes(field.getRootSegment().getChild().getNameSegment().getPath());
            filter = new SingleColumnValueFilter(family, qualifier, compareOp, comparator);
            ((SingleColumnValueFilter) filter).setLatestVersionOnly(true);
            if (!isNullTest) {
                ((SingleColumnValueFilter) filter).setFilterIfMissing(true);
            }
        }
        return new HBaseScanSpec(groupScan.getTableName(), startRow, stopRow, filter);
    }
    // else
    return null;
}
Also used: SingleColumnValueFilter (org.apache.hadoop.hbase.filter.SingleColumnValueFilter), NullComparator (org.apache.hadoop.hbase.filter.NullComparator), BinaryComparator (org.apache.hadoop.hbase.filter.BinaryComparator), RegexStringComparator (org.apache.hadoop.hbase.filter.RegexStringComparator), ByteArrayComparable (org.apache.hadoop.hbase.filter.ByteArrayComparable), RowFilter (org.apache.hadoop.hbase.filter.RowFilter), SchemaPath (org.apache.drill.common.expression.SchemaPath), Filter (org.apache.hadoop.hbase.filter.Filter), CompareOp (org.apache.hadoop.hbase.filter.CompareFilter.CompareOp)
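
The carry loop in the "like" branch computes an exclusive stop row for a literal prefix: bump the rightmost byte that can be bumped, zeroing the bytes to its right, and fall back to the empty end row when nothing can be bumped. The same idea as a standalone helper; nextPrefix is a made-up name, and this variant truncates the carried tail instead of zeroing it, which gives a slightly tighter bound:

import java.util.Arrays;

class StopRowSketch {
    // Smallest row key strictly greater than every key starting with `prefix`,
    // usable as an exclusive scan stop row. An empty array means "no stop row",
    // i.e. scan to the end of the table.
    static byte[] nextPrefix(byte[] prefix) {
        for (int i = prefix.length - 1; i >= 0; i--) {
            if ((prefix[i] & 0xff) != 0xff) {
                byte[] stop = Arrays.copyOf(prefix, i + 1); // drop the overflowed tail
                stop[i]++;                                  // bump the last usable byte
                return stop;
            }
        }
        return new byte[0]; // prefix is all 0xff: no finite upper bound
    }
}

Newer HBase clients wrap this computation in Scan#setRowPrefixFilter, so the hand-rolled form mainly matters when, as in the Drill planner here, raw start and stop keys have to be produced directly.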

Aggregations

RowFilter (org.apache.hadoop.hbase.filter.RowFilter): 39 uses
Filter (org.apache.hadoop.hbase.filter.Filter): 31 uses
RegexStringComparator (org.apache.hadoop.hbase.filter.RegexStringComparator): 20 uses
BinaryComparator (org.apache.hadoop.hbase.filter.BinaryComparator): 17 uses
ArrayList (java.util.ArrayList): 15 uses
FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter): 13 uses
SingleColumnValueFilter (org.apache.hadoop.hbase.filter.SingleColumnValueFilter): 12 uses
CompareFilter (org.apache.hadoop.hbase.filter.CompareFilter): 11 uses
PrefixFilter (org.apache.hadoop.hbase.filter.PrefixFilter): 11 uses
Scan (org.apache.hadoop.hbase.client.Scan): 10 uses
Test (org.junit.Test): 10 uses
Result (org.apache.hadoop.hbase.client.Result): 9 uses
BloomFilter (org.apache.hive.common.util.BloomFilter): 8 uses
SchemaPath (org.apache.drill.common.expression.SchemaPath): 5 uses
HBaseScanSpec (org.apache.drill.exec.store.hbase.HBaseScanSpec): 5 uses
ByteArrayComparable (org.apache.hadoop.hbase.filter.ByteArrayComparable): 5 uses
CompareOp (org.apache.hadoop.hbase.filter.CompareFilter.CompareOp): 5 uses
FilterList (org.apache.hadoop.hbase.filter.FilterList): 5 uses
NullComparator (org.apache.hadoop.hbase.filter.NullComparator): 5 uses
QualifierFilter (org.apache.hadoop.hbase.filter.QualifierFilter): 4 uses