
Example 66 with FilterList

Use of org.apache.hadoop.hbase.filter.FilterList in project cdap by caskdata.

Class DequeueScanObserver, method preScannerOpen:

@Override
public RegionScanner preScannerOpen(ObserverContext<RegionCoprocessorEnvironment> e, Scan scan, RegionScanner s) throws IOException {
    ConsumerConfig consumerConfig = DequeueScanAttributes.getConsumerConfig(scan);
    Transaction tx = DequeueScanAttributes.getTx(scan);
    if (consumerConfig == null || tx == null) {
        return super.preScannerOpen(e, scan, s);
    }
    Filter dequeueFilter = new DequeueFilter(consumerConfig, tx);
    Filter existing = scan.getFilter();
    if (existing != null) {
        // Preserve any filter the client already set: with MUST_PASS_ALL, a cell
        // is returned only if both the existing filter and the dequeue filter accept it.
        Filter combined = new FilterList(FilterList.Operator.MUST_PASS_ALL, existing, dequeueFilter);
        scan.setFilter(combined);
    } else {
        scan.setFilter(dequeueFilter);
    }
    return super.preScannerOpen(e, scan, s);
}
Also used: Transaction (org.apache.tephra.Transaction), Filter (org.apache.hadoop.hbase.filter.Filter), ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig), FilterList (org.apache.hadoop.hbase.filter.FilterList)
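
A note on the pattern: the coprocessor is careful not to clobber a filter the client may already have set on the Scan; it wraps the existing filter and the DequeueFilter in a MUST_PASS_ALL FilterList, so a cell is returned only when both accept it. Below is a minimal client-side sketch of the same combining idiom; the PrefixFilter instances and row-key prefixes are hypothetical placeholders, not part of the CDAP code.

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class CombineFiltersSketch {

    // Attach an extra filter to a Scan without discarding one already set.
    static void andFilter(Scan scan, Filter extra) {
        Filter existing = scan.getFilter();
        if (existing != null) {
            // MUST_PASS_ALL: every child filter must accept a cell for it to pass.
            scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, existing, extra));
        } else {
            scan.setFilter(extra);
        }
    }

    public static void main(String[] args) {
        Scan scan = new Scan();
        scan.setFilter(new PrefixFilter(Bytes.toBytes("row-")));    // filter set earlier
        andFilter(scan, new PrefixFilter(Bytes.toBytes("row-42"))); // ANDed in afterwards
        System.out.println(scan.getFilter());                       // the combined FilterList
    }
}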

Example 67 with FilterList

Use of org.apache.hadoop.hbase.filter.FilterList in project hive by apache.

Class HiveHBaseInputFormatUtil, method getScan:

/**
 * Parse {@code jobConf} to create a {@link Scan} instance.
 */
public static Scan getScan(JobConf jobConf) throws IOException {
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    boolean doColumnRegexMatching = jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    ColumnMappings columnMappings;
    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
    } catch (SerDeException e) {
        throw new IOException(e);
    }
    if (columnMappings.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }
    boolean readAllColumns = ColumnProjectionUtils.isReadAllColumns(jobConf);
    Scan scan = new Scan();
    boolean empty = true;
    // The list of families that have been added to the scan
    List<String> addedFamilies = new ArrayList<String>();
    if (!readAllColumns) {
        ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
        for (int i : readColIDs) {
            ColumnMapping colMap = columnsMapping[i];
            if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
                continue;
            }
            if (colMap.qualifierName == null) {
                scan.addFamily(colMap.familyNameBytes);
                addedFamilies.add(colMap.familyName);
            } else {
                if (!addedFamilies.contains(colMap.familyName)) {
                    // add only if the corresponding family has not already been added
                    scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
                }
            }
            empty = false;
        }
    }
    // If nothing was added to the scan, fall back to touching only the row keys
    if (empty) {
        if (readAllColumns) {
            for (ColumnMapping colMap : columnMappings) {
                if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
                    continue;
                }
                if (colMap.qualifierName == null) {
                    scan.addFamily(colMap.familyNameBytes);
                } else {
                    scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
                }
            }
        } else {
            // No columns requested: return only the first cell of each row, with
            // values stripped, so every row is still visited at minimal cost
            scan.setFilter(new FilterList(new FirstKeyOnlyFilter(), new KeyOnlyFilter()));
        }
    }
    String scanCache = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHE);
    if (scanCache != null) {
        scan.setCaching(Integer.parseInt(scanCache));
    }
    String scanCacheBlocks = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHEBLOCKS);
    if (scanCacheBlocks != null) {
        scan.setCacheBlocks(Boolean.parseBoolean(scanCacheBlocks));
    }
    String scanBatch = jobConf.get(HBaseSerDe.HBASE_SCAN_BATCH);
    if (scanBatch != null) {
        scan.setBatch(Integer.parseInt(scanBatch));
    }
    String filterObjectSerialized = jobConf.get(TableScanDesc.FILTER_OBJECT_CONF_STR);
    if (filterObjectSerialized != null) {
        setupScanRange(scan, filterObjectSerialized, jobConf, true);
    }
    return scan;
}
Also used: FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter), KeyOnlyFilter (org.apache.hadoop.hbase.filter.KeyOnlyFilter), ArrayList (java.util.ArrayList), FilterList (org.apache.hadoop.hbase.filter.FilterList), IOException (java.io.IOException), Scan (org.apache.hadoop.hbase.client.Scan), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), ColumnMapping (org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping)
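
When no columns are projected, the Hive code above pairs FirstKeyOnlyFilter (return only the first cell of each row) with KeyOnlyFilter (strip cell values), a standard low-I/O way to visit every row, e.g. for a row count. A minimal standalone sketch of that combination follows; the table name "my_table" and the connection setup are hypothetical, assuming an HBase 1.x+ client API.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.filter.KeyOnlyFilter;

public class KeysOnlyCountSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("my_table"))) { // hypothetical table
            Scan scan = new Scan();
            // FirstKeyOnlyFilter: emit only the first cell per row.
            // KeyOnlyFilter: drop cell values, keeping just the keys.
            scan.setFilter(new FilterList(new FirstKeyOnlyFilter(), new KeyOnlyFilter()));
            long rows = 0;
            try (ResultScanner scanner = table.getScanner(scan)) {
                for (Result ignored : scanner) {
                    rows++;
                }
            }
            System.out.println("row count: " + rows);
        }
    }
}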

Example 68 with FilterList

Use of org.apache.hadoop.hbase.filter.FilterList in project hive by apache.

Class SampleHBasePredicateDecomposer, method getScanRange:

@Override
public HBaseScanRange getScanRange(List<IndexSearchCondition> searchConditions) throws Exception {
    // Group the search conditions by the key field they reference.
    Map<String, List<IndexSearchCondition>> fieldConds = new HashMap<String, List<IndexSearchCondition>>();
    for (IndexSearchCondition condition : searchConditions) {
        String fieldName = condition.getFields()[0];
        List<IndexSearchCondition> fieldCond = fieldConds.get(fieldName);
        if (fieldCond == null) {
            fieldCond = new ArrayList<IndexSearchCondition>();
            fieldConds.put(fieldName, fieldCond);
        }
        fieldCond.add(condition);
    }
    List<Filter> filters = new ArrayList<Filter>();
    HBaseScanRange range = new HBaseScanRange();
    StructTypeInfo type = (StructTypeInfo) keyMapping.columnType;
    for (String name : type.getAllStructFieldNames()) {
        List<IndexSearchCondition> fieldCond = fieldConds.get(name);
        // Expect at most two conditions per field (a lower and an upper bound).
        if (fieldCond == null || fieldCond.size() > 2) {
            continue;
        }
        for (IndexSearchCondition condition : fieldCond) {
            if (condition.getConstantDesc().getValue() == null) {
                continue;
            }
            String comparisonOp = condition.getComparisonOp();
            String constantVal = String.valueOf(condition.getConstantDesc().getValue());
            byte[] valueAsBytes = toBinary(constantVal, FIXED_LENGTH, false, false);
            // Map the Hive UDF operator class name to the equivalent HBase comparison op.
            if (comparisonOp.endsWith("UDFOPEqualOrGreaterThan")) {
                filters.add(new RowFilter(CompareOp.GREATER_OR_EQUAL, new BinaryComparator(valueAsBytes)));
            } else if (comparisonOp.endsWith("UDFOPGreaterThan")) {
                filters.add(new RowFilter(CompareOp.GREATER, new BinaryComparator(valueAsBytes)));
            } else if (comparisonOp.endsWith("UDFOPEqualOrLessThan")) {
                filters.add(new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(valueAsBytes)));
            } else if (comparisonOp.endsWith("UDFOPLessThan")) {
                filters.add(new RowFilter(CompareOp.LESS, new BinaryComparator(valueAsBytes)));
            } else {
                throw new IOException(comparisonOp + " is not a supported comparison operator");
            }
        }
    }
    if (!filters.isEmpty()) {
        range.addFilter(new FilterList(Operator.MUST_PASS_ALL, filters));
    }
    return range;
}
Also used: HashMap (java.util.HashMap), IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition), ArrayList (java.util.ArrayList), StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), FilterList (org.apache.hadoop.hbase.filter.FilterList), IOException (java.io.IOException), BinaryComparator (org.apache.hadoop.hbase.filter.BinaryComparator), RowFilter (org.apache.hadoop.hbase.filter.RowFilter), Filter (org.apache.hadoop.hbase.filter.Filter), List (java.util.List)
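
The decomposer above turns each comparison into a RowFilter over the binary-encoded row key and ANDs the collected filters together, which is how a predicate such as key >= low AND key < high becomes a server-side filter. Below is a minimal sketch of the same construction with hypothetical bounds; note that for a plain key range, Scan.setStartRow/setStopRow is usually preferable, since row filters still examine every row.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class RowRangeFilterSketch {

    public static void main(String[] args) {
        List<Filter> filters = new ArrayList<Filter>();
        // row key >= "k100" (hypothetical lower bound)
        filters.add(new RowFilter(CompareOp.GREATER_OR_EQUAL,
                new BinaryComparator(Bytes.toBytes("k100"))));
        // row key < "k200" (hypothetical upper bound)
        filters.add(new RowFilter(CompareOp.LESS,
                new BinaryComparator(Bytes.toBytes("k200"))));

        Scan scan = new Scan();
        // MUST_PASS_ALL makes the list a conjunction of the two bounds.
        scan.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, filters));
        System.out.println(scan.getFilter());
    }
}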

Aggregations

FilterList (org.apache.hadoop.hbase.filter.FilterList): 68 usages
Filter (org.apache.hadoop.hbase.filter.Filter): 36
Scan (org.apache.hadoop.hbase.client.Scan): 16
QualifierFilter (org.apache.hadoop.hbase.filter.QualifierFilter): 10
SingleColumnValueFilter (org.apache.hadoop.hbase.filter.SingleColumnValueFilter): 10
TimelineFilterList (org.apache.hadoop.yarn.server.timelineservice.reader.filter.TimelineFilterList): 10
BinaryComparator (org.apache.hadoop.hbase.filter.BinaryComparator): 9
Test (org.junit.Test): 8
ConsumerConfig (co.cask.cdap.data2.queue.ConsumerConfig): 7
ArrayList (java.util.ArrayList): 7
FamilyFilter (org.apache.hadoop.hbase.filter.FamilyFilter): 7
Transaction (org.apache.tephra.Transaction): 7
IOException (java.io.IOException): 6
PrefixFilter (org.apache.hadoop.hbase.filter.PrefixFilter): 6
Result (org.apache.hadoop.hbase.client.Result): 5
PageFilter (org.apache.hadoop.hbase.filter.PageFilter): 5
Cell (org.apache.hadoop.hbase.Cell): 4
TableName (org.apache.hadoop.hbase.TableName): 4
ResultScanner (org.apache.hadoop.hbase.client.ResultScanner): 4
FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter): 4