
Example 16 with FirstKeyOnlyFilter

Use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project hive by apache.

From the class HiveHBaseInputFormatUtil, method getScan.

/**
   * Parse {@code jobConf} to create a {@link Scan} instance.
   */
public static Scan getScan(JobConf jobConf) throws IOException {
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    boolean doColumnRegexMatching = jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    ColumnMappings columnMappings;
    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
    } catch (SerDeException e) {
        throw new IOException(e);
    }
    if (columnMappings.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }
    boolean readAllColumns = ColumnProjectionUtils.isReadAllColumns(jobConf);
    Scan scan = new Scan();
    boolean empty = true;
    // The list of families that have been added to the scan
    List<String> addedFamilies = new ArrayList<String>();
    if (!readAllColumns) {
        ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
        for (int i : readColIDs) {
            ColumnMapping colMap = columnsMapping[i];
            if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
                continue;
            }
            if (colMap.qualifierName == null) {
                scan.addFamily(colMap.familyNameBytes);
                addedFamilies.add(colMap.familyName);
            } else {
                if (!addedFamilies.contains(colMap.familyName)) {
                    // add only if the corresponding family has not already been added
                    scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
                }
            }
            empty = false;
        }
    }
    // If nothing was added to the scan above, either every column was requested
    // or the projection contained only the row key / timestamp columns.
    if (empty) {
        if (readAllColumns) {
            for (ColumnMapping colMap : columnMappings) {
                if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
                    continue;
                }
                if (colMap.qualifierName == null) {
                    scan.addFamily(colMap.familyNameBytes);
                } else {
                    scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
                }
            }
        } else {
            // Only the row key was projected. Add filters that return just the
            // first key-value of each row, with its value stripped, so every
            // row is still picked up.
            scan.setFilter(new FilterList(new FirstKeyOnlyFilter(), new KeyOnlyFilter()));
        }
    }
    String scanCache = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHE);
    if (scanCache != null) {
        scan.setCaching(Integer.parseInt(scanCache));
    }
    String scanCacheBlocks = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHEBLOCKS);
    if (scanCacheBlocks != null) {
        scan.setCacheBlocks(Boolean.parseBoolean(scanCacheBlocks));
    }
    String scanBatch = jobConf.get(HBaseSerDe.HBASE_SCAN_BATCH);
    if (scanBatch != null) {
        scan.setBatch(Integer.parseInt(scanBatch));
    }
    return scan;
}
Also used: FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter), KeyOnlyFilter (org.apache.hadoop.hbase.filter.KeyOnlyFilter), FilterList (org.apache.hadoop.hbase.filter.FilterList), ArrayList (java.util.ArrayList), IOException (java.io.IOException), Scan (org.apache.hadoop.hbase.client.Scan), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), ColumnMapping (org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping)
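
The FilterList pairing above is what makes a key-only scan cheap: FirstKeyOnlyFilter returns at most one cell per row, and KeyOnlyFilter strips that cell's value. Below is a minimal, hypothetical sketch of consuming a scan configured this way to count rows; the connection setup and the table name "my_table" are illustrative assumptions, not part of the Hive source.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.filter.KeyOnlyFilter;

public class RowKeyScanExample {

    // Counts rows in "my_table" (a hypothetical table name) by fetching only
    // the first cell of each row, with its value stripped, mirroring the
    // FilterList built in getScan() above.
    public static long countRows(Configuration conf) throws IOException {
        Scan scan = new Scan();
        scan.setFilter(new FilterList(new FirstKeyOnlyFilter(), new KeyOnlyFilter()));
        long count = 0;
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf("my_table"));
             ResultScanner scanner = table.getScanner(scan)) {
            for (Result ignored : scanner) {
                count++;
            }
        }
        return count;
    }

    public static void main(String[] args) throws IOException {
        System.out.println(countRows(HBaseConfiguration.create()));
    }
}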

Example 17 with FirstKeyOnlyFilter

Use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project hive by apache.

From the class HBaseReadWrite, method getPartitionCount.

int getPartitionCount() throws IOException {
    Filter fil = new FirstKeyOnlyFilter();
    Iterator<Result> iter = scan(PART_TABLE, fil);
    return Iterators.size(iter);
}
Also used: RowFilter (org.apache.hadoop.hbase.filter.RowFilter), FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter), Filter (org.apache.hadoop.hbase.filter.Filter), CompareFilter (org.apache.hadoop.hbase.filter.CompareFilter), BloomFilter (org.apache.hive.common.util.BloomFilter), Result (org.apache.hadoop.hbase.client.Result)
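
Examples 17 through 19 share one pattern: set a FirstKeyOnlyFilter on a scan of a metastore table and count the results with Guava's Iterators.size. The scan(...) call is an HBaseReadWrite-internal helper whose implementation is not shown here; a minimal self-contained equivalent against the public HBase client API, with the connection and table name as assumptions, might look like this.

import java.io.IOException;
import java.util.Iterator;

import com.google.common.collect.Iterators;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;

public class MetastoreCounts {

    // Counts rows in the named table. FirstKeyOnlyFilter limits each row to
    // its first cell, so the scan moves the minimum data needed to count.
    // The tableName argument stands in for HBaseReadWrite's internal
    // table-name constants (PART_TABLE, TABLE_TABLE, DB_TABLE).
    static int countRows(Connection connection, String tableName) throws IOException {
        Filter filter = new FirstKeyOnlyFilter();
        Scan scan = new Scan();
        scan.setFilter(filter);
        try (Table table = connection.getTable(TableName.valueOf(tableName));
             ResultScanner scanner = table.getScanner(scan)) {
            Iterator<Result> iter = scanner.iterator();
            return Iterators.size(iter);
        }
    }
}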

Example 18 with FirstKeyOnlyFilter

Use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project hive by apache.

From the class HBaseReadWrite, method getTableCount.

int getTableCount() throws IOException {
    Filter fil = new FirstKeyOnlyFilter();
    Iterator<Result> iter = scan(TABLE_TABLE, fil);
    return Iterators.size(iter);
}
Also used: RowFilter (org.apache.hadoop.hbase.filter.RowFilter), FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter), Filter (org.apache.hadoop.hbase.filter.Filter), CompareFilter (org.apache.hadoop.hbase.filter.CompareFilter), BloomFilter (org.apache.hive.common.util.BloomFilter), Result (org.apache.hadoop.hbase.client.Result)

Example 19 with FirstKeyOnlyFilter

Use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project hive by apache.

From the class HBaseReadWrite, method getDatabaseCount.

int getDatabaseCount() throws IOException {
    Filter fil = new FirstKeyOnlyFilter();
    Iterator<Result> iter = scan(DB_TABLE, fil);
    return Iterators.size(iter);
}
Also used: RowFilter (org.apache.hadoop.hbase.filter.RowFilter), FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter), Filter (org.apache.hadoop.hbase.filter.Filter), CompareFilter (org.apache.hadoop.hbase.filter.CompareFilter), BloomFilter (org.apache.hive.common.util.BloomFilter), Result (org.apache.hadoop.hbase.client.Result)

Example 20 with FirstKeyOnlyFilter

Use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project drill by apache.

From the class HBaseRecordReader, method transformColumns.

@Override
protected Collection<SchemaPath> transformColumns(Collection<SchemaPath> columns) {
    Set<SchemaPath> transformed = Sets.newLinkedHashSet();
    rowKeyOnly = true;
    if (!isStarQuery()) {
        for (SchemaPath column : columns) {
            if (column.getRootSegment().getPath().equalsIgnoreCase(ROW_KEY)) {
                transformed.add(ROW_KEY_PATH);
                continue;
            }
            rowKeyOnly = false;
            NameSegment root = column.getRootSegment();
            byte[] family = root.getPath().getBytes();
            transformed.add(SchemaPath.getSimplePath(root.getPath()));
            PathSegment child = root.getChild();
            if (child != null && child.isNamed()) {
                byte[] qualifier = child.getNameSegment().getPath().getBytes();
                hbaseScan.addColumn(family, qualifier);
            } else {
                hbaseScan.addFamily(family);
            }
        }
        /*
         * If only the row key was requested, add a FirstKeyOnlyFilter to the scan
         * to fetch only one KV from each row. If a filter is already part of this
         * scan, add the FirstKeyOnlyFilter as the LAST filter of a MUST_PASS_ALL
         * FilterList.
         */
        if (rowKeyOnly) {
            hbaseScan.setFilter(HBaseUtils.andFilterAtIndex(hbaseScan.getFilter(), HBaseUtils.LAST_FILTER, new FirstKeyOnlyFilter()));
        }
    } else {
        rowKeyOnly = false;
        transformed.add(ROW_KEY_PATH);
    }
    return transformed;
}
Also used: NameSegment (org.apache.drill.common.expression.PathSegment.NameSegment), SchemaPath (org.apache.drill.common.expression.SchemaPath), FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter), PathSegment (org.apache.drill.common.expression.PathSegment)
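
HBaseUtils.andFilterAtIndex and HBaseUtils.LAST_FILTER are Drill-internal helpers whose implementation is not shown above. What follows is an assumption-laden sketch of the composition the comment describes, assuming the helper merges the existing filter and the new one into a MUST_PASS_ALL FilterList and that LAST_FILTER means "append at the end"; it is not the actual Drill implementation.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;

public class AndFilterSketch {

    // Index value meaning "append at the end", mirroring the assumed role
    // of HBaseUtils.LAST_FILTER.
    static final int LAST_FILTER = -1;

    // Combines currentFilter (possibly null) with newFilter in a
    // MUST_PASS_ALL FilterList, inserting newFilter at the requested index.
    static Filter andFilterAtIndex(Filter currentFilter, int index, Filter newFilter) {
        if (currentFilter == null) {
            return newFilter;
        }
        List<Filter> filters = new ArrayList<>();
        if (currentFilter instanceof FilterList
                && ((FilterList) currentFilter).getOperator() == FilterList.Operator.MUST_PASS_ALL) {
            // Already an AND list: keep its members and splice in the new filter.
            filters.addAll(((FilterList) currentFilter).getFilters());
        } else {
            filters.add(currentFilter);
        }
        if (index == LAST_FILTER) {
            filters.add(newFilter);
        } else {
            filters.add(index, newFilter);
        }
        return new FilterList(FilterList.Operator.MUST_PASS_ALL, filters);
    }

    public static void main(String[] args) {
        // With no pre-existing filter, the new filter is used as-is.
        Filter combined = andFilterAtIndex(null, LAST_FILTER, new FirstKeyOnlyFilter());
        System.out.println(combined);
    }
}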

Aggregations

FirstKeyOnlyFilter (org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter): 28
Scan (org.apache.hadoop.hbase.client.Scan): 17
Cell (org.apache.hadoop.hbase.Cell): 9
ArrayList (java.util.ArrayList): 8
Test (org.junit.Test): 8
IOException (java.io.IOException): 7
Result (org.apache.hadoop.hbase.client.Result): 7
RegionScanner (org.apache.hadoop.hbase.regionserver.RegionScanner): 6
Filter (org.apache.hadoop.hbase.filter.Filter): 5
RowFilter (org.apache.hadoop.hbase.filter.RowFilter): 4
Connection (java.sql.Connection): 3
Put (org.apache.hadoop.hbase.client.Put): 3
ResultScanner (org.apache.hadoop.hbase.client.ResultScanner): 3
CompareFilter (org.apache.hadoop.hbase.filter.CompareFilter): 3
FilterList (org.apache.hadoop.hbase.filter.FilterList): 3
BloomFilter (org.apache.hive.common.util.BloomFilter): 3
ImmutableBytesPtr (org.apache.phoenix.hbase.index.util.ImmutableBytesPtr): 3
PhoenixConnection (org.apache.phoenix.jdbc.PhoenixConnection): 3
PMetaDataEntity (org.apache.phoenix.schema.PMetaDataEntity): 3
List (java.util.List): 2