Use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project hive by apache.
Class HiveHBaseInputFormatUtil, method getScan.
/**
 * Parse {@code jobConf} to create a {@link Scan} instance.
 */
public static Scan getScan(JobConf jobConf) throws IOException {
  String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
  boolean doColumnRegexMatching = jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);
  List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
  ColumnMappings columnMappings;
  try {
    columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
  } catch (SerDeException e) {
    throw new IOException(e);
  }
  if (columnMappings.size() < readColIDs.size()) {
    throw new IOException("Cannot read more columns than the given table contains.");
  }
  boolean readAllColumns = ColumnProjectionUtils.isReadAllColumns(jobConf);
  Scan scan = new Scan();
  boolean empty = true;
  // The list of families that have been added to the scan
  List<String> addedFamilies = new ArrayList<String>();
  if (!readAllColumns) {
    ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
    for (int i : readColIDs) {
      ColumnMapping colMap = columnsMapping[i];
      if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
        continue;
      }
      if (colMap.qualifierName == null) {
        scan.addFamily(colMap.familyNameBytes);
        addedFamilies.add(colMap.familyName);
      } else {
        if (!addedFamilies.contains(colMap.familyName)) {
          // add only if the corresponding family has not already been added
          scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
        }
      }
      empty = false;
    }
  }
  // If no family or column was added to the scan (e.g. only the row key or
  // timestamp was projected), make sure the scan still returns every row.
  if (empty) {
    if (readAllColumns) {
      for (ColumnMapping colMap : columnMappings) {
        if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
          continue;
        }
        if (colMap.qualifierName == null) {
          scan.addFamily(colMap.familyNameBytes);
        } else {
          scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
        }
      }
    } else {
      // Add a filter to just do a scan on the keys so that we pick up everything
      scan.setFilter(new FilterList(new FirstKeyOnlyFilter(), new KeyOnlyFilter()));
    }
  }
  String scanCache = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHE);
  if (scanCache != null) {
    scan.setCaching(Integer.parseInt(scanCache));
  }
  String scanCacheBlocks = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHEBLOCKS);
  if (scanCacheBlocks != null) {
    scan.setCacheBlocks(Boolean.parseBoolean(scanCacheBlocks));
  }
  String scanBatch = jobConf.get(HBaseSerDe.HBASE_SCAN_BATCH);
  if (scanBatch != null) {
    scan.setBatch(Integer.parseInt(scanBatch));
  }
  return scan;
}
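The key-only filter pair above is what makes count-style queries cheap: FirstKeyOnlyFilter makes the region server stop after the first cell of each row, and KeyOnlyFilter strips cell values before they go over the wire. Below is a minimal standalone sketch of the same combination used to count rows; the Connection and table name are illustrative assumptions, not taken from the Hive source.

import java.io.IOException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.filter.KeyOnlyFilter;

public class KeyOnlyScanExample {
  // Counts rows using the same filter pair getScan() installs for the
  // "no columns projected" case: FirstKeyOnlyFilter returns only the first
  // cell of each row, and KeyOnlyFilter drops the cell values, so only row
  // keys are shipped to the client.
  static long countRows(Connection conn, String tableName) throws IOException {
    Scan scan = new Scan();
    // FilterList defaults to Operator.MUST_PASS_ALL, i.e. a logical AND.
    scan.setFilter(new FilterList(new FirstKeyOnlyFilter(), new KeyOnlyFilter()));
    long count = 0;
    try (Table table = conn.getTable(TableName.valueOf(tableName));
         ResultScanner scanner = table.getScanner(scan)) {
      for (Result ignored : scanner) {
        count++;
      }
    }
    return count;
  }
}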
Use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project hive by apache.
Class HBaseReadWrite, method getPartitionCount.
int getPartitionCount() throws IOException {
  Filter fil = new FirstKeyOnlyFilter();
  Iterator<Result> iter = scan(PART_TABLE, fil);
  return Iterators.size(iter);
}
Use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project hive by apache.
Class HBaseReadWrite, method getTableCount.
int getTableCount() throws IOException {
  Filter fil = new FirstKeyOnlyFilter();
  Iterator<Result> iter = scan(TABLE_TABLE, fil);
  return Iterators.size(iter);
}
Use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project hive by apache.
Class HBaseReadWrite, method getDatabaseCount.
int getDatabaseCount() throws IOException {
  Filter fil = new FirstKeyOnlyFilter();
  Iterator<Result> iter = scan(DB_TABLE, fil);
  return Iterators.size(iter);
}
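All three counters rely on one pattern: a full scan with a FirstKeyOnlyFilter, so exactly one Result comes back per row, and Guava's Iterators.size does the counting. The private scan(table, filter) helper they call is not shown above; the sketch below is a hypothetical equivalent, assuming an open HBase Connection (class and field names are mine, not from HBaseReadWrite).

import java.io.IOException;
import java.util.Iterator;
import com.google.common.collect.Iterators;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;

public class CountPattern {
  private final Connection conn;

  CountPattern(Connection conn) {
    this.conn = conn;
  }

  // Hypothetical stand-in for HBaseReadWrite's private scan(table, filter)
  // helper: a full-table scan with the given filter, exposed as an iterator.
  // Resource cleanup is omitted for brevity; a real implementation must
  // close the scanner and table when iteration finishes.
  private Iterator<Result> scan(String tableName, Filter filter) throws IOException {
    Scan scan = new Scan();
    scan.setFilter(filter);
    Table table = conn.getTable(TableName.valueOf(tableName));
    return table.getScanner(scan).iterator();
  }

  // The shared counting pattern: FirstKeyOnlyFilter yields one cell per row,
  // so the number of Results equals the number of rows.
  int countRows(String tableName) throws IOException {
    return Iterators.size(scan(tableName, new FirstKeyOnlyFilter()));
  }
}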
Use of org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter in project drill by apache.
Class HBaseRecordReader, method transformColumns.
@Override
protected Collection<SchemaPath> transformColumns(Collection<SchemaPath> columns) {
  Set<SchemaPath> transformed = Sets.newLinkedHashSet();
  rowKeyOnly = true;
  if (!isStarQuery()) {
    for (SchemaPath column : columns) {
      if (column.getRootSegment().getPath().equalsIgnoreCase(ROW_KEY)) {
        transformed.add(ROW_KEY_PATH);
        continue;
      }
      rowKeyOnly = false;
      NameSegment root = column.getRootSegment();
      byte[] family = root.getPath().getBytes();
      transformed.add(SchemaPath.getSimplePath(root.getPath()));
      PathSegment child = root.getChild();
      if (child != null && child.isNamed()) {
        byte[] qualifier = child.getNameSegment().getPath().getBytes();
        hbaseScan.addColumn(family, qualifier);
      } else {
        hbaseScan.addFamily(family);
      }
    }
    /* If only the row key was requested, add a FirstKeyOnlyFilter to the scan
     * to fetch only one KV from each row. If a filter is already part of this
     * scan, add the FirstKeyOnlyFilter as the LAST filter of a MUST_PASS_ALL
     * FilterList.
     */
    if (rowKeyOnly) {
      hbaseScan.setFilter(
          HBaseUtils.andFilterAtIndex(hbaseScan.getFilter(), HBaseUtils.LAST_FILTER, new FirstKeyOnlyFilter()));
    }
  } else {
    rowKeyOnly = false;
    transformed.add(ROW_KEY_PATH);
  }
  return transformed;
}
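HBaseUtils.andFilterAtIndex itself is not shown here. Per the comment in transformColumns, its effect is to AND a new filter onto whatever filter the scan already carries, keeping the FirstKeyOnlyFilter last. A hedged sketch of that combination logic follows; the helper name and structure are mine, not Drill's.

import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;

public final class FilterCombine {
  private FilterCombine() {}

  // Sketch of AND-ing a new filter onto an existing scan filter, as the
  // comment in transformColumns() describes: with no current filter, the new
  // one is used alone; otherwise both go into a MUST_PASS_ALL FilterList with
  // the new filter last, so it runs after the existing predicates.
  public static Filter andLast(Filter current, Filter toAdd) {
    if (current == null) {
      return toAdd;
    }
    return new FilterList(FilterList.Operator.MUST_PASS_ALL, current, toAdd);
  }
}

With this hypothetical helper, the call above would read hbaseScan.setFilter(andLast(hbaseScan.getFilter(), new FirstKeyOnlyFilter())).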