use of org.apache.hadoop.hbase.filter.Filter in project hbase by apache.
the class TestScannersWithFilters method testRowFilter.
/**
 * Runs a series of scans filtered by {@link RowFilter}: each comparison operator against the
 * binary row key "testRowOne-2", plus two regular-expression matches. The count-based scans are
 * handed to {@code verifyScanNoEarlyOut} with an expected row count and an expected key count
 * of {@code colsPerRow}; the last two scans are handed to {@code verifyScanFull} together with
 * the exact cells expected, in order.
 */
@Test
public void testRowFilter() throws Exception {
  final String pivotRow = "testRowOne-2";
  // Every count-based scan below expects all keys of each matching row.
  final long keysPerRow = colsPerRow;
  long expectedRows;
  Scan scan;

  // EQUAL on the exact row key: a single row, all keys.
  expectedRows = 1;
  scan = new Scan();
  scan.setFilter(new RowFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(pivotRow))));
  verifyScanNoEarlyOut(scan, expectedRows, keysPerRow);

  // EQUAL with a regex: two rows, one from each group.
  expectedRows = 2;
  scan = new Scan();
  scan.setFilter(new RowFilter(CompareOp.EQUAL, new RegexStringComparator("testRow.+-2")));
  verifyScanNoEarlyOut(scan, expectedRows, keysPerRow);

  // LESS: all keys in one row.
  expectedRows = 1;
  scan = new Scan();
  scan.setFilter(new RowFilter(CompareOp.LESS, new BinaryComparator(Bytes.toBytes(pivotRow))));
  verifyScanNoEarlyOut(scan, expectedRows, keysPerRow);

  // LESS_OR_EQUAL: all keys in two rows.
  expectedRows = 2;
  scan = new Scan();
  scan.setFilter(
      new RowFilter(CompareOp.LESS_OR_EQUAL, new BinaryComparator(Bytes.toBytes(pivotRow))));
  verifyScanNoEarlyOut(scan, expectedRows, keysPerRow);

  // NOT_EQUAL: all keys in all but one row.
  expectedRows = numRows - 1;
  scan = new Scan();
  scan.setFilter(
      new RowFilter(CompareOp.NOT_EQUAL, new BinaryComparator(Bytes.toBytes(pivotRow))));
  verifyScanNoEarlyOut(scan, expectedRows, keysPerRow);

  // GREATER_OR_EQUAL: all keys in all but one row.
  expectedRows = numRows - 1;
  scan = new Scan();
  scan.setFilter(
      new RowFilter(CompareOp.GREATER_OR_EQUAL, new BinaryComparator(Bytes.toBytes(pivotRow))));
  verifyScanNoEarlyOut(scan, expectedRows, keysPerRow);

  // GREATER: all keys in all but two rows.
  expectedRows = numRows - 2;
  scan = new Scan();
  scan.setFilter(new RowFilter(CompareOp.GREATER, new BinaryComparator(Bytes.toBytes(pivotRow))));
  verifyScanNoEarlyOut(scan, expectedRows, keysPerRow);

  // NOT_EQUAL to testRowOne-2 again, this time looking across rows and fully validating the
  // keys and ordering: every key of every row except testRowOne-2.
  scan = new Scan();
  scan.setFilter(
      new RowFilter(CompareOp.NOT_EQUAL, new BinaryComparator(Bytes.toBytes(pivotRow))));
  KeyValue[] allButPivotRow = new KeyValue[] {
    // testRowOne-0
    new KeyValue(ROWS_ONE[0], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[0], FAMILIES[0], QUALIFIERS_ONE[2], VALUES[0]),
    new KeyValue(ROWS_ONE[0], FAMILIES[0], QUALIFIERS_ONE[3], VALUES[0]),
    new KeyValue(ROWS_ONE[0], FAMILIES[1], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[0], FAMILIES[1], QUALIFIERS_ONE[2], VALUES[0]),
    new KeyValue(ROWS_ONE[0], FAMILIES[1], QUALIFIERS_ONE[3], VALUES[0]),
    // testRowOne-3
    new KeyValue(ROWS_ONE[3], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[0], QUALIFIERS_ONE[2], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[0], QUALIFIERS_ONE[3], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[1], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[1], QUALIFIERS_ONE[2], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[1], QUALIFIERS_ONE[3], VALUES[0]),
    // testRowTwo-0
    new KeyValue(ROWS_TWO[0], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[0], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[0], QUALIFIERS_TWO[3], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[1], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[1], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[1], QUALIFIERS_TWO[3], VALUES[1]),
    // testRowTwo-2
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[3], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[1], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[1], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[1], QUALIFIERS_TWO[3], VALUES[1]),
    // testRowTwo-3
    new KeyValue(ROWS_TWO[3], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[0], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[0], QUALIFIERS_TWO[3], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[1], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[1], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[1], QUALIFIERS_TWO[3], VALUES[1])
  };
  verifyScanFull(scan, allButPivotRow);

  // Regex across rows and groups: keep only rows matching ".+-2".
  // Expect all keys in two rows.
  scan = new Scan();
  scan.setFilter(new RowFilter(CompareOp.EQUAL, new RegexStringComparator(".+-2")));
  KeyValue[] dashTwoRows = new KeyValue[] {
    // testRowOne-2
    new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[2], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[3], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[1], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[1], QUALIFIERS_ONE[2], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[1], QUALIFIERS_ONE[3], VALUES[0]),
    // testRowTwo-2
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[3], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[1], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[1], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[1], QUALIFIERS_TWO[3], VALUES[1])
  };
  verifyScanFull(scan, dashTwoRows);
}
use of org.apache.hadoop.hbase.filter.Filter in project hbase by apache.
the class TestScannersWithFilters method testFilterList.
/**
 * Combines a row, a qualifier and a value filter in a {@link FilterList}, first with
 * {@code MUST_PASS_ALL} (expecting a single cell) and then with {@code MUST_PASS_ONE}
 * (expecting {@code numRows} rows and {@code colsPerRow} keys).
 */
@Test
public void testFilterList() throws Exception {
  // MUST_PASS_ALL: single row, single key, using row and qualifier regular expressions
  // and a value substring.
  List<Filter> mustPassAll = new ArrayList<>(3);
  mustPassAll.add(new RowFilter(CompareOp.EQUAL, new RegexStringComparator(".+-2")));
  mustPassAll.add(new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator(".+-2")));
  mustPassAll.add(new ValueFilter(CompareOp.EQUAL, new SubstringComparator("One")));
  Filter allFilter = new FilterList(Operator.MUST_PASS_ALL, mustPassAll);

  Scan singleCellScan = new Scan();
  singleCellScan.addFamily(FAMILIES[0]);
  singleCellScan.setFilter(allFilter);
  KeyValue[] expectedCell = new KeyValue[] {
    new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[2], VALUES[0])
  };
  verifyScanFull(singleCellScan, expectedCell);

  // MUST_PASS_ONE: getting everything with row, qualifier and value filters, again mixing
  // regular expression and substring comparators.
  List<Filter> mustPassOne = new ArrayList<>(3);
  mustPassOne.add(new RowFilter(CompareOp.EQUAL, new RegexStringComparator(".+Two.+")));
  mustPassOne.add(new QualifierFilter(CompareOp.EQUAL, new RegexStringComparator(".+-2")));
  mustPassOne.add(new ValueFilter(CompareOp.EQUAL, new SubstringComparator("One")));
  Filter anyFilter = new FilterList(Operator.MUST_PASS_ONE, mustPassOne);

  Scan everythingScan = new Scan();
  everythingScan.setFilter(anyFilter);
  verifyScanNoEarlyOut(everythingScan, numRows, colsPerRow);
}
use of org.apache.hadoop.hbase.filter.Filter in project hbase by apache.
the class TestScannersWithFilters method testQualifierFilter.
/**
 * Runs a series of scans filtered by {@link QualifierFilter}: each comparison operator against
 * the binary qualifier "testQualifierOne-2", then two {@code NOT_EQUAL} scans (one binary, one
 * regex) whose exact cells are handed to {@code verifyScanFull}.
 */
@Test
public void testQualifierFilter() throws Exception {
  final String pivotQualifier = "testQualifierOne-2";
  // Every count-based scan below expects matches in half the rows.
  final long expectedRows = numRows / 2;
  long expectedKeys;
  Scan scan;

  // EQUAL: two keys (one from each family) in half the rows.
  expectedKeys = 2;
  scan = new Scan();
  scan.setFilter(
      new QualifierFilter(CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(pivotQualifier))));
  verifyScanNoEarlyOut(scan, expectedRows, expectedKeys);

  // LESS than the same qualifier: only two keys (one from each family) in half the rows.
  expectedKeys = 2;
  scan = new Scan();
  scan.setFilter(
      new QualifierFilter(CompareOp.LESS, new BinaryComparator(Bytes.toBytes(pivotQualifier))));
  verifyScanNoEarlyOut(scan, expectedRows, expectedKeys);

  // LESS_OR_EQUAL: four keys (two from each family) in half the rows.
  expectedKeys = 4;
  scan = new Scan();
  scan.setFilter(new QualifierFilter(CompareOp.LESS_OR_EQUAL,
      new BinaryComparator(Bytes.toBytes(pivotQualifier))));
  verifyScanNoEarlyOut(scan, expectedRows, expectedKeys);

  // NOT_EQUAL: four keys (two from each family).
  // Only look in the first group of rows.
  expectedKeys = 4;
  scan = new Scan(HConstants.EMPTY_START_ROW, Bytes.toBytes("testRowTwo"));
  scan.setFilter(new QualifierFilter(CompareOp.NOT_EQUAL,
      new BinaryComparator(Bytes.toBytes(pivotQualifier))));
  verifyScanNoEarlyOut(scan, expectedRows, expectedKeys);

  // GREATER_OR_EQUAL: four keys (two from each family).
  // Only look in the first group of rows.
  expectedKeys = 4;
  scan = new Scan(HConstants.EMPTY_START_ROW, Bytes.toBytes("testRowTwo"));
  scan.setFilter(new QualifierFilter(CompareOp.GREATER_OR_EQUAL,
      new BinaryComparator(Bytes.toBytes(pivotQualifier))));
  verifyScanNoEarlyOut(scan, expectedRows, expectedKeys);

  // GREATER: two keys (one from each family).
  // Only look in the first group of rows.
  expectedKeys = 2;
  scan = new Scan(HConstants.EMPTY_START_ROW, Bytes.toBytes("testRowTwo"));
  scan.setFilter(
      new QualifierFilter(CompareOp.GREATER, new BinaryComparator(Bytes.toBytes(pivotQualifier))));
  verifyScanNoEarlyOut(scan, expectedRows, expectedKeys);

  // NOT_EQUAL to QUALIFIERS_ONE[2], looking across rows and fully validating the keys and
  // ordering: 4 keys per row in group one, 6 keys per row in group two.
  scan = new Scan();
  scan.setFilter(new QualifierFilter(CompareOp.NOT_EQUAL, new BinaryComparator(QUALIFIERS_ONE[2])));
  KeyValue[] withoutGroupOneQualifier = new KeyValue[] {
    // testRowOne-0
    new KeyValue(ROWS_ONE[0], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[0], FAMILIES[0], QUALIFIERS_ONE[3], VALUES[0]),
    new KeyValue(ROWS_ONE[0], FAMILIES[1], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[0], FAMILIES[1], QUALIFIERS_ONE[3], VALUES[0]),
    // testRowOne-2
    new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[3], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[1], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[1], QUALIFIERS_ONE[3], VALUES[0]),
    // testRowOne-3
    new KeyValue(ROWS_ONE[3], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[0], QUALIFIERS_ONE[3], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[1], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[1], QUALIFIERS_ONE[3], VALUES[0]),
    // testRowTwo-0
    new KeyValue(ROWS_TWO[0], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[0], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[0], QUALIFIERS_TWO[3], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[1], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[1], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[1], QUALIFIERS_TWO[3], VALUES[1]),
    // testRowTwo-2
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[3], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[1], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[1], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[1], QUALIFIERS_TWO[3], VALUES[1]),
    // testRowTwo-3
    new KeyValue(ROWS_TWO[3], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[0], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[0], QUALIFIERS_TWO[3], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[1], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[1], QUALIFIERS_TWO[2], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[1], QUALIFIERS_TWO[3], VALUES[1])
  };
  verifyScanFull(scan, withoutGroupOneQualifier);

  // Regex across rows and groups: filter out qualifiers matching "test.+-2".
  // Expect 4 keys per row across both groups.
  scan = new Scan();
  scan.setFilter(new QualifierFilter(CompareOp.NOT_EQUAL, new RegexStringComparator("test.+-2")));
  KeyValue[] withoutDashTwoQualifiers = new KeyValue[] {
    // testRowOne-0
    new KeyValue(ROWS_ONE[0], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[0], FAMILIES[0], QUALIFIERS_ONE[3], VALUES[0]),
    new KeyValue(ROWS_ONE[0], FAMILIES[1], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[0], FAMILIES[1], QUALIFIERS_ONE[3], VALUES[0]),
    // testRowOne-2
    new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[0], QUALIFIERS_ONE[3], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[1], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[2], FAMILIES[1], QUALIFIERS_ONE[3], VALUES[0]),
    // testRowOne-3
    new KeyValue(ROWS_ONE[3], FAMILIES[0], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[0], QUALIFIERS_ONE[3], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[1], QUALIFIERS_ONE[0], VALUES[0]),
    new KeyValue(ROWS_ONE[3], FAMILIES[1], QUALIFIERS_ONE[3], VALUES[0]),
    // testRowTwo-0
    new KeyValue(ROWS_TWO[0], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[0], QUALIFIERS_TWO[3], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[1], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[0], FAMILIES[1], QUALIFIERS_TWO[3], VALUES[1]),
    // testRowTwo-2
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[0], QUALIFIERS_TWO[3], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[1], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[2], FAMILIES[1], QUALIFIERS_TWO[3], VALUES[1]),
    // testRowTwo-3
    new KeyValue(ROWS_TWO[3], FAMILIES[0], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[0], QUALIFIERS_TWO[3], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[1], QUALIFIERS_TWO[0], VALUES[1]),
    new KeyValue(ROWS_TWO[3], FAMILIES[1], QUALIFIERS_TWO[3], VALUES[1])
  };
  verifyScanFull(scan, withoutDashTwoQualifiers);
}
use of org.apache.hadoop.hbase.filter.Filter in project hbase by apache.
the class Import method instantiateFilter.
/**
 * Create a {@link Filter} to apply to all incoming keys ({@link KeyValue KeyValues}) to
 * optionally not include in the job output.
 * <p>
 * The filter class is read from {@code FILTER_CLASS_CONF_KEY} and its arguments from
 * {@code FILTER_ARGS_CONF_KEY}. The arguments are converted with {@code toQuotedByteArrays}
 * and passed to the class's static {@code createFilterFromArguments(ArrayList)} method, which
 * is looked up and invoked reflectively.
 * @param conf {@link Configuration} from which to load the filter
 * @return the filter to use for the task, or {@code null} if no filter should be used
 * @throws RuntimeException if {@code createFilterFromArguments} cannot be found, accessed or
 *           invoked on the configured class, or if it throws; the underlying exception is
 *           attached as the cause
 */
public static Filter instantiateFilter(Configuration conf) {
  // get the filter, if it was configured
  Class<? extends Filter> filterClass = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
  if (filterClass == null) {
    LOG.debug("No configured filter class, accepting all keyvalues.");
    return null;
  }
  LOG.debug("Attempting to create filter:" + filterClass);
  // NOTE(review): filterArgs is handed to toQuotedByteArrays unchecked; confirm that helper
  // copes with a missing FILTER_ARGS_CONF_KEY property.
  String[] filterArgs = conf.getStrings(FILTER_ARGS_CONF_KEY);
  ArrayList<byte[]> quotedArgs = toQuotedByteArrays(filterArgs);
  try {
    Method m = filterClass.getMethod("createFilterFromArguments", ArrayList.class);
    // A null receiver means the factory method is invoked as a static method.
    return (Filter) m.invoke(null, quotedArgs);
  } catch (IllegalAccessException | SecurityException | NoSuchMethodException
      | IllegalArgumentException | InvocationTargetException e) {
    // Every failure mode is reported the same way: log it and rethrow unchecked with the cause.
    LOG.error("Couldn't instantiate filter!", e);
    throw new RuntimeException(e);
  }
}
use of org.apache.hadoop.hbase.filter.Filter in project hbase by apache.
the class Export method getConfiguredScanForJob.
/**
 * Builds the {@link Scan} for the export job from the job configuration and the command-line
 * arguments.
 * <p>
 * Optional positional arguments: {@code args[2]} is the maximum number of versions (default 1),
 * {@code args[3]} the start of the time range (default 0) and {@code args[4]} its end (default
 * {@link Long#MAX_VALUE}). Start/stop row, raw mode, column families and batching are read from
 * {@code conf}; the filter comes from {@code getExportFilter(args)}.
 * @param conf configuration holding the scan-related properties
 * @param args command-line arguments of the export tool
 * @return the configured scan
 * @throws IOException declared by this method; raised by the calls it makes
 */
private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
  Scan scan = new Scan();

  // Max versions: third argument when given, otherwise 1.
  int versions = 1;
  if (args.length > 2) {
    versions = Integer.parseInt(args[2]);
  }
  scan.setMaxVersions(versions);

  // Time range: fourth and fifth arguments when given, otherwise unbounded.
  long startTime = 0L;
  if (args.length > 3) {
    startTime = Long.parseLong(args[3]);
  }
  long endTime = Long.MAX_VALUE;
  if (args.length > 4) {
    endTime = Long.parseLong(args[4]);
  }
  scan.setTimeRange(startTime, endTime);

  // Block caching is switched off for the export scan.
  scan.setCacheBlocks(false);

  // Start and stop row, only when configured.
  String configuredStartRow = conf.get(TableInputFormat.SCAN_ROW_START);
  if (configuredStartRow != null) {
    scan.setStartRow(Bytes.toBytesBinary(configuredStartRow));
  }
  String configuredStopRow = conf.get(TableInputFormat.SCAN_ROW_STOP);
  if (configuredStopRow != null) {
    scan.setStopRow(Bytes.toBytesBinary(configuredStopRow));
  }

  // Raw scan flag.
  boolean raw = Boolean.parseBoolean(conf.get(RAW_SCAN));
  if (raw) {
    scan.setRaw(true);
  }

  // Column families to scan.
  String[] columnFamilies = conf.getTrimmedStrings(TableInputFormat.SCAN_COLUMN_FAMILY);
  for (String columnFamily : columnFamilies) {
    scan.addFamily(Bytes.toBytes(columnFamily));
  }

  // Filter derived from the command-line arguments, if any.
  Filter exportFilter = getExportFilter(args);
  if (exportFilter != null) {
    LOG.info("Setting Scan Filter for Export.");
    scan.setFilter(exportFilter);
  }

  // Batching is applied only when configured (-1 means "not set"); an incompatible filter is
  // logged rather than propagated.
  int batching = conf.getInt(EXPORT_BATCHING, -1);
  if (batching != -1) {
    try {
      scan.setBatch(batching);
    } catch (IncompatibleFilterException e) {
      LOG.error("Batching could not be set", e);
    }
  }

  LOG.info("versions=" + versions + ", starttime=" + startTime + ", endtime=" + endTime + ", keepDeletedCells=" + raw);
  return scan;
}
Aggregations