use of org.apache.drill.exec.store.hbase.HBaseRegexParser in project drill by apache.
the class MapRDBFilterBuilder method createHBaseScanSpec.
private HBaseScanSpec createHBaseScanSpec(FunctionCall call, CompareFunctionsProcessor processor) {
String functionName = processor.getFunctionName();
SchemaPath field = processor.getPath();
byte[] fieldValue = processor.getValue();
boolean sortOrderAscending = processor.isSortOrderAscending();
boolean isRowKey = field.getAsUnescapedPath().equals(ROW_KEY);
if (!(isRowKey || (!field.getRootSegment().isLastPath() && field.getRootSegment().getChild().isLastPath() && field.getRootSegment().getChild().isNamed()))) {
/*
* if the field in this function is neither the row_key nor a qualified HBase column, return.
*/
return null;
}
if (processor.isRowKeyPrefixComparison()) {
return createRowKeyPrefixScanSpec(call, processor);
}
CompareOp compareOp = null;
boolean isNullTest = false;
ByteArrayComparable comparator = new BinaryComparator(fieldValue);
byte[] startRow = HConstants.EMPTY_START_ROW;
byte[] stopRow = HConstants.EMPTY_END_ROW;
switch(functionName) {
case "equal":
compareOp = CompareOp.EQUAL;
if (isRowKey) {
startRow = fieldValue;
/* stopRow should be just greater than 'value'*/
stopRow = Arrays.copyOf(fieldValue, fieldValue.length + 1);
compareOp = CompareOp.EQUAL;
}
break;
case "not_equal":
compareOp = CompareOp.NOT_EQUAL;
break;
case "greater_than_or_equal_to":
if (sortOrderAscending) {
compareOp = CompareOp.GREATER_OR_EQUAL;
if (isRowKey) {
startRow = fieldValue;
}
} else {
compareOp = CompareOp.LESS_OR_EQUAL;
if (isRowKey) {
// stopRow should be just greater than 'value'
stopRow = Arrays.copyOf(fieldValue, fieldValue.length + 1);
}
}
break;
case "greater_than":
if (sortOrderAscending) {
compareOp = CompareOp.GREATER;
if (isRowKey) {
// startRow should be just greater than 'value'
startRow = Arrays.copyOf(fieldValue, fieldValue.length + 1);
}
} else {
compareOp = CompareOp.LESS;
if (isRowKey) {
stopRow = fieldValue;
}
}
break;
case "less_than_or_equal_to":
if (sortOrderAscending) {
compareOp = CompareOp.LESS_OR_EQUAL;
if (isRowKey) {
// stopRow should be just greater than 'value'
stopRow = Arrays.copyOf(fieldValue, fieldValue.length + 1);
}
} else {
compareOp = CompareOp.GREATER_OR_EQUAL;
if (isRowKey) {
startRow = fieldValue;
}
}
break;
case "less_than":
if (sortOrderAscending) {
compareOp = CompareOp.LESS;
if (isRowKey) {
stopRow = fieldValue;
}
} else {
compareOp = CompareOp.GREATER;
if (isRowKey) {
// startRow should be just greater than 'value'
startRow = Arrays.copyOf(fieldValue, fieldValue.length + 1);
}
}
break;
case "isnull":
case "isNull":
case "is null":
if (isRowKey) {
return null;
}
isNullTest = true;
compareOp = CompareOp.EQUAL;
comparator = new NullComparator();
break;
case "isnotnull":
case "isNotNull":
case "is not null":
if (isRowKey) {
return null;
}
compareOp = CompareOp.NOT_EQUAL;
comparator = new NullComparator();
break;
case "like":
/*
* Convert the LIKE operand to Regular Expression pattern so that we can
* apply RegexStringComparator()
*/
HBaseRegexParser parser = new HBaseRegexParser(call).parse();
compareOp = CompareOp.EQUAL;
comparator = new RegexStringComparator(parser.getRegexString());
/*
* We can possibly do better if the LIKE operator is on the row_key
*/
if (isRowKey) {
String prefix = parser.getPrefixString();
if (prefix != null) {
/*
* If there is a literal prefix, it can help us prune the scan to a sub range
*/
if (prefix.equals(parser.getLikeString())) {
/* The operand value is literal. This turns the LIKE operator to EQUAL operator */
startRow = stopRow = fieldValue;
compareOp = null;
} else {
startRow = prefix.getBytes(Charsets.UTF_8);
stopRow = startRow.clone();
boolean isMaxVal = true;
for (int i = stopRow.length - 1; i >= 0; --i) {
int nextByteValue = (0xff & stopRow[i]) + 1;
if (nextByteValue < 0xff) {
stopRow[i] = (byte) nextByteValue;
isMaxVal = false;
break;
} else {
stopRow[i] = 0;
}
}
if (isMaxVal) {
stopRow = HConstants.EMPTY_END_ROW;
}
}
}
}
break;
}
if (compareOp != null || startRow != HConstants.EMPTY_START_ROW || stopRow != HConstants.EMPTY_END_ROW) {
Filter filter = null;
if (isRowKey) {
if (compareOp != null) {
filter = new RowFilter(compareOp, comparator);
}
} else {
byte[] family = HBaseUtils.getBytes(field.getRootSegment().getPath());
byte[] qualifier = HBaseUtils.getBytes(field.getRootSegment().getChild().getNameSegment().getPath());
filter = new SingleColumnValueFilter(family, qualifier, compareOp, comparator);
((SingleColumnValueFilter) filter).setLatestVersionOnly(true);
if (!isNullTest) {
((SingleColumnValueFilter) filter).setFilterIfMissing(true);
}
}
return new HBaseScanSpec(groupScan.getTableName(), startRow, stopRow, filter);
}
// else
return null;
}
use of org.apache.drill.exec.store.hbase.HBaseRegexParser in project drill by apache.
the class TestHBaseRegexParser method testLikeExprToRegex.
@Test
public void testLikeExprToRegex() throws Exception {
HBaseRegexParser parser = new HBaseRegexParser("ABC%[0-7][0-9A-Fa-f]").parse();
assertEquals("^\\QABC\\E.*[0-7][0-9A-Fa-f]$", parser.getRegexString());
assertEquals("ABC", parser.getPrefixString());
Pattern pattern = Pattern.compile(parser.getRegexString(), Pattern.DOTALL);
assertTrue(pattern.matcher("ABC79").matches());
assertTrue(pattern.matcher("ABCxxxxxxx79").matches());
parser = new HBaseRegexParser("ABC%[0-8]%_").parse();
assertEquals("^\\QABC\\E.*[0-8].*.$", parser.getRegexString());
assertEquals("ABC", parser.getPrefixString());
pattern = Pattern.compile(parser.getRegexString(), Pattern.DOTALL);
assertTrue(pattern.matcher("ABC79").matches());
assertTrue(pattern.matcher("ABCxxxx79").matches());
assertTrue(pattern.matcher("ABCxxxx7xxxxx9").matches());
assertTrue(pattern.matcher("ABC[0-8]_").matches());
parser = new HBaseRegexParser("ABC%[0-8]%_", '%').parse();
assertEquals("^\\QABC\\E\\Q[\\E\\Q0-8]\\E\\Q_\\E$", parser.getRegexString());
assertEquals("ABC[0-8]_", parser.getPrefixString());
pattern = Pattern.compile(parser.getRegexString(), Pattern.DOTALL);
assertFalse(pattern.matcher("ABC79").matches());
assertTrue(pattern.matcher("ABC[0-8]_").matches());
try {
parser = new HBaseRegexParser("ABC%[0-8][^a-f]%", '%').parse();
fail("Parsed an illegal LIKE expression.");
} catch (IllegalArgumentException e) {
}
}
Aggregations