Use of org.apache.accumulo.core.client.IteratorSetting in project hive by apache.
In the class TestHiveAccumuloTableInputFormat, the method testDegreesAndMillis:
@Test
public void testDegreesAndMillis() throws Exception {
  Connector con = mockInstance.getConnector(USER, new PasswordToken(PASS.getBytes()));
  Scanner scan = con.createScanner(TEST_TABLE, new Authorizations("blah"));
  IteratorSetting is = new IteratorSetting(1, PrimitiveComparisonFilter.FILTER_PREFIX + 1, PrimitiveComparisonFilter.class);
  is.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, DoubleCompare.class.getName());
  is.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, GreaterThanOrEqual.class.getName());
  is.addOption(PrimitiveComparisonFilter.CONST_VAL, new String(Base64.encodeBase64(parseDoubleBytes("55.6"))));
  is.addOption(PrimitiveComparisonFilter.COLUMN, "cf:dgrs");
  scan.addScanIterator(is);
  IteratorSetting is2 = new IteratorSetting(2, PrimitiveComparisonFilter.FILTER_PREFIX + 2, PrimitiveComparisonFilter.class);
  is2.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, LongCompare.class.getName());
  is2.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, LessThan.class.getName());
  is2.addOption(PrimitiveComparisonFilter.CONST_VAL, new String(Base64.encodeBase64(parseLongBytes("778"))));
  is2.addOption(PrimitiveComparisonFilter.COLUMN, "cf:mills");
  scan.addScanIterator(is2);
  boolean foundDennis = false;
  int totalCount = 0;
  for (Map.Entry<Key, Value> kv : scan) {
    boolean foundName = false;
    boolean foundSid = false;
    boolean foundDegrees = false;
    boolean foundMillis = false;
    SortedMap<Key, Value> items = PrimitiveComparisonFilter.decodeRow(kv.getKey(), kv.getValue());
    for (Map.Entry<Key, Value> item : items.entrySet()) {
      SortedMap<Key, Value> nestedItems = PrimitiveComparisonFilter.decodeRow(item.getKey(), item.getValue());
      for (Map.Entry<Key, Value> nested : nestedItems.entrySet()) {
        if (nested.getKey().getRow().toString().equals("r3")) {
          foundDennis = true;
        }
        if (nested.getKey().getColumnQualifier().equals(NAME)) {
          foundName = true;
        } else if (nested.getKey().getColumnQualifier().equals(SID)) {
          foundSid = true;
        } else if (nested.getKey().getColumnQualifier().equals(DEGREES)) {
          foundDegrees = true;
        } else if (nested.getKey().getColumnQualifier().equals(MILLIS)) {
          foundMillis = true;
        }
      }
    }
    totalCount++;
    assertTrue(foundDegrees && foundMillis && foundName && foundSid);
  }
  assertTrue(foundDennis);
  assertEquals(1, totalCount);
}
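The two settings above differ only in their priority, comparison classes, encoded constant, and target column. As a hedged sketch, the repetition could be factored into a small helper like the hypothetical newComparisonFilterSetting below; the option keys mirror exactly what the test sets by hand, but the helper itself is not part of the Hive codebase.

// Hypothetical helper (not in Hive): builds one PrimitiveComparisonFilter setting
// per predicate, mirroring the options used in testDegreesAndMillis above.
static IteratorSetting newComparisonFilterSetting(int priority, Class<?> pCompareClass,
    Class<?> compareOpClass, byte[] constVal, String accumuloColumn) {
  // Accumulo requires a distinct priority and name per iterator on the same scan
  IteratorSetting setting = new IteratorSetting(priority,
      PrimitiveComparisonFilter.FILTER_PREFIX + priority, PrimitiveComparisonFilter.class);
  setting.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, pCompareClass.getName());
  setting.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, compareOpClass.getName());
  // The constant is shipped to the server-side iterator Base64-encoded
  setting.addOption(PrimitiveComparisonFilter.CONST_VAL, new String(Base64.encodeBase64(constVal)));
  setting.addOption(PrimitiveComparisonFilter.COLUMN, accumuloColumn);
  return setting;
}

With it, the first iterator above collapses to scan.addScanIterator(newComparisonFilterSetting(1, DoubleCompare.class, GreaterThanOrEqual.class, parseDoubleBytes("55.6"), "cf:dgrs"));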
Use of org.apache.accumulo.core.client.IteratorSetting in project hive by apache.
In the class HiveAccumuloTableInputFormat, the method getSplits:
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
  final Instance instance = accumuloParams.getInstance();
  final ColumnMapper columnMapper;
  try {
    columnMapper = getColumnMapper(jobConf);
  } catch (TooManyAccumuloColumnsException e) {
    throw new IOException(e);
  }
  JobContext context = ShimLoader.getHadoopShims().newJobContext(Job.getInstance(jobConf));
  Path[] tablePaths = FileInputFormat.getInputPaths(context);
  try {
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
    final Connector connector;
    // Need to get a Connector so we can look up the user's authorizations if not otherwise specified
    if (accumuloParams.useSasl() && !ugi.hasKerberosCredentials()) {
      // In a YARN/Tez job, we don't have the Kerberos credentials anymore; use the delegation token
      AuthenticationToken token = ConfiguratorBase.getAuthenticationToken(AccumuloInputFormat.class, jobConf);
      // Convert the stub from the configuration back into a normal Token
      // (more reflection to support Accumulo 1.6)
      token = helper.unwrapAuthenticationToken(jobConf, token);
      connector = instance.getConnector(accumuloParams.getAccumuloUserName(), token);
    } else {
      // Still in the local JVM, use the username+password or Kerberos credentials
      connector = accumuloParams.getConnector(instance);
    }
    final List<ColumnMapping> columnMappings = columnMapper.getColumnMappings();
    final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);
    final Collection<Range> ranges = predicateHandler.getRanges(jobConf, columnMapper);
    // A non-null but empty set of Ranges means the predicates matched nothing; handing an empty
    // collection to Accumulo would make it scan the whole table. We don't want that.
    if (null != ranges && ranges.isEmpty()) {
      return new InputSplit[0];
    }
    // Set the relevant information in the Configuration for the AccumuloInputFormat
    configure(jobConf, instance, connector, accumuloParams, columnMapper, iterators, ranges);
    int numColumns = columnMappings.size();
    List<Integer> readColIds = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    // Sanity check
    if (numColumns < readColIds.size()) {
      throw new IOException("Number of column mappings (" + numColumns + ") is less than the number of Hive table columns to read (" + readColIds.size() + ")");
    }
    // Get splits from Accumulo
    InputSplit[] splits = accumuloInputFormat.getSplits(jobConf, numSplits);
    HiveAccumuloSplit[] hiveSplits = new HiveAccumuloSplit[splits.length];
    for (int i = 0; i < splits.length; i++) {
      RangeInputSplit ris = (RangeInputSplit) splits[i];
      hiveSplits[i] = new HiveAccumuloSplit(ris, tablePaths[0]);
    }
    return hiveSplits;
  } catch (AccumuloException e) {
    log.error("Could not configure AccumuloInputFormat", e);
    throw new IOException(StringUtils.stringifyException(e));
  } catch (AccumuloSecurityException e) {
    log.error("Could not configure AccumuloInputFormat", e);
    throw new IOException(StringUtils.stringifyException(e));
  } catch (SerDeException e) {
    log.error("Could not configure AccumuloInputFormat", e);
    throw new IOException(StringUtils.stringifyException(e));
  }
}
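The configure(...) call is where the IteratorSettings and Ranges computed by the predicate handler end up in the job's Configuration. A minimal sketch of that half of the work, assuming Accumulo's 1.6-era org.apache.accumulo.core.client.mapred API (addIterator and setRanges exist there as static methods, but this is not a quote of the actual Hive implementation):

// Sketch only: push predicate artifacts into the JobConf for AccumuloInputFormat.
static void configureIteratorsAndRanges(JobConf jobConf, List<IteratorSetting> iterators,
    Collection<Range> ranges) {
  for (IteratorSetting setting : iterators) {
    // Each setting becomes a server-side filter on every scan the input format opens
    AccumuloInputFormat.addIterator(jobConf, setting);
  }
  // An empty collection would mean "scan everything", so only set ranges when present;
  // the empty case was already short-circuited above by returning zero splits.
  if (null != ranges && !ranges.isEmpty()) {
    AccumuloInputFormat.setRanges(jobConf, ranges);
  }
}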
Use of org.apache.accumulo.core.client.IteratorSetting in project hive by apache.
In the class AccumuloPredicateHandler, the method getIterators:
/**
 * Loop through the search conditions and build iterator settings for predicates involving columns
 * other than the rowID, if any.
 *
 * @param conf
 *          Configuration
 * @param columnMapper
 *          Mapping from Hive columns to Accumulo columns
 * @return one IteratorSetting per pushed-down predicate, or an empty list if pushdown is disabled
 * @throws SerDeException
 */
public List<IteratorSetting> getIterators(Configuration conf, ColumnMapper columnMapper) throws SerDeException {
  List<IteratorSetting> itrs = Lists.newArrayList();
  boolean shouldPushdown = conf.getBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, AccumuloSerDeParameters.ITERATOR_PUSHDOWN_DEFAULT);
  if (!shouldPushdown) {
    log.info("Iterator pushdown is disabled for this table");
    return itrs;
  }
  int rowIdOffset = columnMapper.getRowIdOffset();
  String[] hiveColumnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS);
  if (null == hiveColumnNamesArr) {
    throw new IllegalArgumentException("Could not find Hive columns in configuration");
  }
  String hiveRowIdColumnName = null;
  if (rowIdOffset >= 0 && rowIdOffset < hiveColumnNamesArr.length) {
    hiveRowIdColumnName = hiveColumnNamesArr[rowIdOffset];
  }
  List<String> hiveColumnNames = Arrays.asList(hiveColumnNamesArr);
  for (IndexSearchCondition sc : getSearchConditions(conf)) {
    String col = sc.getColumnDesc().getColumn();
    // Predicates on the rowID column are handled as Ranges, not as iterators
    if (hiveRowIdColumnName == null || !hiveRowIdColumnName.equals(col)) {
      HiveAccumuloColumnMapping mapping = (HiveAccumuloColumnMapping) columnMapper.getColumnMappingForHiveColumn(hiveColumnNames, col);
      itrs.add(toSetting(mapping, sc));
    }
  }
  if (log.isInfoEnabled()) {
    log.info("num iterators = " + itrs.size());
  }
  return itrs;
}
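The rowID exclusion in the loop above can be seen in miniature with the column layout used by the tests below (field1, field2, rid, with :rowID mapped at offset 2): only predicates on non-rowID columns reach toSetting, while rowID predicates are served by Ranges instead. A small, self-contained illustration of that decision alone:

// Illustration only: which predicate columns yield iterators for the mapping
// "cf:f1,cf:f2,:rowID" over Hive columns "field1,field2,rid" (rowIdOffset == 2).
String[] hiveColumns = {"field1", "field2", "rid"};
int rowIdOffset = 2;
String rowIdColumn = (rowIdOffset >= 0 && rowIdOffset < hiveColumns.length)
    ? hiveColumns[rowIdOffset] : null;
for (String predicateColumn : hiveColumns) {
  boolean becomesIterator = rowIdColumn == null || !rowIdColumn.equals(predicateColumn);
  System.out.println(predicateColumn + " -> iterator: " + becomesIterator);
}
// Prints:
//   field1 -> iterator: true
//   field2 -> iterator: true
//   rid -> iterator: false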
Use of org.apache.accumulo.core.client.IteratorSetting in project hive by apache.
In the class TestAccumuloPredicateHandler, the method testIteratorIgnoreRowIDFields:
@Test
public void testIteratorIgnoreRowIDFields() {
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
  assertNotNull(node);
  ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
  ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb");
  List<ExprNodeDesc> children2 = Lists.newArrayList();
  children2.add(column2);
  children2.add(constant2);
  ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
  assertNotNull(node2);
  List<ExprNodeDesc> bothFilters = Lists.newArrayList();
  bothFilters.add(node);
  bothFilters.add(node2);
  ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
  String filterExpr = SerializationUtilities.serializeExpression(both);
  conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
  try {
    List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
    // Both predicates target the rowID column, so no iterators should be generated
    assertEquals(0, iterators.size());
  } catch (SerDeException e) {
    fail(StringUtils.stringifyException(e));
  }
}
Use of org.apache.accumulo.core.client.IteratorSetting in project hive by apache.
In the class TestAccumuloPredicateHandler, the method testIgnoreIteratorPushdown:
@Test
public void testIgnoreIteratorPushdown() throws TooManyAccumuloColumnsException {
  // Override what's placed in the Configuration by setup()
  conf = new JobConf();
  List<String> columnNames = Arrays.asList("field1", "field2", "rid");
  List<TypeInfo> columnTypes = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
  conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames));
  conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string");
  String columnMappingStr = "cf:f1,cf:f2,:rowID";
  conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr);
  columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes);
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
  assertNotNull(node);
  ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, false);
  ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
  List<ExprNodeDesc> children2 = Lists.newArrayList();
  children2.add(column2);
  children2.add(constant2);
  ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
  assertNotNull(node2);
  List<ExprNodeDesc> bothFilters = Lists.newArrayList();
  bothFilters.add(node);
  bothFilters.add(node2);
  ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
  String filterExpr = SerializationUtilities.serializeExpression(both);
  conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
  // Explicitly disable iterator pushdown; predicates on plain columns now yield no iterators
  conf.setBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, false);
  try {
    List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
    assertEquals(0, iterators.size());
  } catch (Exception e) {
    fail(StringUtils.stringifyException(e));
  }
}
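Both tests assert the empty case. When pushdown is enabled and the predicates target non-rowID columns, the returned settings can be attached to any scan; a minimal sketch, assuming an existing Connector and a hypothetical table name:

// Sketch: attach the handler's iterators to a plain Scanner, which is effectively
// what HiveAccumuloTableInputFormat arranges for each MapReduce split.
// "hive_table" and the connector are assumptions for illustration.
List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
Scanner scanner = connector.createScanner("hive_table", Authorizations.EMPTY);
for (IteratorSetting setting : iterators) {
  scanner.addScanIterator(setting);
}
for (Map.Entry<Key, Value> entry : scanner) {
  // Only rows that survive every PrimitiveComparisonFilter reach the client
}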