Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
From the class TestHiveAccumuloTableInputFormat, method testConfigureMockAccumuloInputFormat.
@Test
public void testConfigureMockAccumuloInputFormat() throws Exception {
  AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
  ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS),
      conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
  Set<Pair<Text, Text>> cfCqPairs = inputformat.getPairCollection(columnMapper.getColumnMappings());
  List<IteratorSetting> iterators = Collections.emptyList();
  Set<Range> ranges = Collections.singleton(new Range());
  HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
  // Call out to the real configure method
  Mockito.doCallRealMethod().when(mockInputFormat)
      .configure(conf, mockInstance, con, accumuloParams, columnMapper, iterators, ranges);
  // Also compute the correct cf:cq pairs so we can assert the right argument was passed
  Mockito.doCallRealMethod().when(mockInputFormat)
      .getPairCollection(columnMapper.getColumnMappings());
  mockInputFormat.configure(conf, mockInstance, con, accumuloParams, columnMapper, iterators, ranges);
  // Verify that the correct methods are invoked on AccumuloInputFormat
  Mockito.verify(mockInputFormat).setMockInstance(conf, mockInstance.getInstanceName());
  Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
  Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
  Mockito.verify(mockInputFormat).setScanAuthorizations(conf,
      con.securityOperations().getUserAuthorizations(USER));
  Mockito.verify(mockInputFormat).addIterators(conf, iterators);
  Mockito.verify(mockInputFormat).setRanges(conf, ranges);
  Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
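For reference, a minimal sketch of building a ColumnMapper directly from a literal mapping string rather than reading it from the Configuration; the schema, mapping string, and variable names below are hypothetical, but the constructor arguments follow the usage above (mapping string, default storage/encoding name, Hive column names, Hive column types).

// hypothetical Hive schema: "row" maps to the Accumulo row id, the others to family:qualifier pairs
List<String> names = Arrays.asList("row", "name", "age");
List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo,
    TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo);
ColumnMapper mapper = new ColumnMapper(":rowID,person:name,person:age",
    ColumnEncoding.STRING.getName(), names, types);
// the parsed mappings can then be handed to getPairCollection(...) as in the test above
List<ColumnMapping> mappings = mapper.getColumnMappings();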
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
From the class TestAccumuloPredicateHandler, method testCreateIteratorSettings.
@Test
public void testCreateIteratorSettings() throws Exception {
  // Override what's placed in the Configuration by setup()
  conf = new JobConf();
  List<String> columnNames = Arrays.asList("field1", "field2", "rid");
  List<TypeInfo> columnTypes = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
  conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames));
  conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string");
  String columnMappingStr = "cf:f1,cf:f2,:rowID";
  conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr);
  columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes);
  ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false);
  ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
  List<ExprNodeDesc> children = Lists.newArrayList();
  children.add(column);
  children.add(constant);
  ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPEqualOrLessThan(), children);
  assertNotNull(node);
  ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, false);
  ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
  List<ExprNodeDesc> children2 = Lists.newArrayList();
  children2.add(column2);
  children2.add(constant2);
  ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPGreaterThan(), children2);
  assertNotNull(node2);
  List<ExprNodeDesc> bothFilters = Lists.newArrayList();
  bothFilters.add(node);
  bothFilters.add(node2);
  ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo,
      new GenericUDFOPAnd(), bothFilters);
  String filterExpr = SerializationUtilities.serializeExpression(both);
  conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
  List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
  assertEquals(iterators.size(), 2);
  IteratorSetting is1 = iterators.get(0);
  IteratorSetting is2 = iterators.get(1);
  boolean foundQual = false;
  boolean foundPCompare = false;
  boolean foundCOpt = false;
  boolean foundConst = false;
  for (Map.Entry<String, String> option : is1.getOptions().entrySet()) {
    String optKey = option.getKey();
    if (optKey.equals(PrimitiveComparisonFilter.COLUMN)) {
      foundQual = true;
      assertEquals(option.getValue(), "cf:f1");
    } else if (optKey.equals(PrimitiveComparisonFilter.CONST_VAL)) {
      foundConst = true;
      assertEquals(option.getValue(), new String(Base64.encodeBase64("aaa".getBytes())));
    } else if (optKey.equals(PrimitiveComparisonFilter.COMPARE_OPT_CLASS)) {
      foundCOpt = true;
      assertEquals(option.getValue(), LessThanOrEqual.class.getName());
    } else if (optKey.equals(PrimitiveComparisonFilter.P_COMPARE_CLASS)) {
      foundPCompare = true;
      assertEquals(option.getValue(), StringCompare.class.getName());
    }
  }
  assertTrue(foundConst & foundCOpt & foundPCompare & foundQual);
  foundQual = false;
  foundPCompare = false;
  foundCOpt = false;
  foundConst = false;
  for (Map.Entry<String, String> option : is2.getOptions().entrySet()) {
    String optKey = option.getKey();
    if (optKey.equals(PrimitiveComparisonFilter.COLUMN)) {
      foundQual = true;
      assertEquals(option.getValue(), "cf:f2");
    } else if (optKey.equals(PrimitiveComparisonFilter.CONST_VAL)) {
      foundConst = true;
      byte[] intVal = new byte[4];
      ByteBuffer.wrap(intVal).putInt(5);
      assertEquals(option.getValue(), new String(Base64.encodeBase64(intVal)));
    } else if (optKey.equals(PrimitiveComparisonFilter.COMPARE_OPT_CLASS)) {
      foundCOpt = true;
      assertEquals(option.getValue(), GreaterThan.class.getName());
    } else if (optKey.equals(PrimitiveComparisonFilter.P_COMPARE_CLASS)) {
      foundPCompare = true;
      assertEquals(option.getValue(), IntCompare.class.getName());
    }
  }
  assertTrue(foundConst & foundCOpt & foundPCompare & foundQual);
}
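As a side note on what the second loop asserts: the CONST_VAL option carries the Base64-encoded raw bytes of the predicate's constant. A rough sketch of decoding it back, assuming is2 is the iterator holding the field2 > 5 predicate as in the test above; the variable names are illustrative.

String encoded = is2.getOptions().get(PrimitiveComparisonFilter.CONST_VAL);
byte[] raw = Base64.decodeBase64(encoded);     // commons-codec Base64, as used above
int constant = ByteBuffer.wrap(raw).getInt();  // yields 5: the int was written as 4 raw bytes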
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
From the class HiveAccumuloTableInputFormat, method getRecordReader.
/**
 * Sets up the Accumulo input format from conf properties and delegates to the final RecordReader
 * from the mapred package.
 *
 * @param inputSplit
 * @param jobConf
 * @param reporter
 * @return RecordReader
 * @throws IOException
 */
@Override
public RecordReader<Text, AccumuloHiveRow> getRecordReader(InputSplit inputSplit,
    final JobConf jobConf, final Reporter reporter) throws IOException {
  final ColumnMapper columnMapper;
  try {
    columnMapper = getColumnMapper(jobConf);
  } catch (TooManyAccumuloColumnsException e) {
    throw new IOException(e);
  }
  try {
    final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);
    HiveAccumuloSplit hiveSplit = (HiveAccumuloSplit) inputSplit;
    RangeInputSplit rangeSplit = hiveSplit.getSplit();
    log.info("Split: " + rangeSplit);
    // Iterators aren't always serialized into the InputSplit by Accumulo.
    // Should be fixed in Accumulo 1.5.2 and 1.6.1
    if (null == rangeSplit.getIterators()
        || (rangeSplit.getIterators().isEmpty() && !iterators.isEmpty())) {
      log.debug("Re-setting iterators on InputSplit due to Accumulo bug.");
      rangeSplit.setIterators(iterators);
    }
    // Likewise, the RangeInputSplit doesn't always carry the table name,
    // but we want it to, so just re-set it if it's null.
    if (null == getTableName(rangeSplit)) {
      final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
      log.debug("Re-setting table name on InputSplit due to Accumulo bug.");
      setTableName(rangeSplit, accumuloParams.getAccumuloTableName());
    }
    final RecordReader<Text, PeekingIterator<Map.Entry<Key, Value>>> recordReader =
        accumuloInputFormat.getRecordReader(rangeSplit, jobConf, reporter);
    return new HiveAccumuloRecordReader(recordReader, iterators.size());
  } catch (SerDeException e) {
    throw new IOException(StringUtils.stringifyException(e));
  }
}
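For orientation, a rough sketch of consuming the returned reader through the standard mapred RecordReader contract; inputFormat and split are hypothetical stand-ins for objects the AccumuloStorageHandler would normally supply.

RecordReader<Text, AccumuloHiveRow> reader = inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
Text rowId = reader.createKey();
AccumuloHiveRow row = reader.createValue();
while (reader.next(rowId, row)) {
  // each AccumuloHiveRow collects the key/value pairs Accumulo returned for one row id
}
reader.close();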