Example usage of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in the Apache Hive project, taken from the class TestHiveAccumuloTableInputFormat, method testConfigureAccumuloInputFormatWithEmptyColumns.
// Verifies that configure() wires the expected settings into AccumuloInputFormat
// when there are no concrete column-family/qualifier pairs to fetch: all the
// set* methods should be invoked, but fetchColumns() should not be.
// NOTE(review): relies on fields initialized by the enclosing test class
// (conf, con, columnNames, columnTypes, USER, PASS, TEST_TABLE) — confirm in setup.
@Test
public void testConfigureAccumuloInputFormatWithEmptyColumns() throws Exception {
AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
// Build the Hive-column -> Accumulo-column mapping from the SerDe properties in conf.
ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
// Intentionally empty set of cf:cq pairs — the "empty columns" condition under test.
HashSet<Pair<Text, Text>> cfCqPairs = Sets.newHashSet();
List<IteratorSetting> iterators = new ArrayList<IteratorSetting>();
Set<Range> ranges = Collections.singleton(new Range());
String instanceName = "realInstance";
String zookeepers = "host1:2181,host2:2181,host3:2181";
// First predicate-pushdown iterator: string equality filter on person:name == "dave".
IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave");
cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name");
iterators.add(cfg);
// Second predicate-pushdown iterator: integer equality filter on person:age == 50.
cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName());
cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50");
cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age");
iterators.add(cfg);
ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
// Stub out the ZKI mock
Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);
// Stub getPairCollection to the empty set (superseded by the doCallRealMethod below,
// which re-stubs the same invocation — NOTE(review): later stubbing wins in Mockito).
Mockito.when(mockInputFormat.getPairCollection(columnMapper.getColumnMappings())).thenReturn(cfCqPairs);
// Call out to the real configure method
Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
// Also compute the correct cf:cq pairs so we can assert the right argument was passed
Mockito.doCallRealMethod().when(mockInputFormat).getPairCollection(columnMapper.getColumnMappings());
// Exercise the real configure() against the mock so its internal calls can be verified.
mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
// Verify that the correct methods are invoked on AccumuloInputFormat
Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
Mockito.verify(mockInputFormat).setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
Mockito.verify(mockInputFormat).addIterators(conf, iterators);
Mockito.verify(mockInputFormat).setRanges(conf, ranges);
// fetchColumns is not called because we had no columns to fetch
}
Example usage of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in the Apache Hive project, taken from the class TestHiveAccumuloTableInputFormat, method testConfigureMockAccumuloInputFormat.
// Verifies that configure() performs the full setup against a MockInstance:
// unlike the empty-columns test, here the column mapping produces cf:cq pairs,
// so fetchColumns() must also be invoked with exactly those pairs.
// NOTE(review): relies on fields initialized by the enclosing test class
// (conf, con, mockInstance, inputformat, columnNames, columnTypes, USER, PASS, TEST_TABLE).
@Test
public void testConfigureMockAccumuloInputFormat() throws Exception {
AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
// Build the Hive-column -> Accumulo-column mapping from the SerDe properties in conf.
ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
// Compute the expected cf:cq pairs using the real (non-mocked) input format instance.
Set<Pair<Text, Text>> cfCqPairs = inputformat.getPairCollection(columnMapper.getColumnMappings());
List<IteratorSetting> iterators = Collections.emptyList();
Set<Range> ranges = Collections.singleton(new Range());
HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
// Call out to the real configure method
Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, mockInstance, con, accumuloParams, columnMapper, iterators, ranges);
// Also compute the correct cf:cq pairs so we can assert the right argument was passed
Mockito.doCallRealMethod().when(mockInputFormat).getPairCollection(columnMapper.getColumnMappings());
// Exercise the real configure() against the mock so its internal calls can be verified.
mockInputFormat.configure(conf, mockInstance, con, accumuloParams, columnMapper, iterators, ranges);
// Verify that the correct methods are invoked on AccumuloInputFormat
Mockito.verify(mockInputFormat).setMockInstance(conf, mockInstance.getInstanceName());
Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
Mockito.verify(mockInputFormat).setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
Mockito.verify(mockInputFormat).addIterators(conf, iterators);
Mockito.verify(mockInputFormat).setRanges(conf, ranges);
// With a non-empty mapping, the computed cf:cq pairs must be passed to fetchColumns.
Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
Example usage of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in the Apache Hive project, taken from the class HiveAccumuloTableInputFormat, method getRecordReader.
/**
 * Sets up the Accumulo input format from conf properties and returns a record reader
 * that adapts the delegate reader's output to {@link AccumuloHiveRow} values.
 * Delegates to the final RecordReader from the mapred package.
 *
 * @param inputSplit the split to read; expected to be a {@code HiveAccumuloSplit}
 * @param jobConf job configuration holding the SerDe/column-mapping properties
 * @param reporter progress reporter handed through to the delegate reader
 * @return a RecordReader producing AccumuloHiveRow values keyed by row id
 * @throws IOException if the column mapping is invalid or the SerDe setup fails
 */
@Override
public RecordReader<Text, AccumuloHiveRow> getRecordReader(InputSplit inputSplit, final JobConf jobConf, final Reporter reporter) throws IOException {
  final ColumnMapper columnMapper;
  try {
    columnMapper = getColumnMapper(jobConf);
  } catch (TooManyAccumuloColumnsException e) {
    // Surface mapping problems as IOException, preserving the cause.
    throw new IOException(e);
  }
  try {
    final List<IteratorSetting> predicateIterators = predicateHandler.getIterators(jobConf, columnMapper);
    final HiveAccumuloSplit hiveSplit = (HiveAccumuloSplit) inputSplit;
    final RangeInputSplit rangeSplit = hiveSplit.getSplit();
    log.info("Split: " + rangeSplit);
    // Workaround: the serialized split may arrive without its iterators.
    // Should be fixed in Accumulo 1.5.2 and 1.6.1.
    final List<IteratorSetting> splitIterators = rangeSplit.getIterators();
    if (splitIterators == null || (splitIterators.isEmpty() && !predicateIterators.isEmpty())) {
      log.debug("Re-setting iterators on InputSplit due to Accumulo bug.");
      rangeSplit.setIterators(predicateIterators);
    }
    // Same class of bug for the table name: restore it from the connection
    // parameters when the split comes back without one.
    if (getTableName(rangeSplit) == null) {
      final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
      log.debug("Re-setting table name on InputSplit due to Accumulo bug.");
      setTableName(rangeSplit, accumuloParams.getAccumuloTableName());
    }
    final RecordReader<Text, PeekingIterator<Map.Entry<Key, Value>>> recordReader = accumuloInputFormat.getRecordReader(rangeSplit, jobConf, reporter);
    return new HiveAccumuloRecordReader(recordReader, predicateIterators.size());
  } catch (SerDeException e) {
    throw new IOException(StringUtils.stringifyException(e));
  }
}
Example usage of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in the Apache Hive project, taken from the class TestHiveAccumuloTableOutputFormat, method testSaslConfiguration.
// Verifies the SASL/Kerberos path of configureAccumuloOutputFormat(): when SASL is
// enabled and the current UGI holds Kerberos credentials, a delegation token should
// be obtained from the Connector, merged into the JobConf, and added to the UGI,
// and the ZooKeeper instance should be configured with sasl=true.
// NOTE(review): relies on fields from the enclosing test class
// (conf, user, instanceName, zookeepers, outputTable) — confirm in setup.
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testSaslConfiguration() throws IOException, AccumuloException, AccumuloSecurityException {
final HiveAccumuloTableOutputFormat outputFormat = Mockito.mock(HiveAccumuloTableOutputFormat.class);
final AuthenticationToken authToken = Mockito.mock(AuthenticationToken.class);
final Token hadoopToken = Mockito.mock(Token.class);
final HiveAccumuloHelper helper = Mockito.mock(HiveAccumuloHelper.class);
final AccumuloConnectionParameters cnxnParams = Mockito.mock(AccumuloConnectionParameters.class);
final Connector connector = Mockito.mock(Connector.class);
// Set UGI to use Kerberos
// Have to use the string constant to support hadoop 1
conf.set("hadoop.security.authentication", "kerberos");
UserGroupInformation.setConfiguration(conf);
// Set the current UGI to a fake user
UserGroupInformation user1 = UserGroupInformation.createUserForTesting(user, new String[0]);
// Use that as the "current user"
Mockito.when(outputFormat.getCurrentUser()).thenReturn(user1);
// Turn off passwords, enable sasl and set a keytab
conf.unset(AccumuloConnectionParameters.USER_PASS);
// Call the real method instead of the mock
Mockito.doCallRealMethod().when(outputFormat).configureAccumuloOutputFormat(conf);
// Return our mocked objects
Mockito.when(outputFormat.getHelper()).thenReturn(helper);
Mockito.when(outputFormat.getConnectionParams(conf)).thenReturn(cnxnParams);
Mockito.when(cnxnParams.getConnector()).thenReturn(connector);
// Delegation-token chain: Connector -> Accumulo token -> wrapped Hadoop token.
Mockito.when(helper.getDelegationToken(connector)).thenReturn(authToken);
Mockito.when(helper.getHadoopToken(authToken)).thenReturn(hadoopToken);
// Stub AccumuloConnectionParameters actions
Mockito.when(cnxnParams.useSasl()).thenReturn(true);
Mockito.when(cnxnParams.getAccumuloUserName()).thenReturn(user);
Mockito.when(cnxnParams.getAccumuloInstanceName()).thenReturn(instanceName);
Mockito.when(cnxnParams.getZooKeepers()).thenReturn(zookeepers);
// Stub OutputFormat actions
Mockito.when(outputFormat.hasKerberosCredentials(user1)).thenReturn(true);
// Invoke the method
outputFormat.configureAccumuloOutputFormat(conf);
// The AccumuloInputFormat methods
Mockito.verify(outputFormat).setZooKeeperInstanceWithErrorChecking(conf, instanceName, zookeepers, true);
Mockito.verify(outputFormat).setConnectorInfoWithErrorChecking(conf, user, authToken);
Mockito.verify(outputFormat).setDefaultAccumuloTableName(conf, outputTable);
// Other methods we expect
Mockito.verify(helper).mergeTokenIntoJobConf(conf, hadoopToken);
// Make sure the token made it into the UGI
Collection<Token<? extends TokenIdentifier>> tokens = user1.getTokens();
Assert.assertEquals(1, tokens.size());
Assert.assertEquals(hadoopToken, tokens.iterator().next());
}
Aggregations