Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
The class HiveAccumuloTableInputFormat, method getSplits:
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
  final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
  final Instance instance = accumuloParams.getInstance();
  final ColumnMapper columnMapper;
  try {
    columnMapper = getColumnMapper(jobConf);
  } catch (TooManyAccumuloColumnsException e) {
    throw new IOException(e);
  }
  JobContext context = ShimLoader.getHadoopShims().newJobContext(Job.getInstance(jobConf));
  Path[] tablePaths = FileInputFormat.getInputPaths(context);
  try {
    Connector connector = null;
    // Need to get a Connector so we can look up the user's authorizations if not otherwise specified
    if (accumuloParams.useSasl()) {
      log.info("Current user: " + UserGroupInformation.getCurrentUser());
      // In a YARN/Tez job, we no longer have the Kerberos credentials; use the delegation token
      AuthenticationToken token = ConfiguratorBase.getAuthenticationToken(AccumuloInputFormat.class, jobConf);
      if (null != token && !jobConf.getCredentials().getAllTokens().isEmpty()) {
        // Convert the stub from the configuration back into a normal Token
        log.info("Found authentication token in Configuration: " + token);
        log.info("Job credential tokens: " + jobConf.getCredentials().getAllTokens());
        AuthenticationToken unwrappedToken = ConfiguratorBase.unwrapAuthenticationToken(jobConf, token);
        log.info("Converted authentication token from Configuration into: " + unwrappedToken);
        // If the job did not actually run with the delegation token, unwrapping
        // will return back the original token (which we know is insufficient)
        if (unwrappedToken != token) {
          log.info("Creating Accumulo Connector with unwrapped delegation token");
          connector = instance.getConnector(accumuloParams.getAccumuloUserName(), unwrappedToken);
        } else {
          log.info("Job credentials did not contain delegation token, fetching new token");
        }
      }
      if (connector == null) {
        log.info("Obtaining Accumulo Connector using KerberosToken");
        // Construct a KerberosToken -- relies on ProxyUser configuration. Will be the client making
        // the request on top of the HS2 user. Accumulo will require proper proxy-user auth configs.
        connector = instance.getConnector(accumuloParams.getAccumuloUserName(), new KerberosToken(accumuloParams.getAccumuloUserName()));
      }
    } else {
      // Still in the local JVM, use the username+password or Kerberos credentials
      connector = accumuloParams.getConnector(instance);
    }
    final List<ColumnMapping> columnMappings = columnMapper.getColumnMappings();
    final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);
    final Collection<Range> ranges = predicateHandler.getRanges(jobConf, columnMapper);
    // An empty collection of Ranges would (unexpectedly) scan all data.
    // We don't want that.
    if (null != ranges && ranges.isEmpty()) {
      return new InputSplit[0];
    }
    // Set the relevant information in the Configuration for the AccumuloInputFormat
    configure(jobConf, instance, connector, accumuloParams, columnMapper, iterators, ranges);
    int numColumns = columnMappings.size();
    List<Integer> readColIds = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    // Sanity check
    if (numColumns < readColIds.size())
      throw new IOException("Number of column mappings (" + numColumns + ") is less than the number of Hive table columns to read (" + readColIds.size() + ")");
    // Get splits from Accumulo
    InputSplit[] splits = accumuloInputFormat.getSplits(jobConf, numSplits);
    HiveAccumuloSplit[] hiveSplits = new HiveAccumuloSplit[splits.length];
    for (int i = 0; i < splits.length; i++) {
      RangeInputSplit ris = (RangeInputSplit) splits[i];
      ris.setLogLevel(Level.DEBUG);
      hiveSplits[i] = new HiveAccumuloSplit(ris, tablePaths[0]);
    }
    return hiveSplits;
  } catch (AccumuloException e) {
    log.error("Could not configure AccumuloInputFormat", e);
    throw new IOException(StringUtils.stringifyException(e));
  } catch (AccumuloSecurityException e) {
    log.error("Could not configure AccumuloInputFormat", e);
    throw new IOException(StringUtils.stringifyException(e));
  } catch (SerDeException e) {
    log.error("Could not configure AccumuloInputFormat", e);
    throw new IOException(StringUtils.stringifyException(e));
  }
}
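
For context, a ColumnMapper can also be built directly from a column-mapping string and the Hive schema, via the four-argument constructor the tests below exercise. A minimal hedged sketch (the schema and mapping are hypothetical, the null default storage type mirrors an unset configuration value, and the same imports as the snippets on this page are assumed):

// Hedged sketch: constructing a ColumnMapper by hand. ":rowID" binds the first
// Hive column to the Accumulo row id; "person:name" binds the second column to
// family "person", qualifier "name". The constructor may throw
// TooManyAccumuloColumnsException if mappings outnumber Hive columns.
List<String> hiveColumnNames = Arrays.asList("row", "name");
List<TypeInfo> hiveColumnTypes = Arrays.<TypeInfo>asList(
    TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
ColumnMapper mapper = new ColumnMapper(":rowID,person:name",
    null /* default storage type, hypothetical unset value */,
    hiveColumnNames, hiveColumnTypes);
for (ColumnMapping mapping : mapper.getColumnMappings()) {
  System.out.println(mapping);
}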
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
The class TestDefaultAccumuloRowIdFactory, method testCorrectComplexInspectors:
@Test
public void testCorrectComplexInspectors() throws SerDeException {
  AccumuloSerDe accumuloSerDe = new AccumuloSerDe();
  Properties properties = new Properties();
  Configuration conf = new Configuration();
  properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq");
  properties.setProperty(serdeConstants.LIST_COLUMNS, "row,col");
  properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "struct<col1:int,col2:int>,map<string,string>");
  accumuloSerDe.initialize(conf, properties, null);
  AccumuloRowIdFactory factory = accumuloSerDe.getParams().getRowIdFactory();
  List<TypeInfo> columnTypes = accumuloSerDe.getParams().getHiveColumnTypes();
  ColumnMapper mapper = accumuloSerDe.getParams().getColumnMapper();
  LazySerDeParameters serDeParams = accumuloSerDe.getParams().getSerDeParameters();
  List<ObjectInspector> OIs = accumuloSerDe.getColumnObjectInspectors(columnTypes, serDeParams, mapper.getColumnMappings(), factory);
  // Expect the correct OIs
  Assert.assertEquals(2, OIs.size());
  Assert.assertEquals(LazySimpleStructObjectInspector.class, OIs.get(0).getClass());
  Assert.assertEquals(LazyMapObjectInspector.class, OIs.get(1).getClass());
  LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) OIs.get(0);
  Assert.assertEquals(2, (int) structOI.getSeparator());
  LazyMapObjectInspector mapOI = (LazyMapObjectInspector) OIs.get(1);
  Assert.assertEquals(2, (int) mapOI.getItemSeparator());
  Assert.assertEquals(3, (int) mapOI.getKeyValueSeparator());
}
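
The expected bytes 2 and 3 are LazySimpleSerDe's default separators: \001 sits between top-level fields, \002 at the first nesting level (struct members, map entries), and \003 one level deeper (map key from value). A small sketch with made-up data, assuming those defaults:

// Hedged sketch: where the separators the test asserts on would appear in raw
// cell values, assuming LazySimpleSerDe's default separator hierarchy.
String structCell = "1" + '\002' + "2"; // struct<col1:int,col2:int> => col1=1, col2=2
String mapCell = "k1" + '\003' + "v1" + '\002' + "k2" + '\003' + "v2"; // {k1=v1, k2=v2}
System.out.println(structCell.length() + " " + mapCell.length());
System.out.println((int) '\002'); // 2: structOI.getSeparator() and mapOI.getItemSeparator()
System.out.println((int) '\003'); // 3: mapOI.getKeyValueSeparator()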
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
The class TestDefaultAccumuloRowIdFactory, method testCorrectPrimitiveInspectors:
@Test
public void testCorrectPrimitiveInspectors() throws SerDeException {
  AccumuloSerDe accumuloSerDe = new AccumuloSerDe();
  Properties properties = new Properties();
  Configuration conf = new Configuration();
  properties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,cf:cq");
  properties.setProperty(serdeConstants.LIST_COLUMNS, "row,col");
  properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,int");
  accumuloSerDe.initialize(conf, properties, null);
  AccumuloRowIdFactory factory = accumuloSerDe.getParams().getRowIdFactory();
  List<TypeInfo> columnTypes = accumuloSerDe.getParams().getHiveColumnTypes();
  ColumnMapper mapper = accumuloSerDe.getParams().getColumnMapper();
  LazySerDeParameters serDeParams = accumuloSerDe.getParams().getSerDeParameters();
  List<ObjectInspector> OIs = accumuloSerDe.getColumnObjectInspectors(columnTypes, serDeParams, mapper.getColumnMappings(), factory);
  Assert.assertEquals(2, OIs.size());
  Assert.assertEquals(LazyStringObjectInspector.class, OIs.get(0).getClass());
  Assert.assertEquals(LazyIntObjectInspector.class, OIs.get(1).getClass());
}
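
The setup pattern generalizes to wider schemas: COLUMN_MAPPINGS, LIST_COLUMNS, and LIST_COLUMN_TYPES stay parallel, one entry per Hive column, with ":rowID" marking the row-id column. A hedged sketch with a hypothetical three-column schema, reusing the calls shown above:

// Hedged sketch: the three properties must stay in sync, one mapping per Hive
// column. The schema below is hypothetical.
Properties props = new Properties();
props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowID,person:name,person:age");
props.setProperty(serdeConstants.LIST_COLUMNS, "row,name,age");
props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string,int");
AccumuloSerDe serde = new AccumuloSerDe();
serde.initialize(new Configuration(), props, null);
ColumnMapper mapper = serde.getParams().getColumnMapper(); // yields three mappings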
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
The class TestHiveAccumuloTableInputFormat, method testMapColumnPairs:
@Test
public void testMapColumnPairs() throws TooManyAccumuloColumnsException {
  ColumnMapper columnMapper = new ColumnMapper(":rowID,cf:*",
      conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE),
      Arrays.asList("row", "col"),
      Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo,
          TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo)));
  Set<Pair<Text, Text>> pairs = inputformat.getPairCollection(columnMapper.getColumnMappings());
  Assert.assertEquals(1, pairs.size());
  Pair<Text, Text> cfCq = pairs.iterator().next();
  Assert.assertEquals("cf", cfCq.getFirst().toString());
  Assert.assertNull(cfCq.getSecond());
}
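
For contrast, mixing an explicit "cf:cq" pair with a wildcard "cf2:*" should yield one Pair per mapped column, where a null qualifier means "fetch the whole family". A hedged sketch reusing the test's conf and inputformat fields (the schema is hypothetical):

// Hedged sketch: expect two pairs, (cf, cq) and (cf2, null).
ColumnMapper mixed = new ColumnMapper(":rowID,cf:cq,cf2:*",
    conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE),
    Arrays.asList("row", "col1", "col2"),
    Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo,
        TypeInfoFactory.stringTypeInfo,
        TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo,
            TypeInfoFactory.stringTypeInfo)));
Set<Pair<Text, Text>> mixedPairs = inputformat.getPairCollection(mixed.getColumnMappings());
Assert.assertEquals(2, mixedPairs.size());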
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
The class TestHiveAccumuloTableInputFormat, method testConfigureAccumuloInputFormatWithIterators:
@Test
public void testConfigureAccumuloInputFormatWithIterators() throws Exception {
  AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
  ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS),
      conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
  Set<Pair<Text, Text>> cfCqPairs = inputformat.getPairCollection(columnMapper.getColumnMappings());
  List<IteratorSetting> iterators = new ArrayList<IteratorSetting>();
  Set<Range> ranges = Collections.singleton(new Range());
  String instanceName = "realInstance";
  String zookeepers = "host1:2181,host2:2181,host3:2181";
  IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
  cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave");
  cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name");
  iterators.add(cfg);
  cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
  cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50");
  cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age");
  iterators.add(cfg);
  ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
  HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
  HiveAccumuloHelper helper = Mockito.mock(HiveAccumuloHelper.class);
  // Stub out the ZKI mock
  Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
  Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);
  // Stub out a mocked Helper instance
  Mockito.when(mockInputFormat.getHelper()).thenReturn(helper);
  // Call out to the real configure method
  Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
  // Also compute the correct cf:cq pairs so we can assert the right argument was passed
  Mockito.doCallRealMethod().when(mockInputFormat).getPairCollection(columnMapper.getColumnMappings());
  mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
  // Verify that the correct methods are invoked on AccumuloInputFormat
  Mockito.verify(helper).setInputFormatZooKeeperInstance(conf, instanceName, zookeepers, false);
  Mockito.verify(helper).setInputFormatConnectorInfo(conf, USER, new PasswordToken(PASS));
  Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
  Mockito.verify(mockInputFormat).setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
  Mockito.verify(mockInputFormat).addIterators(conf, iterators);
  Mockito.verify(mockInputFormat).setRanges(conf, ranges);
  Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
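
The two iterator settings above encode an equality pushdown, roughly WHERE name = 'dave' AND age = 50 over the person family. Other comparisons follow the same four-option contract; a hedged sketch, assuming a GreaterThanOrEqual operator class exists alongside Equal in the same compare package:

// Hedged sketch: a hypothetical "age >= 21" pushdown filter. A distinct
// priority (51) avoids clashing with the settings built above.
IteratorSetting ageAtLeast21 = new IteratorSetting(51, PrimitiveComparisonFilter.class);
ageAtLeast21.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName());
ageAtLeast21.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, GreaterThanOrEqual.class.getName());
ageAtLeast21.addOption(PrimitiveComparisonFilter.CONST_VAL, "21");
ageAtLeast21.addOption(PrimitiveComparisonFilter.COLUMN, "person:age");
iterators.add(ageAtLeast21);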