Use of org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters in project hive by apache.
From the class TestLazyAccumuloRow, method testDeserializationOfBinaryEncoding:
@Test
public void testDeserializationOfBinaryEncoding() throws Exception {
  List<String> columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
  LazySimpleStructObjectInspector objectInspector =
      (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types,
          LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');
  DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
  Properties props = new Properties();
  // The #s suffix pins the string columns to string encoding; the int columns
  // carry no suffix and fall through to the default storage type set below.
  props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS,
      ":rowid#s,personal:given_name#s,personal:surname#s,personal:age,personal:weight,personal:height");
  props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
  props.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName());
  AccumuloSerDeParameters params =
      new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
  rowIdFactory.init(params, props);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataOutputStream out = new DataOutputStream(baos);
  LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
  AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
  hiveRow.add("personal", "given_name", "Bob".getBytes());
  hiveRow.add("personal", "surname", "Stevens".getBytes());
  // The int columns are written as 4-byte big-endian binary values, not as strings.
  out.writeInt(30);
  hiveRow.add("personal", "age", baos.toByteArray());
  baos.reset();
  out.writeInt(200);
  hiveRow.add("personal", "weight", baos.toByteArray());
  baos.reset();
  out.writeInt(72);
  hiveRow.add("personal", "height", baos.toByteArray());
  ColumnMapper columnMapper = params.getColumnMapper();
  lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);
  Object o = lazyRow.getField(0);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("1", ((LazyString) o).toString());
  o = lazyRow.getField(1);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("Bob", ((LazyString) o).toString());
  o = lazyRow.getField(2);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("Stevens", ((LazyString) o).toString());
  o = lazyRow.getField(3);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyDioInteger.class, o.getClass());
  Assert.assertEquals("30", ((LazyDioInteger) o).toString());
  o = lazyRow.getField(4);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyDioInteger.class, o.getClass());
  Assert.assertEquals("200", ((LazyDioInteger) o).toString());
  o = lazyRow.getField(5);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyDioInteger.class, o.getClass());
  Assert.assertEquals("72", ((LazyDioInteger) o).toString());
}
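The binary columns above round-trip through java.io.DataOutputStream, which writes an int as four big-endian bytes; that is the layout LazyDioInteger reads back in the assertions. A minimal standalone sketch of the encoding (plain JDK I/O; the encode helper is illustrative, not part of the Hive API):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class BinaryIntEncodingSketch {
  // Illustrative helper mirroring what the test does for each int column.
  static byte[] encode(int value) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    out.writeInt(value); // DataOutputStream writes 4 bytes, big-endian
    return baos.toByteArray();
  }

  public static void main(String[] args) throws IOException {
    byte[] bytes = encode(30);
    // 30 encodes as 00 00 00 1e; these four bytes are what land in the Accumulo cell
    System.out.printf("%02x %02x %02x %02x%n", bytes[0], bytes[1], bytes[2], bytes[3]);
  }
}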
Use of org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters in project hive by apache.
From the class TestLazyAccumuloRow, method testExpectedDeserializationOfColumns:
@Test
public void testExpectedDeserializationOfColumns() throws Exception {
  List<String> columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
  LazySimpleStructObjectInspector objectInspector =
      (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types,
          LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');
  DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
  Properties props = new Properties();
  props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS,
      ":rowid,personal:given_name,personal:surname,personal:age,personal:weight,personal:height");
  props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
  AccumuloSerDeParameters params =
      new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
  rowIdFactory.init(params, props);
  LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
  AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
  hiveRow.add("personal", "given_name", "Bob".getBytes());
  hiveRow.add("personal", "surname", "Stevens".getBytes());
  hiveRow.add("personal", "age", "30".getBytes());
  hiveRow.add("personal", "weight", "200".getBytes());
  hiveRow.add("personal", "height", "72".getBytes());
  ColumnMapper columnMapper = params.getColumnMapper();
  lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);
  Object o = lazyRow.getField(0);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("1", ((LazyString) o).toString());
  o = lazyRow.getField(1);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("Bob", ((LazyString) o).toString());
  o = lazyRow.getField(2);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("Stevens", ((LazyString) o).toString());
  o = lazyRow.getField(3);
  Assert.assertEquals(LazyInteger.class, o.getClass());
  Assert.assertEquals("30", ((LazyInteger) o).toString());
  o = lazyRow.getField(4);
  Assert.assertEquals(LazyInteger.class, o.getClass());
  Assert.assertEquals("200", ((LazyInteger) o).toString());
  o = lazyRow.getField(5);
  Assert.assertEquals(LazyInteger.class, o.getClass());
  Assert.assertEquals("72", ((LazyInteger) o).toString());
}
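In contrast to the first test, this mapping carries no encoding suffixes and sets no DEFAULT_STORAGE_TYPE, so every cell holds the UTF-8 text of the value and the plain LazyInteger parses it. A hedged sketch of a middle ground, assuming the per-column #b binary suffix of the column-mapping syntax (the binary counterpart of the #s suffix used above), which would opt only the int columns into binary encoding without changing the global default:

Properties props = new Properties();
// Hypothetical variant: string columns stay on the default string encoding,
// and only the int columns are marked binary with #b.
props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS,
    ":rowid,personal:given_name,personal:surname,personal:age#b,personal:weight#b,personal:height#b");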
Use of org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters in project hive by apache.
From the class AccumuloStorageHandler, method configureJobConf:
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
  helper.loadDependentJars(jobConf);
  Properties tblProperties = tableDesc.getProperties();
  AccumuloSerDeParameters serDeParams = null;
  try {
    serDeParams = new AccumuloSerDeParameters(jobConf, tblProperties, AccumuloSerDe.class.getName());
  } catch (SerDeException e) {
    LOG.error("Could not instantiate AccumuloSerDeParameters", e);
    return;
  }
  try {
    serDeParams.getRowIdFactory().addDependencyJars(jobConf);
  } catch (IOException e) {
    LOG.error("Could not add necessary dependencies for " + serDeParams.getRowIdFactory().getClass(), e);
  }
  // When Kerberos is enabled, add the Accumulo delegation token to the
  // Job so that it gets passed down to the YARN/Tez task.
  if (connectionParams.useSasl()) {
    try {
      // Open an Accumulo connection
      Connector conn = connectionParams.getConnector();
      // Convert the Accumulo token into a Hadoop token
      Token<? extends TokenIdentifier> accumuloToken =
          helper.setConnectorInfoForInputAndOutput(connectionParams, conn, jobConf);
      LOG.debug("Adding Hadoop Token for Accumulo to Job's Credentials");
      // Add the Hadoop token to the JobConf
      helper.mergeTokenIntoJobConf(jobConf, accumuloToken);
      LOG.debug("All job tokens: " + jobConf.getCredentials().getAllTokens());
    } catch (Exception e) {
      throw new RuntimeException(
          "Failed to obtain DelegationToken for " + connectionParams.getAccumuloUserName(), e);
    }
  }
}
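The token plumbing at the end of the method follows the standard Hadoop credentials pattern: whatever helper.mergeTokenIntoJobConf does internally, the observable effect is that the delegation token joins the job's Credentials, which YARN/Tez serializes with the job and hands to each task. A minimal sketch with plain Hadoop APIs (the mergeToken method is illustrative, not Hive's helper):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;

public class TokenMergeSketch {
  // Add the token to the job's Credentials, keyed by its service name,
  // so it ships with the job and is visible to the tasks.
  static void mergeToken(JobConf jobConf, Token<? extends TokenIdentifier> token) {
    jobConf.getCredentials().addToken(token.getService(), token);
  }
}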