Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
From the class TestLazyAccumuloRow, the method testNullInit:
@Test
public void testNullInit() throws SerDeException {
  List<String> columns = Arrays.asList("row", "1", "2", "3");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(
      TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME),
      TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME),
      TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME),
      TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME));
  LazySimpleStructObjectInspector objectInspector =
      (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types,
          LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');
  DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
  Properties props = new Properties();
  props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1,cf:cq2,cf:cq3");
  props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
  AccumuloSerDeParameters params =
      new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
  rowIdFactory.init(params, props);
  ColumnMapper columnMapper = params.getColumnMapper();
  LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
  AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
  hiveRow.add("cf", "cq1", "foo".getBytes());
  hiveRow.add("cf", "cq3", "bar".getBytes());
  lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);
  // Note that we also suffer from the same issue as HIVE-3179: a field should
  // only be init'ed when it is non-NULL.
  // Check it twice to make sure we get null both times.
  Assert.assertEquals("{'row':'1','1':'foo','2':null,'3':'bar'}".replace('\'', '"'),
      SerDeUtils.getJSONString(lazyRow, objectInspector));
  Assert.assertEquals("{'row':'1','1':'foo','2':null,'3':'bar'}".replace('\'', '"'),
      SerDeUtils.getJSONString(lazyRow, objectInspector));
}
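The COLUMN_MAPPINGS value is parsed positionally: the i-th comma-separated entry binds to the i-th Hive column, with the special ":rowid" entry binding the Accumulo row ID. A minimal sketch of that parsing, using the four-argument ColumnMapper constructor seen in the input-format tests further down; passing null for the default storage type is an assumption, mirroring those tests where the property may be unset:

@Test
public void columnMapperParsingSketch() throws Exception {
  List<String> names = Arrays.asList("row", "1", "2", "3");
  List<TypeInfo> hiveTypes = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo);
  // Assumed: a null default storage type is accepted, as when the conf property is unset.
  ColumnMapper mapper = new ColumnMapper(":rowid,cf:cq1,cf:cq2,cf:cq3", null, names, hiveTypes);
  // Four entries yield four ColumnMapping objects, one per Hive column, in order.
  Assert.assertEquals(4, mapper.getColumnMappings().size());
}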
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
From the class TestLazyAccumuloRow, the method testDeserializationOfBinaryEncoding:
@Test
public void testDeserializationOfBinaryEncoding() throws Exception {
  List<String> columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
  LazySimpleStructObjectInspector objectInspector =
      (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types,
          LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');
  DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
  Properties props = new Properties();
  props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS,
      ":rowid#s,personal:given_name#s,personal:surname#s,personal:age,personal:weight,personal:height");
  props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
  props.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName());
  AccumuloSerDeParameters params =
      new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
  rowIdFactory.init(params, props);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  DataOutputStream out = new DataOutputStream(baos);
  LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
  AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
  hiveRow.add("personal", "given_name", "Bob".getBytes());
  hiveRow.add("personal", "surname", "Stevens".getBytes());
  out.writeInt(30);
  hiveRow.add("personal", "age", baos.toByteArray());
  baos.reset();
  out.writeInt(200);
  hiveRow.add("personal", "weight", baos.toByteArray());
  baos.reset();
  out.writeInt(72);
  hiveRow.add("personal", "height", baos.toByteArray());
  ColumnMapper columnMapper = params.getColumnMapper();
  lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);
  Object o = lazyRow.getField(0);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("1", ((LazyString) o).toString());
  o = lazyRow.getField(1);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("Bob", ((LazyString) o).toString());
  o = lazyRow.getField(2);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("Stevens", ((LazyString) o).toString());
  o = lazyRow.getField(3);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyDioInteger.class, o.getClass());
  Assert.assertEquals("30", ((LazyDioInteger) o).toString());
  o = lazyRow.getField(4);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyDioInteger.class, o.getClass());
  Assert.assertEquals("200", ((LazyDioInteger) o).toString());
  o = lazyRow.getField(5);
  Assert.assertNotNull(o);
  Assert.assertEquals(LazyDioInteger.class, o.getClass());
  Assert.assertEquals("72", ((LazyDioInteger) o).toString());
}
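The #s suffix on the first three mapping entries forces string encoding for those columns, while the unsuffixed int columns fall back to the binary DEFAULT_STORAGE_TYPE; that is why the ints are written with DataOutputStream.writeInt and come back as LazyDioInteger. A small sketch, not part of the test, contrasting the two byte representations of the same value:

@Test
public void encodingContrastSketch() throws Exception {
  ByteArrayOutputStream buf = new ByteArrayOutputStream();
  new DataOutputStream(buf).writeInt(30);
  // Binary encoding: the four big-endian bytes of the int (read back as LazyDioInteger).
  Assert.assertArrayEquals(new byte[] {0x00, 0x00, 0x00, 0x1E}, buf.toByteArray());
  // String encoding: the ASCII digits, as the next test stores them (read back as LazyInteger).
  Assert.assertArrayEquals(new byte[] {'3', '0'}, "30".getBytes());
}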
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
From the class TestLazyAccumuloRow, the method testExpectedDeserializationOfColumns:
@Test
public void testExpectedDeserializationOfColumns() throws Exception {
  List<String> columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
  List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
  LazySimpleStructObjectInspector objectInspector =
      (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types,
          LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');
  DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
  Properties props = new Properties();
  props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS,
      ":rowid,personal:given_name,personal:surname,personal:age,personal:weight,personal:height");
  props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
  AccumuloSerDeParameters params =
      new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
  rowIdFactory.init(params, props);
  LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
  AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
  hiveRow.add("personal", "given_name", "Bob".getBytes());
  hiveRow.add("personal", "surname", "Stevens".getBytes());
  hiveRow.add("personal", "age", "30".getBytes());
  hiveRow.add("personal", "weight", "200".getBytes());
  hiveRow.add("personal", "height", "72".getBytes());
  ColumnMapper columnMapper = params.getColumnMapper();
  lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);
  Object o = lazyRow.getField(0);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("1", ((LazyString) o).toString());
  o = lazyRow.getField(1);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("Bob", ((LazyString) o).toString());
  o = lazyRow.getField(2);
  Assert.assertEquals(LazyString.class, o.getClass());
  Assert.assertEquals("Stevens", ((LazyString) o).toString());
  o = lazyRow.getField(3);
  Assert.assertEquals(LazyInteger.class, o.getClass());
  Assert.assertEquals("30", ((LazyInteger) o).toString());
  o = lazyRow.getField(4);
  Assert.assertEquals(LazyInteger.class, o.getClass());
  Assert.assertEquals("200", ((LazyInteger) o).toString());
  o = lazyRow.getField(5);
  Assert.assertEquals(LazyInteger.class, o.getClass());
  Assert.assertEquals("72", ((LazyInteger) o).toString());
}
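This test is the string-encoded counterpart of the previous one: with no DEFAULT_STORAGE_TYPE set, every column uses string encoding and the int fields materialize as LazyInteger rather than LazyDioInteger. The repetitive assertion tail could also be written table-driven; a purely illustrative refactor sketch, behaviorally equivalent since toString() dispatches the same way with or without the casts:

// Hedged helper: walk expected (lazyClass, stringValue) pairs per field.
private static void assertFields(LazyAccumuloRow lazyRow, Object[][] expected) {
  for (int i = 0; i < expected.length; i++) {
    Object field = lazyRow.getField(i);
    Assert.assertEquals(expected[i][0], field.getClass());
    Assert.assertEquals(expected[i][1], field.toString());
  }
}

// Usage inside testExpectedDeserializationOfColumns, replacing the assert sequence:
assertFields(lazyRow, new Object[][] {{LazyString.class, "1"}, {LazyString.class, "Bob"},
    {LazyString.class, "Stevens"}, {LazyInteger.class, "30"}, {LazyInteger.class, "200"},
    {LazyInteger.class, "72"}});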
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
From the class TestHiveAccumuloTableInputFormat, the method testConfigureAccumuloInputFormatWithAuthorizations:
@Test
public void testConfigureAccumuloInputFormatWithAuthorizations() throws Exception {
  AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
  conf.set(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo,bar");
  ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS),
      conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
  Set<Pair<Text, Text>> cfCqPairs = inputformat.getPairCollection(columnMapper.getColumnMappings());
  List<IteratorSetting> iterators = Collections.emptyList();
  Set<Range> ranges = Collections.singleton(new Range());
  String instanceName = "realInstance";
  String zookeepers = "host1:2181,host2:2181,host3:2181";
  ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
  HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
  // Stub out the ZKI mock
  Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
  Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);
  // Call out to the real configure method
  Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, zkInstance, con,
      accumuloParams, columnMapper, iterators, ranges);
  // Also compute the correct cf:cq pairs so we can assert the right argument was passed
  Mockito.doCallRealMethod().when(mockInputFormat).getPairCollection(columnMapper.getColumnMappings());
  mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
  // Verify that the correct methods are invoked on AccumuloInputFormat
  Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
  Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
  Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
  Mockito.verify(mockInputFormat).setScanAuthorizations(conf, new Authorizations("foo,bar"));
  Mockito.verify(mockInputFormat).addIterators(conf, iterators);
  Mockito.verify(mockInputFormat).setRanges(conf, ranges);
  Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
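The pattern here is a Mockito partial mock: every method on HiveAccumuloTableInputFormat is stubbed except configure and getPairCollection, so the real configure runs while the AccumuloInputFormat setters it delegates to are recorded and verifiable. The same pattern in miniature, with a hypothetical Greeter class that exists only for this sketch:

// Hypothetical class, for illustration only.
class Greeter {
  void greet(String name) { System.out.println("hi " + name); }
  void greetAll(List<String> names) {
    for (String n : names) {
      greet(n);
    }
  }
}

Greeter greeter = Mockito.mock(Greeter.class);
// Un-stub only the method under test; its internal calls still hit the mock.
Mockito.doCallRealMethod().when(greeter).greetAll(Arrays.asList("alice", "bob"));
greeter.greetAll(Arrays.asList("alice", "bob"));
// The real greetAll ran, and each greet call it made is verifiable on the mock.
Mockito.verify(greeter).greet("alice");
Mockito.verify(greeter).greet("bob");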
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapper in project hive by apache.
From the class TestHiveAccumuloTableInputFormat, the method testConfigureAccumuloInputFormatWithEmptyColumns:
@Test
public void testConfigureAccumuloInputFormatWithEmptyColumns() throws Exception {
  AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
  ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS),
      conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
  HashSet<Pair<Text, Text>> cfCqPairs = Sets.newHashSet();
  List<IteratorSetting> iterators = new ArrayList<IteratorSetting>();
  Set<Range> ranges = Collections.singleton(new Range());
  String instanceName = "realInstance";
  String zookeepers = "host1:2181,host2:2181,host3:2181";
  IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
  cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave");
  cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name");
  iterators.add(cfg);
  cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
  cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
  cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50");
  cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age");
  iterators.add(cfg);
  ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
  HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
  // Stub out the ZKI mock
  Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
  Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);
  Mockito.when(mockInputFormat.getPairCollection(columnMapper.getColumnMappings()))
      .thenReturn(cfCqPairs);
  // Call out to the real configure method
  Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, zkInstance, con,
      accumuloParams, columnMapper, iterators, ranges);
  // Also compute the correct cf:cq pairs so we can assert the right argument was passed
  Mockito.doCallRealMethod().when(mockInputFormat).getPairCollection(columnMapper.getColumnMappings());
  mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
  // Verify that the correct methods are invoked on AccumuloInputFormat
  Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
  Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
  Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
  Mockito.verify(mockInputFormat).setScanAuthorizations(conf,
      con.securityOperations().getUserAuthorizations(USER));
  Mockito.verify(mockInputFormat).addIterators(conf, iterators);
  Mockito.verify(mockInputFormat).setRanges(conf, ranges);
  // fetchColumns is not called because we had no columns to fetch
}
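The trailing comment leaves the "fetchColumns is not called" claim implicit. One way it could be pinned down explicitly, mirroring the verify style of the previous test; this line is an addition for illustration, not part of the Hive test:

  // Assert the absence of the call rather than leaving it to a comment.
  Mockito.verify(mockInputFormat, Mockito.never()).fetchColumns(conf, cfCqPairs);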