Search in sources :

Example 1 with DefaultAccumuloRowIdFactory

use of org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory in project hive by apache.

the class TestLazyAccumuloRow method testNullInit.

@Test
public void testNullInit() throws SerDeException {
    List<String> columns = Arrays.asList("row", "1", "2", "3");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME), TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME), TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME), TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME));
    LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');
    DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
    Properties props = new Properties();
    props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1,cf:cq2,cf:cq3");
    props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters params = new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
    rowIdFactory.init(params, props);
    ColumnMapper columnMapper = params.getColumnMapper();
    LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
    AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
    hiveRow.add("cf", "cq1", "foo".getBytes());
    hiveRow.add("cf", "cq3", "bar".getBytes());
    lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);
    // Noticed that we also suffer from the same issue as HIVE-3179
    // Only want to call a field init'ed when it's non-NULL
    // Check it twice, make sure we get null both times
    Assert.assertEquals("{'row':'1','1':'foo','2':null,'3':'bar'}".replace('\'', '"'), SerDeUtils.getJSONString(lazyRow, objectInspector));
    Assert.assertEquals("{'row':'1','1':'foo','2':null,'3':'bar'}".replace('\'', '"'), SerDeUtils.getJSONString(lazyRow, objectInspector));
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) DefaultAccumuloRowIdFactory(org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 2 with DefaultAccumuloRowIdFactory

use of org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory in project hive by apache.

the class TestLazyAccumuloRow method testExpectedDeserializationOfColumns.

@Test
public void testExpectedDeserializationOfColumns() throws Exception {
    List<String> columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');
    DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
    Properties props = new Properties();
    props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,personal:given_name,personal:surname,personal:age,personal:weight,personal:height");
    props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters params = new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
    rowIdFactory.init(params, props);
    LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
    AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
    hiveRow.add("personal", "given_name", "Bob".getBytes());
    hiveRow.add("personal", "surname", "Stevens".getBytes());
    hiveRow.add("personal", "age", "30".getBytes());
    hiveRow.add("personal", "weight", "200".getBytes());
    hiveRow.add("personal", "height", "72".getBytes());
    ColumnMapper columnMapper = params.getColumnMapper();
    lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);
    Object o = lazyRow.getField(0);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("1", ((LazyString) o).toString());
    o = lazyRow.getField(1);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("Bob", ((LazyString) o).toString());
    o = lazyRow.getField(2);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("Stevens", ((LazyString) o).toString());
    o = lazyRow.getField(3);
    Assert.assertEquals(LazyInteger.class, o.getClass());
    Assert.assertEquals("30", ((LazyInteger) o).toString());
    o = lazyRow.getField(4);
    Assert.assertEquals(LazyInteger.class, o.getClass());
    Assert.assertEquals("200", ((LazyInteger) o).toString());
    o = lazyRow.getField(5);
    Assert.assertEquals(LazyInteger.class, o.getClass());
    Assert.assertEquals("72", ((LazyInteger) o).toString());
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) DefaultAccumuloRowIdFactory(org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 3 with DefaultAccumuloRowIdFactory

use of org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory in project hive by apache.

the class TestLazyAccumuloRow method testDeserializationOfBinaryEncoding.

@Test
public void testDeserializationOfBinaryEncoding() throws Exception {
    List<String> columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');
    DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
    Properties props = new Properties();
    props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid#s,personal:given_name#s,personal:surname#s,personal:age,personal:weight,personal:height");
    props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    props.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName());
    AccumuloSerDeParameters params = new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
    rowIdFactory.init(params, props);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
    AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
    hiveRow.add("personal", "given_name", "Bob".getBytes());
    hiveRow.add("personal", "surname", "Stevens".getBytes());
    out.writeInt(30);
    hiveRow.add("personal", "age", baos.toByteArray());
    baos.reset();
    out.writeInt(200);
    hiveRow.add("personal", "weight", baos.toByteArray());
    baos.reset();
    out.writeInt(72);
    hiveRow.add("personal", "height", baos.toByteArray());
    ColumnMapper columnMapper = params.getColumnMapper();
    lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);
    Object o = lazyRow.getField(0);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("1", ((LazyString) o).toString());
    o = lazyRow.getField(1);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("Bob", ((LazyString) o).toString());
    o = lazyRow.getField(2);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyString.class, o.getClass());
    Assert.assertEquals("Stevens", ((LazyString) o).toString());
    o = lazyRow.getField(3);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyDioInteger.class, o.getClass());
    Assert.assertEquals("30", ((LazyDioInteger) o).toString());
    o = lazyRow.getField(4);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyDioInteger.class, o.getClass());
    Assert.assertEquals("200", ((LazyDioInteger) o).toString());
    o = lazyRow.getField(5);
    Assert.assertNotNull(o);
    Assert.assertEquals(LazyDioInteger.class, o.getClass());
    Assert.assertEquals("72", ((LazyDioInteger) o).toString());
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Text(org.apache.hadoop.io.Text) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) DefaultAccumuloRowIdFactory(org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Aggregations

Properties (java.util.Properties)3 Configuration (org.apache.hadoop.conf.Configuration)3 ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper)3 AccumuloSerDe (org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe)3 AccumuloSerDeParameters (org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters)3 DefaultAccumuloRowIdFactory (org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory)3 LazyString (org.apache.hadoop.hive.serde2.lazy.LazyString)3 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)3 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)3 Text (org.apache.hadoop.io.Text)3 Test (org.junit.Test)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 DataOutputStream (java.io.DataOutputStream)1