Search in sources :

Example 51 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

In class TestLazyAccumuloRow, method testDeserializationOfBinaryEncoding.

/**
 * Checks that {@code LazyAccumuloRow} deserializes a row whose integer columns were written in
 * binary form.
 *
 * <p>The default storage type is set to {@code ColumnEncoding.BINARY}. The row id and the two name
 * columns carry a {@code #s} suffix in the column mapping (presumably a per-column string-encoding
 * override -- confirm against the column-mapping syntax). The three integer values are produced
 * with {@code DataOutputStream.writeInt}. Every field is then checked for non-nullness, for its
 * lazy wrapper class ({@code LazyString} or {@code LazyDioInteger}) and for its string rendering.
 *
 * @throws Exception propagated from fixture setup or from writing the binary values
 */
@Test
public void testDeserializationOfBinaryEncoding() throws Exception {
    // Explicit charset so the fixture bytes never depend on the JVM's default charset.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;

    List<String> columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');

    DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
    Properties props = new Properties();
    props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid#s,personal:given_name#s,personal:surname#s,personal:age,personal:weight,personal:height");
    props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    props.setProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, ColumnEncoding.BINARY.getName());
    AccumuloSerDeParameters params = new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
    rowIdFactory.init(params, props);

    LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
    AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
    hiveRow.add("personal", "given_name", "Bob".getBytes(utf8));
    hiveRow.add("personal", "surname", "Stevens".getBytes(utf8));

    // Each int is written through the DataOutputStream; the buffer is reset
    // before every write so each column receives only its own value.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);
    String[] intQualifiers = { "age", "weight", "height" };
    int[] intValues = { 30, 200, 72 };
    for (int i = 0; i < intValues.length; i++) {
        baos.reset();
        out.writeInt(intValues[i]);
        hiveRow.add("personal", intQualifiers[i], baos.toByteArray());
    }

    ColumnMapper columnMapper = params.getColumnMapper();
    lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);

    // Fields 0-2: row id and the two name columns.
    String[] expectedStrings = { "1", "Bob", "Stevens" };
    for (int i = 0; i < expectedStrings.length; i++) {
        Object field = lazyRow.getField(i);
        Assert.assertNotNull("field " + i, field);
        Assert.assertEquals("field " + i, LazyString.class, field.getClass());
        Assert.assertEquals("field " + i, expectedStrings[i], field.toString());
    }

    // Fields 3-5: the binary-written integers.
    String[] expectedInts = { "30", "200", "72" };
    for (int i = 0; i < expectedInts.length; i++) {
        int fieldIndex = expectedStrings.length + i;
        Object field = lazyRow.getField(fieldIndex);
        Assert.assertNotNull("field " + fieldIndex, field);
        Assert.assertEquals("field " + fieldIndex, LazyDioInteger.class, field.getClass());
        Assert.assertEquals("field " + fieldIndex, expectedInts[i], field.toString());
    }
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Text(org.apache.hadoop.io.Text) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) DefaultAccumuloRowIdFactory(org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 52 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

In class TestLazyAccumuloRow, method testExpectedDeserializationOfColumns.

/**
 * Checks that {@code LazyAccumuloRow} deserializes a row whose values are all supplied as the
 * bytes of their textual form (no storage type or per-column encoding suffix is configured here).
 *
 * <p>Every field is checked for non-nullness, for its lazy wrapper class ({@code LazyString} or
 * {@code LazyInteger}) and for its string rendering.
 *
 * @throws Exception propagated from fixture setup
 */
@Test
public void testExpectedDeserializationOfColumns() throws Exception {
    // Explicit charset so the fixture bytes never depend on the JVM's default charset.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;

    List<String> columns = Arrays.asList("row", "given_name", "surname", "age", "weight", "height");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo);
    LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, LazySerDeParameters.DefaultSeparators, new Text("\\N"), false, false, (byte) '\\');

    DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory();
    Properties props = new Properties();
    props.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,personal:given_name,personal:surname,personal:age,personal:weight,personal:height");
    props.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters params = new AccumuloSerDeParameters(new Configuration(), props, AccumuloSerDe.class.getName());
    rowIdFactory.init(params, props);

    LazyAccumuloRow lazyRow = new LazyAccumuloRow(objectInspector);
    AccumuloHiveRow hiveRow = new AccumuloHiveRow("1");
    hiveRow.add("personal", "given_name", "Bob".getBytes(utf8));
    hiveRow.add("personal", "surname", "Stevens".getBytes(utf8));
    hiveRow.add("personal", "age", "30".getBytes(utf8));
    hiveRow.add("personal", "weight", "200".getBytes(utf8));
    hiveRow.add("personal", "height", "72".getBytes(utf8));

    ColumnMapper columnMapper = params.getColumnMapper();
    lazyRow.init(hiveRow, columnMapper.getColumnMappings(), rowIdFactory);

    // Fields 0-2: row id and the two name columns.
    String[] expectedStrings = { "1", "Bob", "Stevens" };
    for (int i = 0; i < expectedStrings.length; i++) {
        Object field = lazyRow.getField(i);
        // Fail with an assertion rather than an NPE when a field is missing.
        Assert.assertNotNull("field " + i, field);
        Assert.assertEquals("field " + i, LazyString.class, field.getClass());
        Assert.assertEquals("field " + i, expectedStrings[i], field.toString());
    }

    // Fields 3-5: the integer columns supplied as text.
    String[] expectedInts = { "30", "200", "72" };
    for (int i = 0; i < expectedInts.length; i++) {
        int fieldIndex = expectedStrings.length + i;
        Object field = lazyRow.getField(fieldIndex);
        Assert.assertNotNull("field " + fieldIndex, field);
        Assert.assertEquals("field " + fieldIndex, LazyInteger.class, field.getClass());
        Assert.assertEquals("field " + fieldIndex, expectedInts[i], field.toString());
    }
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) DefaultAccumuloRowIdFactory(org.apache.hadoop.hive.accumulo.serde.DefaultAccumuloRowIdFactory) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) AccumuloSerDe(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe) AccumuloSerDeParameters(org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Test(org.junit.Test)

Example 53 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

In class TestAccumuloRowSerializer, method testVisibilityLabel.

/**
 * Checks that {@code AccumuloRowSerializer} stamps every column update with the configured
 * {@code ColumnVisibility}.
 *
 * <p>A space-delimited struct {@code "row1 10 20 value"} is serialized with a serializer built
 * around {@code new ColumnVisibility("foo")}. The resulting {@code Mutation} must have row
 * {@code "row1"} and three updates in family {@code cf}, each with visibility {@code "foo"}.
 * The values of {@code cq1} and {@code cq2} (mapped with a {@code #b} suffix) are compared with
 * the bytes produced by {@code DataOutputStream.writeInt}; {@code cq3} is compared as text.
 *
 * @throws IOException if writing the expected binary values fails
 * @throws SerDeException propagated from SerDe parameter setup or serialization
 */
@Test
public void testVisibilityLabel() throws IOException, SerDeException {
    // Explicit charset so byte<->String conversions never depend on the JVM's default charset.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;

    List<String> columns = Arrays.asList("row", "cq1", "cq2", "cq3");
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    List<String> typeNames = new ArrayList<String>(types.size());
    for (TypeInfo type : types) {
        typeNames.add(type.getTypeName());
    }

    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq1#b,cf:cq2#b,cf:cq3");
    tableProperties.setProperty(serdeConstants.FIELD_DELIM, " ");
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(typeNames));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    LazySimpleStructObjectInspector oi = (LazySimpleStructObjectInspector) LazyFactory.createLazyStructInspector(columns, types, serDeParams.getSeparators(), serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility("foo"), accumuloSerDeParams.getRowIdFactory());

    // Create the LazyStruct from the LazySimpleStructObjectInspector.
    LazyStruct obj = (LazyStruct) LazyFactory.createLazyObject(oi);
    ByteArrayRef byteRef = new ByteArrayRef();
    byteRef.setData(new byte[] { 'r', 'o', 'w', '1', ' ', '1', '0', ' ', '2', '0', ' ', 'v', 'a', 'l', 'u', 'e' });
    obj.init(byteRef, 0, byteRef.getData().length);

    Mutation m = (Mutation) serializer.serialize(obj, oi);
    Assert.assertArrayEquals("row1".getBytes(utf8), m.getRow());
    List<ColumnUpdate> updates = m.getUpdates();
    Assert.assertEquals(3, updates.size());

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(baos);

    // cq1 and cq2: values are compared with the bytes writeInt produces.
    String[] binaryQualifiers = { "cq1", "cq2" };
    int[] binaryValues = { 10, 20 };
    for (int i = 0; i < binaryValues.length; i++) {
        ColumnUpdate update = updates.get(i);
        Assert.assertEquals("cf", new String(update.getColumnFamily(), utf8));
        Assert.assertEquals(binaryQualifiers[i], new String(update.getColumnQualifier(), utf8));
        Assert.assertEquals("foo", new String(update.getColumnVisibility(), utf8));
        baos.reset();
        out.writeInt(binaryValues[i]);
        Assert.assertArrayEquals(baos.toByteArray(), update.getValue());
    }

    // cq3: plain text value.
    ColumnUpdate update = updates.get(2);
    Assert.assertEquals("cf", new String(update.getColumnFamily(), utf8));
    Assert.assertEquals("cq3", new String(update.getColumnQualifier(), utf8));
    Assert.assertEquals("foo", new String(update.getColumnVisibility(), utf8));
    Assert.assertEquals("value", new String(update.getValue(), utf8));
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ColumnUpdate(org.apache.accumulo.core.data.ColumnUpdate) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Properties(java.util.Properties) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) Mutation(org.apache.accumulo.core.data.Mutation) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) Test(org.junit.Test)

Example 54 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

In class TestGenericUDFRegexp, method testNullConstant.

/**
 * Initializes {@code GenericUDFRegExp} with a constant regex argument that is {@code null} and
 * hands several inputs, including a {@code null} string, to {@code runAndVerifyConst} with an
 * expected result of {@code null}.
 *
 * @throws HiveException propagated from the calls made here
 */
public void testNullConstant() throws HiveException {
    // Typed null: the constant regex argument under test.
    final Text nullPattern = null;

    GenericUDFRegExp regexpUdf = new GenericUDFRegExp();
    ObjectInspector[] argumentInspectors = {
        PrimitiveObjectInspectorFactory.writableStringObjectInspector,
        PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, nullPattern)
    };
    regexpUdf.initialize(argumentInspectors);

    // Expected result is null for every input.
    String[] inputs = { "fofo", "fofofo", "fobar", null };
    for (String input : inputs) {
        runAndVerifyConst(input, nullPattern, null, regexpUdf);
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Text(org.apache.hadoop.io.Text)

Example 55 with TypeInfoFactory.stringTypeInfo

use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.stringTypeInfo in project hive by apache.

In class TestGenericUDFSubstringIndex, method testSubstringIndexConst.

/**
 * Initializes {@code GenericUDFSubstringIndex} with a constant delimiter {@code "."} and a
 * constant count {@code 2}, then hands the input {@code "www.apache.org"} and the expected
 * result {@code "www.apache"} to {@code runAndVerifyConst}.
 *
 * @throws HiveException propagated from the calls made here
 */
public void testSubstringIndexConst() throws HiveException {
    GenericUDFSubstringIndex substringIndexUdf = new GenericUDFSubstringIndex();

    // Argument 0 is the (non-constant) string; arguments 1 and 2 are constants.
    ObjectInspector[] argumentInspectors = {
        PrimitiveObjectInspectorFactory.writableStringObjectInspector,
        PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.stringTypeInfo, new Text(".")),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector(TypeInfoFactory.intTypeInfo, new IntWritable(2))
    };
    substringIndexUdf.initialize(argumentInspectors);

    runAndVerifyConst("www.apache.org", "www.apache", substringIndexUdf);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Text(org.apache.hadoop.io.Text) IntWritable(org.apache.hadoop.io.IntWritable)

Aggregations

Test (org.junit.Test)37 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)32 Text (org.apache.hadoop.io.Text)29 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)24 ArrayList (java.util.ArrayList)13 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)13 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)12 PrimitiveCategory (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory)12 Configuration (org.apache.hadoop.conf.Configuration)11 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)11 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)11 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)11 ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc)11 Properties (java.util.Properties)10 ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc)10 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)10 ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc)9 Mutation (org.apache.accumulo.core.data.Mutation)8 ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)8 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)8