Search in sources :

Example 11 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in the Apache Hive project.

In class LazySerDeParameters, method extractColumnInfo:

/**
 * Populates {@code columnNames} and {@code columnTypes} from the table properties.
 *
 * <p>Column names are read from {@code serdeConstants.LIST_COLUMNS} and split on the
 * value of {@code serdeConstants.COLUMN_NAME_DELIMITER} (a comma when that property is
 * absent). Column types are read from {@code serdeConstants.LIST_COLUMN_TYPES}; when
 * that property is missing, every column is given the string type. When a configuration
 * is supplied, each {@code TimestampLocalTZTypeInfo} entry is replaced by one built from
 * the configured {@code HIVE_LOCAL_TIME_ZONE} value.
 *
 * @param conf configuration used to look up the local time zone; when {@code null} the
 *             parsed type infos are left untouched
 * @throws SerDeException if the number of column names differs from the number of
 *                        column types
 */
public void extractColumnInfo(Configuration conf) throws SerDeException {
    final String rawNames = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
    String rawTypes = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);

    // Fall back to a comma when the table does not declare its own delimiter.
    final String nameSeparator;
    if (tableProperties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER)) {
        nameSeparator = tableProperties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER);
    } else {
        nameSeparator = String.valueOf(SerDeUtils.COMMA);
    }

    if (rawNames == null || rawNames.isEmpty()) {
        columnNames = new ArrayList<String>();
    } else {
        // Note: String.split interprets the separator as a regular expression.
        columnNames = Arrays.asList(rawNames.split(nameSeparator));
    }

    if (rawTypes == null) {
        // No explicit types: synthesise "string:string:..." with one entry per column.
        StringBuilder allStrings = new StringBuilder();
        String glue = "";
        for (int remaining = columnNames.size(); remaining > 0; remaining--) {
            allStrings.append(glue).append(serdeConstants.STRING_TYPE_NAME);
            glue = ":";
        }
        rawTypes = allStrings.toString();
    }
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(rawTypes);

    if (conf != null) {
        // Built up front, regardless of whether any column actually needs it.
        final TimestampLocalTZTypeInfo localZoneType = new TimestampLocalTZTypeInfo(conf.get(ConfVars.HIVE_LOCAL_TIME_ZONE.varname));
        for (int idx = 0; idx < columnTypes.size(); idx++) {
            if (!(columnTypes.get(idx) instanceof TimestampLocalTZTypeInfo)) {
                continue;
            }
            columnTypes.set(idx, localZoneType);
        }
    }

    final int nameCount = columnNames.size();
    final int typeCount = columnTypes.size();
    if (nameCount != typeCount) {
        throw new SerDeException(serdeName + ": columns has " + nameCount + " elements while columns.types has " + typeCount + " elements!");
    }
}
Also used : TimestampLocalTZTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Example 12 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in the Apache Hive project.

In class TestLazyHBaseCellMap, method testInitColumnPrefix:

/**
 * Initialises a {@code LazyHBaseCellMap} from two cells whose qualifiers are
 * {@code QUAL_PREFIX + "1"} and {@code QUAL_PREFIX + "2"}, passing {@code QUAL_PREFIX}
 * to {@code init}, and checks that both values can be looked up by the bare suffixes.
 */
@Test
public void testInitColumnPrefix() throws Exception {
    final String firstSuffix = "1";
    final String secondSuffix = "2";

    // Lazy inspector for a map<string,string> using bytes 1 and 2 as separators.
    byte[] separators = new byte[] { (byte) 1, (byte) 2 };
    Text nullMarker = new Text("\\N");
    ObjectInspector inspector = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0), separators, 0, nullMarker, false, (byte) 0);
    LazyHBaseCellMap cellMap = new LazyHBaseCellMap((LazyMapObjectInspector) inspector);

    // Two cells in the same family, both carrying the qualifier prefix.
    Cell[] prefixedCells = new KeyValue[] {
        new KeyValue(TEST_ROW, COLUMN_FAMILY, Bytes.toBytes(QUAL_PREFIX + firstSuffix), Bytes.toBytes("cfacol1")),
        new KeyValue(TEST_ROW, COLUMN_FAMILY, Bytes.toBytes(QUAL_PREFIX + secondSuffix), Bytes.toBytes("cfacol2"))
    };
    Result row = Result.create(prefixedCells);

    List<Boolean> binaryFlags = new ArrayList<Boolean>();
    for (int n = 0; n < 2; n++) {
        binaryFlags.add(false);
    }

    // NOTE(review): the trailing 'true' presumably asks init to hide the prefix in map
    // keys - confirm against LazyHBaseCellMap.init.
    cellMap.init(row, COLUMN_FAMILY, binaryFlags, Bytes.toBytes(QUAL_PREFIX), true);

    assertNotNull(cellMap.getMapValueElement(new Text(firstSuffix)));
    assertNotNull(cellMap.getMapValueElement(new Text(secondSuffix)));
}
Also used : LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) Cell(org.apache.hadoop.hbase.Cell) Result(org.apache.hadoop.hbase.client.Result) Test(org.junit.Test)

Example 13 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in the Apache Hive project.

In class TestLazyHBaseObject, method testLazyHBaseCellMap2:

/**
 * Exercises {@code LazyHBaseCellMap} as a string-to-string map initialised on column
 * family {@code cfb} of a result that also contains cells from {@code cfa} and
 * {@code cfc}.
 * @throws SerDeException
 */
public void testLazyHBaseCellMap2() throws SerDeException {
    final byte[] row = Bytes.toBytes("test-row");
    final byte[] cfa = Bytes.toBytes("cfa");
    final byte[] cfb = Bytes.toBytes("cfb");
    final byte[] cfc = Bytes.toBytes("cfc");

    // Inspector for map<string,string> with '#' and tab as separators.
    byte[] separators = new byte[] { (byte) '#', (byte) '\t' };
    Text nullMarker = new Text("\\N");
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<string,string>").get(0), separators, 0, nullMarker, false, (byte) 0);
    LazyHBaseCellMap cellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi);

    // Result spanning three column families; only cfb is handed to the map below.
    List<Cell> cells = new ArrayList<Cell>();
    cells.add(new KeyValue(row, cfa, Bytes.toBytes("col1"), Bytes.toBytes("cfacol1")));
    cells.add(new KeyValue(row, cfa, Bytes.toBytes("col2"), Bytes.toBytes("cfacol2")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("2"), Bytes.toBytes("d\tf")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("-1"), Bytes.toBytes("")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("0"), Bytes.toBytes("0")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("8"), Bytes.toBytes("abc")));
    cells.add(new KeyValue(row, cfc, Bytes.toBytes("col3"), Bytes.toBytes("cfccol3")));
    Result result = Result.create(cells);

    List<Boolean> binaryFlags = new ArrayList<Boolean>();
    for (int n = 0; n < 2; n++) {
        binaryFlags.add(false);
    }
    cellMap.init(result, "cfb".getBytes(), binaryFlags);

    // Per-key lookups; the empty-valued key "-1" and the absent key "-" both yield null.
    LazyString valueFor2 = (LazyString) cellMap.getMapValueElement(new Text("2"));
    assertEquals(new Text("d\tf"), valueFor2.getWritableObject());
    assertNull(cellMap.getMapValueElement(new Text("-1")));
    LazyString valueFor0 = (LazyString) cellMap.getMapValueElement(new Text("0"));
    assertEquals(new Text("0"), valueFor0.getWritableObject());
    LazyString valueFor8 = (LazyString) cellMap.getMapValueElement(new Text("8"));
    assertEquals(new Text("abc"), valueFor8.getWritableObject());
    assertNull(cellMap.getMapValueElement(new Text("-")));

    // Whole-map JSON rendering.
    String expectedJson = "{'0':'0','2':'d\\tf','8':'abc'}".replace('\'', '\"');
    assertEquals(expectedJson, SerDeUtils.getJSONString(cellMap, oi));
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) Cell(org.apache.hadoop.hbase.Cell) Result(org.apache.hadoop.hbase.client.Result)

Example 14 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in the Apache Hive project.

In class TestLazyHBaseObject, method testLazyHBaseRow1:

/**
 * Exercises {@code LazyHBaseRow} when each Hive field maps to exactly one HBase column
 * ({@code :key,cfa:a,cfa:b,cfb:c,cfb:d}). The row object is re-initialised with several
 * different results and its JSON rendering is compared against the expected text.
 * @throws SerDeException
 */
public void testLazyHBaseRow1() throws SerDeException {
    final byte[] row = Bytes.toBytes("test-row");
    final byte[] cfa = Bytes.toBytes("cfa");
    final byte[] cfb = Bytes.toBytes("cfb");

    List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString("string,int,array<string>,map<string,string>,string");
    List<String> fieldNames = Arrays.asList("key", "a", "b", "c", "d");
    Text nullSequence = new Text("\\N");

    ColumnMappings columnMappings = null;
    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(":key,cfa:a,cfa:b,cfb:c,cfb:d");
    } catch (SerDeException e) {
        fail(e.toString());
    }
    for (ColumnMapping mapping : columnMappings) {
        // Every mapping gets one 'false' flag; a non-key mapping without a qualifier
        // gets a second one.
        mapping.binaryStorage.add(false);
        if (!mapping.hbaseRowKey && mapping.qualifierName == null) {
            mapping.binaryStorage.add(false);
        }
    }

    ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos, new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
    LazyHBaseRow lazyRow = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);
    List<Cell> cells = new ArrayList<Cell>();
    Result result;
    String expectedJson;

    // Case 1: every column present.
    cells.add(new KeyValue(row, cfa, Bytes.toBytes("a"), Bytes.toBytes("123")));
    cells.add(new KeyValue(row, cfa, Bytes.toBytes("b"), Bytes.toBytes("a:b:c")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("c"), Bytes.toBytes("d=e:f=g")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("d"), Bytes.toBytes("hi")));
    result = Result.create(cells);
    lazyRow.init(result);
    expectedJson = ("{'key':'test-row','a':123,'b':['a','b','c']," + "'c':{'d':'e','f':'g'},'d':'hi'}").replace("'", "\"");
    assertEquals(expectedJson, SerDeUtils.getJSONString(lazyRow, oi));

    // Case 2: columns b and d missing.
    cells.clear();
    cells.add(new KeyValue(row, cfa, Bytes.toBytes("a"), Bytes.toBytes("123")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("c"), Bytes.toBytes("d=e:f=g")));
    result = Result.create(cells);
    lazyRow.init(result);
    expectedJson = ("{'key':'test-row','a':123,'b':null," + "'c':{'d':'e','f':'g'},'d':null}").replace("'", "\"");
    assertEquals(expectedJson, SerDeUtils.getJSONString(lazyRow, oi));

    // Case 3: column a missing; the map holds a null-sequence value and a key without a value.
    cells.clear();
    cells.add(new KeyValue(row, cfa, Bytes.toBytes("b"), Bytes.toBytes("a")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("c"), Bytes.toBytes("d=\\N:f=g:h")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("d"), Bytes.toBytes("no")));
    result = Result.create(cells);
    lazyRow.init(result);
    expectedJson = ("{'key':'test-row','a':null,'b':['a']," + "'c':{'d':null,'f':'g','h':null},'d':'no'}").replace("'", "\"");
    assertEquals(expectedJson, SerDeUtils.getJSONString(lazyRow, oi));

    // Case 4: array value containing empty elements; columns a and c missing.
    cells.clear();
    cells.add(new KeyValue(row, cfa, Bytes.toBytes("b"), Bytes.toBytes(":a::")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("d"), Bytes.toBytes("no")));
    result = Result.create(cells);
    lazyRow.init(result);
    expectedJson = ("{'key':'test-row','a':null,'b':['','a','','']," + "'c':null,'d':'no'}").replace("'", "\"");
    assertEquals(expectedJson, SerDeUtils.getJSONString(lazyRow, oi));
    // This is intentionally duplicated because of HIVE-3179
    assertEquals(expectedJson, SerDeUtils.getJSONString(lazyRow, oi));

    // Case 5: empty values for the array, map and trailing string columns.
    cells.clear();
    cells.add(new KeyValue(row, cfa, Bytes.toBytes("a"), Bytes.toBytes("123")));
    cells.add(new KeyValue(row, cfa, Bytes.toBytes("b"), Bytes.toBytes("")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("c"), Bytes.toBytes("")));
    cells.add(new KeyValue(row, cfb, Bytes.toBytes("d"), Bytes.toBytes("")));
    result = Result.create(cells);
    lazyRow.init(result);
    expectedJson = "{'key':'test-row','a':123,'b':[],'c':{},'d':''}".replace("'", "\"");
    assertEquals(expectedJson, SerDeUtils.getJSONString(lazyRow, oi));
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) Result(org.apache.hadoop.hbase.client.Result) Cell(org.apache.hadoop.hbase.Cell) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) ColumnMapping(org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping)

Example 15 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in the Apache Hive project.

In class TestLazyHBaseObject, method testLazyHBaseRow3:

/**
 * Exercises {@code LazyHBaseRow} with one Hive column per HBase family/qualifier pair.
 * All columns are primitive; every column except the row key and the string column is
 * flagged as binary-stored and written using {@code Bytes.toBytes}.
 * @throws SerDeException
 */
public void testLazyHBaseRow3() throws SerDeException {
    List<TypeInfo> fieldTypeInfos = TypeInfoUtils.getTypeInfosFromTypeString("string,int,tinyint,smallint,bigint,float,double,string,boolean");
    List<String> fieldNames = Arrays.asList("key", "c_int", "c_byte", "c_short", "c_long", "c_float", "c_double", "c_string", "c_bool");
    Text nullSequence = new Text("\\N");
    String hbaseColumnsMapping = ":key#str,cf-int:cq-int#bin,cf-byte:cq-byte#bin," + "cf-short:cq-short#bin,cf-long:cq-long#bin,cf-float:cq-float#bin,cf-double:cq-double#bin," + "cf-string:cq-string#str,cf-bool:cq-bool#bin";

    ColumnMappings columnMappings = null;
    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping);
    } catch (SerDeException e) {
        fail(e.toString());
    }

    ColumnMapping[] mappings = columnMappings.getColumnsMapping();
    for (int idx = 0; idx < mappings.length; idx++) {
        // Positions 0 (row key) and 7 (string column) are the only non-binary ones.
        boolean storedAsBinary = idx != 0 && idx != 7;
        mappings[idx].binaryStorage.add(storedAsBinary);
    }

    ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos, new byte[] { ' ', ':', '=' }, nullSequence, false, false, (byte) 0);
    LazyHBaseRow lazyRow = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings);

    byte[] rowKey = "row-key".getBytes();
    // Raw cell value for each column position; slot 0 is the row key and has no cell.
    byte[][] cellValues = {
        null,
        Bytes.toBytes(1),
        new byte[] { (byte) 1 },
        Bytes.toBytes((short) 1),
        Bytes.toBytes(1L),
        Bytes.toBytes(1.0F),
        Bytes.toBytes(1.0),
        "Hadoop, Hive, with HBase storage handler.".getBytes(),
        Bytes.toBytes(true)
    };
    List<Cell> cells = new ArrayList<Cell>();
    for (int idx = 1; idx < mappings.length; idx++) {
        if (idx >= cellValues.length) {
            throw new RuntimeException("Not expected: " + idx);
        }
        ColumnMapping mapping = mappings[idx];
        cells.add(new KeyValue(rowKey, mapping.familyNameBytes, mapping.qualifierNameBytes, cellValues[idx]));
    }
    Collections.sort(cells, KeyValue.COMPARATOR);
    Result result = Result.create(cells);
    lazyRow.init(result);

    // Expected deserialised value for each field position.
    Writable[] expected = {
        new Text("row-key"),
        new IntWritable(1),
        new ByteWritable((byte) 1),
        new ShortWritable((short) 1),
        new LongWritable(1),
        new FloatWritable(1.0F),
        new DoubleWritable(1.0),
        new Text("Hadoop, Hive, with HBase storage handler."),
        new BooleanWritable(true)
    };
    StructObjectInspector structInspector = (StructObjectInspector) oi;
    List<? extends StructField> fieldRefs = structInspector.getAllStructFieldRefs();
    for (int idx = 0; idx < fieldRefs.size(); idx++) {
        Object fieldData = structInspector.getStructFieldData(lazyRow, fieldRefs.get(idx));
        // Plain Java assertions: only evaluated when the JVM runs with assertions enabled.
        assert (fieldData != null);
        assert (fieldData instanceof LazyPrimitive<?, ?>);
        Writable actual = ((LazyPrimitive<?, ?>) fieldData).getWritableObject();
        if (idx < expected.length) {
            assertEquals(expected[idx], actual);
        } else {
            fail("Error: Unanticipated value in deserializing fields for HBaseSerDe.");
        }
    }
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) LazyPrimitive(org.apache.hadoop.hive.serde2.lazy.LazyPrimitive) Result(org.apache.hadoop.hbase.client.Result) LongWritable(org.apache.hadoop.io.LongWritable) Cell(org.apache.hadoop.hbase.Cell) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) ColumnMapping(org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping) IntWritable(org.apache.hadoop.io.IntWritable) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)23 ArrayList (java.util.ArrayList)18 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)17 Text (org.apache.hadoop.io.Text)14 LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector)11 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)9 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)9 Cell (org.apache.hadoop.hbase.Cell)7 KeyValue (org.apache.hadoop.hbase.KeyValue)7 Result (org.apache.hadoop.hbase.client.Result)7 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)7 IntWritable (org.apache.hadoop.io.IntWritable)7 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)6 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)5 Test (org.junit.Test)5 HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping)4 IOException (java.io.IOException)3 ColumnMapping (org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping)2 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)2 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)2