Example 31 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class TestLazyHBaseObject, method testLazyHBaseCellMap1.

/**
 * Test the LazyMap class with Integer-to-String.
 * @throws SerDeException
 */
public void testLazyHBaseCellMap1() throws SerDeException {
    // Map of Integer to String
    Text nullSequence = new Text("\\N");
    ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("map<int,string>").get(0), new byte[] { (byte) 1, (byte) 2 }, 0, nullSequence, false, (byte) 0);
    LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi);
    // Initialize a result
    List<Cell> kvs = new ArrayList<Cell>();
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("col1"), Bytes.toBytes("cfacol1")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfa"), Bytes.toBytes("col2"), Bytes.toBytes("cfacol2")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("2"), Bytes.toBytes("def")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("-1"), Bytes.toBytes("")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("0"), Bytes.toBytes("0")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfb"), Bytes.toBytes("8"), Bytes.toBytes("abc")));
    kvs.add(new KeyValue(Bytes.toBytes("test-row"), Bytes.toBytes("cfc"), Bytes.toBytes("col3"), Bytes.toBytes("cfccol3")));
    Result r = Result.create(kvs);
    List<Boolean> mapBinaryStorage = new ArrayList<Boolean>();
    mapBinaryStorage.add(false);
    mapBinaryStorage.add(false);
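    // Expose the cfb column family as the map: each qualifier in cfb becomes
    // a map key and its cell value becomes the corresponding map value.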
    b.init(r, "cfb".getBytes(), mapBinaryStorage);
    assertEquals(new Text("def"), ((LazyString) b.getMapValueElement(new IntWritable(2))).getWritableObject());
    assertNull(b.getMapValueElement(new IntWritable(-1)));
    assertEquals(new Text("0"), ((LazyString) b.getMapValueElement(new IntWritable(0))).getWritableObject());
    assertEquals(new Text("abc"), ((LazyString) b.getMapValueElement(new IntWritable(8))).getWritableObject());
    assertNull(b.getMapValueElement(new IntWritable(12345)));
    assertEquals("{0:'0',2:'def',8:'abc'}".replace('\'', '\"'), SerDeUtils.getJSONString(b, oi));
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) KeyValue(org.apache.hadoop.hbase.KeyValue) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) Cell(org.apache.hadoop.hbase.Cell) IntWritable(org.apache.hadoop.io.IntWritable) Result(org.apache.hadoop.hbase.client.Result)
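For orientation, here is a minimal standalone sketch (not taken from the Hive test suite; the class name is invented) of what the type-string parse at the top of this test produces, using only the TypeInfoUtils API shown above:

import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringParseSketch {
    public static void main(String[] args) {
        // getTypeInfosFromTypeString returns one TypeInfo per top-level type,
        // so a single "map<int,string>" yields a one-element list; that is
        // why the test above calls get(0) on the result.
        List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString("map<int,string>");
        System.out.println(types.size());               // 1
        System.out.println(types.get(0).getTypeName()); // map<int,string>
    }
}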

Example 32 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class JsonSerDe, method initialize.

@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    List<TypeInfo> columnTypes;
    StructTypeInfo rowTypeInfo;
    LOG.debug("Initializing JsonSerDe: {}", tbl.entrySet());
    // Get column names and types
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    // all table column names
    if (columnNameProperty.isEmpty()) {
        columnNames = Collections.emptyList();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    }
    // all column types
    if (columnTypeProperty.isEmpty()) {
        columnTypes = Collections.emptyList();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    LOG.debug("columns: {}, {}", columnNameProperty, columnNames);
    LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes);
    assert (columnNames.size() == columnTypes.size());
    rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    cachedObjectInspector = HCatRecordObjectInspectorFactory.getHCatRecordObjectInspector(rowTypeInfo);
    try {
        schema = HCatSchemaUtils.getHCatSchema(rowTypeInfo).get(0).getStructSubSchema();
        LOG.debug("schema : {}", schema);
        LOG.debug("fields : {}", schema.getFieldNames());
    } catch (HCatException e) {
        throw new SerDeException(e);
    }
    jsonFactory = new JsonFactory();
    tsParser = new TimestampParser(HiveStringUtils.splitAndUnEscape(tbl.getProperty(serdeConstants.TIMESTAMP_FORMATS)));
}
Also used : TimestampParser(org.apache.hive.common.util.TimestampParser) HCatException(org.apache.hive.hcatalog.common.HCatException) JsonFactory(org.codehaus.jackson.JsonFactory) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) BaseCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
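To make the inputs concrete, a hedged sketch of the table properties this initialize method reads (the column names, types, and class name here are invented); the colon-separated columns.types value is what getTypeInfosFromTypeString parses:

import java.util.Properties;
import org.apache.hadoop.hive.serde2.serdeConstants;

public class JsonSerDePropsSketch {
    public static void main(String[] args) {
        // Hypothetical table properties; the names list and the type list
        // must line up one-to-one, which the assert in initialize() checks.
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name,tags");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int:string:map<string,string>");
        // getTypeInfosFromTypeString would return three TypeInfos here, and
        // getStructTypeInfo pairs them with the three column names.
        System.out.println(tbl);
    }
}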

Example 33 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class AvroSerdeUtils, method determineSchemaOrThrowException.

/**
 * Determine the schema that's been provided for Avro serde work.
 * @param properties containing a key pointing to the schema, one way or another
 * @return schema to use while serializing and deserializing the Avro file
 * @throws IOException if error while trying to read the schema from another location
 * @throws AvroSerdeException if unable to find a schema or pointer to it in the properties
 */
public static Schema determineSchemaOrThrowException(Configuration conf, Properties properties) throws IOException, AvroSerdeException {
    String schemaString = properties.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName());
    if (schemaString != null && !schemaString.equals(SCHEMA_NONE))
        return AvroSerdeUtils.getSchemaFor(schemaString);
    // Try pulling directly from URL
    schemaString = properties.getProperty(AvroTableProperties.SCHEMA_URL.getPropName());
    if (schemaString == null) {
        final String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS);
        final String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
        final String columnCommentProperty = properties.getProperty(AvroSerDe.LIST_COLUMN_COMMENTS);
        if (columnNameProperty == null || columnNameProperty.isEmpty() || columnTypeProperty == null || columnTypeProperty.isEmpty()) {
            throw new AvroSerdeException(EXCEPTION_MESSAGE);
        }
        final String columnNameDelimiter = properties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? properties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
        // Get column names and types
        List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
        List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
        Schema schema = AvroSerDe.getSchemaFromCols(properties, columnNames, columnTypes, columnCommentProperty);
        properties.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString());
        if (conf != null)
            conf.set(AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(), schema.toString(false));
        return schema;
    } else if (schemaString.equals(SCHEMA_NONE)) {
        throw new AvroSerdeException(EXCEPTION_MESSAGE);
    }
    try {
        Schema s = getSchemaFromFS(schemaString, conf);
        if (s == null) {
            // in case schema is not a file system
            return AvroSerdeUtils.getSchemaFor(new URL(schemaString));
        }
        return s;
    } catch (IOException ioe) {
        throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, ioe);
    } catch (URISyntaxException urie) {
        throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, urie);
    }
}
Also used : Schema(org.apache.avro.Schema) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) URL(java.net.URL)
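A hedged usage sketch of the first branch above (the record schema and class name are invented): when avro.schema.literal is present, it is parsed and returned before any URL- or column-based schema construction is attempted.

import java.util.Properties;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties;

public class AvroSchemaLookupSketch {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // A literal schema short-circuits the URL and column-property paths.
        props.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(),
            "{\"type\":\"record\",\"name\":\"rec\",\"fields\":"
            + "[{\"name\":\"id\",\"type\":\"int\"}]}");
        Schema schema =
            AvroSerdeUtils.determineSchemaOrThrowException(new Configuration(), props);
        System.out.println(schema.getName()); // rec
    }
}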

Example 34 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class ThriftJDBCBinarySerDe, method initialize.

@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // Get column names
    MAX_BUFFERED_ROWS = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE);
    LOG.info("ThriftJDBCBinarySerDe max number of buffered columns: " + MAX_BUFFERED_ROWS);
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    rowObjectInspector = (StructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
    initializeRowAndColumns();
    try {
        thriftFormatter.initialize(conf, tbl);
    } catch (Exception e) {
        throw new SerDeException(e);
    }
}
Also used : TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) TException(org.apache.thrift.TException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
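The two lines that build rowTypeInfo and rowObjectInspector above are the same pattern the next example uses; a small sketch with made-up column names and an invented class name isolates it:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class RowInspectorSketch {
    public static void main(String[] args) {
        List<String> names = Arrays.asList("a", "b");
        List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString("int:string");
        // Pair names with parsed types, then derive a writable inspector.
        TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(names, types);
        StructObjectInspector rowOI = (StructObjectInspector)
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(rowTypeInfo);
        System.out.println(rowOI.getTypeName()); // struct<a:int,b:string>
    }
}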

Example 35 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

From the class LazyBinarySerDe, method initialize.

/**
 * Initialize the SerDe with configuration and table information.
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // Get column names and types
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    assert (columnNames.size() == columnTypes.size());
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    // Create the object inspector and the lazy binary struct object
    cachedObjectInspector = LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(rowTypeInfo);
    cachedLazyBinaryStruct = (LazyBinaryStruct) LazyBinaryFactory.createLazyBinaryObject(cachedObjectInspector);
    // output debug info
    LOG.debug("LazyBinarySerDe initialized with: columnNames=" + columnNames + " columnTypes=" + columnTypes);
    serializedSize = 0;
    stats = new SerDeStats();
    lastOperationSerialize = false;
    lastOperationDeserialize = false;
}
Also used : SerDeStats(org.apache.hadoop.hive.serde2.SerDeStats) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
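Examples 32, 34, and 35 all guard against an empty columns.types property before calling getTypeInfosFromTypeString; a hedged consolidation of that recurring idiom (the class and helper names are invented) could look like this:

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.hive.serde2.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public final class ColumnTypeParsing {
    // Returns an empty list for a missing or empty property, mirroring the
    // empty-string checks in the initialize() methods above.
    static List<TypeInfo> parseColumnTypes(Properties tbl) {
        String prop = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
        return (prop == null || prop.isEmpty())
            ? new ArrayList<TypeInfo>()
            : TypeInfoUtils.getTypeInfosFromTypeString(prop);
    }
}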

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 23 uses
ArrayList (java.util.ArrayList): 18 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 17 uses
Text (org.apache.hadoop.io.Text): 14 uses
LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector): 11 uses
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 9 uses
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 9 uses
Cell (org.apache.hadoop.hbase.Cell): 7 uses
KeyValue (org.apache.hadoop.hbase.KeyValue): 7 uses
Result (org.apache.hadoop.hbase.client.Result): 7 uses
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 7 uses
IntWritable (org.apache.hadoop.io.IntWritable): 7 uses
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 6 uses
LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector): 5 uses
Test (org.junit.Test): 5 uses
HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping): 4 uses
IOException (java.io.IOException): 3 uses
ColumnMapping (org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping): 2 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 2 uses
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 2 uses