
Example 6 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project mongo-hadoop by mongodb.

The class BSONSerDe, method initialize.

/**
     * Finds out the information of the table, including the column names and types.
     */
@SuppressWarnings("unchecked")
@Override
public void initialize(final Configuration conf, final Properties tblProps) throws SerDeException {
    // regex used to split the comma-separated column name list (allowing surrounding whitespace)
    String splitCols = "\\s*,\\s*";
    // Get the table column names
    String colNamesStr = tblProps.getProperty(serdeConstants.LIST_COLUMNS);
    columnNames = Arrays.asList(colNamesStr.split(splitCols));
    // Get mappings specified by the user
    if (tblProps.containsKey(MONGO_COLS)) {
        String mongoFieldsStr = tblProps.getProperty(MONGO_COLS);
        Map<String, String> rules = ((BasicBSONObject) JSON.parse(mongoFieldsStr)).toMap();
        // register the hive field mappings to mongo field mappings
        hiveToMongo = new HashMap<String, String>();
        registerMappings(rules);
    }
    // Get the table column types
    String colTypesStr = tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(colTypesStr);
    if (columnNames.size() != columnTypes.size()) {
        throw new SerDeException("Column Names and Types don't match in size");
    }
    // Get the structure and object inspector
    docTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
    docOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(docTypeInfo);
    // Create the BSONWritable instance for future use.
    bsonWritable = new BSONWritable();
}
Also used: BSONWritable (com.mongodb.hadoop.io.BSONWritable), BasicBSONObject (org.bson.BasicBSONObject), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
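
The sketch below is not part of the mongo-hadoop source; it isolates the same parsing path with made-up property values, so the roles of LIST_COLUMNS, LIST_COLUMN_TYPES and getTypeInfosFromTypeString are visible on their own.

import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringParsingSketch {
    public static void main(String[] args) {
        // Illustrative property values; a real table gets these from the metastore.
        Properties tblProps = new Properties();
        tblProps.setProperty(serdeConstants.LIST_COLUMNS, "id, name, scores");
        tblProps.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string:string:array<int>");
        // Names are split on commas; the type string yields one TypeInfo per column.
        List<String> columnNames = Arrays.asList(tblProps.getProperty(serdeConstants.LIST_COLUMNS).split("\\s*,\\s*"));
        List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(tblProps.getProperty(serdeConstants.LIST_COLUMN_TYPES));
        // A single struct type describes the row, and a standard Java inspector is built from it.
        StructTypeInfo rowTypeInfo = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        ObjectInspector rowOI = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(rowTypeInfo);
        System.out.println(rowTypeInfo.getTypeName()); // struct<id:string,name:string,scores:array<int>>
    }
}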

Example 7 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project phoenix by apache.

The class PhoenixSerDe, method createLazyPhoenixInspector.

private ObjectInspector createLazyPhoenixInspector(Configuration conf, Properties tbl) throws SerDeException {
    List<String> columnNameList = Arrays.asList(tbl.getProperty(serdeConstants.LIST_COLUMNS).split(PhoenixStorageHandlerConstants.COMMA));
    List<TypeInfo> columnTypeList = TypeInfoUtils.getTypeInfosFromTypeString(tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES));
    List<ObjectInspector> columnObjectInspectors = Lists.newArrayListWithExpectedSize(columnTypeList.size());
    for (TypeInfo typeInfo : columnTypeList) {
        columnObjectInspectors.add(PhoenixObjectInspectorFactory.createObjectInspector(typeInfo, serdeParams));
    }
    return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columnNameList, columnObjectInspectors, null, serdeParams.getSeparators()[0], serdeParams, ObjectInspectorOptions.JAVA);
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
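
A hedged aside, not from the Phoenix source: the column names are split on plain commas, but the types must go through getTypeInfosFromTypeString because commas nested inside struct and map types are not top-level separators. The type string below is hypothetical.

import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class NestedTypeStringSketch {
    public static void main(String[] args) {
        // Hypothetical columns.types value containing nested commas.
        List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString(
                "int:struct<lat:double,lon:double>:map<string,string>");
        System.out.println(types.size());               // 3, not 5: nested commas do not split columns
        System.out.println(types.get(1).getTypeName()); // struct<lat:double,lon:double>
        System.out.println(types.get(2).getTypeName()); // map<string,string>
    }
}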

Example 8 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project carbondata by apache.

The class CarbonHiveRecordReader, method initialize.

public void initialize(InputSplit inputSplit, Configuration conf) throws IOException {
    // The input split can contain a single HDFS block or multiple blocks, so first get all the
    // blocks and then set them in the query model.
    List<CarbonHiveInputSplit> splitList;
    if (inputSplit instanceof CarbonHiveInputSplit) {
        splitList = new ArrayList<>(1);
        splitList.add((CarbonHiveInputSplit) inputSplit);
    } else {
        throw new RuntimeException("unsupported input split type: " + inputSplit);
    }
    List<TableBlockInfo> tableBlockInfoList = CarbonHiveInputSplit.createBlocks(splitList);
    queryModel.setTableBlockInfos(tableBlockInfoList);
    readSupport.initialize(queryModel.getProjectionColumns(), queryModel.getAbsoluteTableIdentifier());
    try {
        carbonIterator = new ChunkRowIterator(queryExecutor.execute(queryModel));
    } catch (QueryExecutionException e) {
        throw new IOException(e.getMessage(), e.getCause());
    }
    if (valueObj == null) {
        valueObj = new ArrayWritable(Writable.class, new Writable[queryModel.getProjectionColumns().length]);
    }
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    List<TypeInfo> columnTypes;
    // Get the projected column ids and the table's column names and types
    final String colIds = conf.get("hive.io.file.readcolumn.ids");
    final String columnNameProperty = conf.get("hive.io.file.readcolumn.names");
    final String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    String[] arraySelectedColId = colIds.split(",");
    List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
    for (String anArrayColId : arraySelectedColId) {
        reqColTypes.add(columnTypes.get(Integer.parseInt(anArrayColId)));
    }
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, reqColTypes);
    this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
}
Also used: TableBlockInfo (org.apache.carbondata.core.datastore.block.TableBlockInfo), ChunkRowIterator (org.apache.carbondata.core.scan.result.iterator.ChunkRowIterator), ArrayList (java.util.ArrayList), DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable), Writable (org.apache.hadoop.io.Writable), LongWritable (org.apache.hadoop.io.LongWritable), TimestampWritable (org.apache.hadoop.hive.serde2.io.TimestampWritable), DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable), ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable), ArrayWritable (org.apache.hadoop.io.ArrayWritable), IntWritable (org.apache.hadoop.io.IntWritable), HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable), StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), IOException (java.io.IOException), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), QueryExecutionException (org.apache.carbondata.core.scan.executor.exception.QueryExecutionException)
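
The column pruning above can be isolated into a small sketch; the schema and projection values below are invented, but the mechanism is the same: hive.io.file.readcolumn.ids holds indexes into the full type list, and only the selected TypeInfos reach the row struct.

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ColumnPruningSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Made-up full schema and a projection of columns 0 and 2.
        conf.set(serdeConstants.LIST_COLUMN_TYPES, "int:string:double:bigint");
        conf.set("hive.io.file.readcolumn.ids", "0,2");
        List<TypeInfo> allTypes = TypeInfoUtils.getTypeInfosFromTypeString(conf.get(serdeConstants.LIST_COLUMN_TYPES));
        List<TypeInfo> projected = new ArrayList<>();
        for (String id : conf.get("hive.io.file.readcolumn.ids").split(",")) {
            projected.add(allTypes.get(Integer.parseInt(id)));
        }
        // projected now holds the TypeInfos for columns 0 and 2 only (int and double).
    }
}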

Example 9 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project carbondata by apache.

The class CarbonHiveSerDe, method initialize.

@Override
public void initialize(@Nullable Configuration configuration, Properties tbl) throws SerDeException {
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<String> reqColNames;
    final List<TypeInfo> columnTypes;
    // Get the projected column ids and the table's column names and types
    assert configuration != null;
    final String colIds = configuration.get("hive.io.file.readcolumn.ids");
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (colIds != null) {
        reqColNames = new ArrayList<String>();
        String[] arraySelectedColId = colIds.split(",");
        List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
        for (String anArrayColId : arraySelectedColId) {
            reqColNames.add(columnNames.get(Integer.parseInt(anArrayColId)));
            reqColTypes.add(columnTypes.get(Integer.parseInt(anArrayColId)));
        }
        // Create row related objects
        rowTypeInfo = TypeInfoFactory.getStructTypeInfo(reqColNames, reqColTypes);
        this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
    } else {
        // Create row related objects
        rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
        // Stats part
        serializedSize = 0;
        deserializedSize = 0;
        status = LAST_OPERATION.UNKNOWN;
    }
}
Also used: ArrayList (java.util.ArrayList), StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
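
As a hedged follow-up (the carbondata source wraps the pruned type in its own CarbonObjectInspector): the effect of the projection can be seen by feeding the same pruned StructTypeInfo to Hive's standard inspector factory instead. Column names and types here are made up.

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class PrunedStructSketch {
    public static void main(String[] args) {
        // Pruned names and types, as they would look after the colIds loop above.
        StructTypeInfo pruned = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(
                Arrays.asList("id", "price"),
                TypeInfoUtils.getTypeInfosFromTypeString("int:double"));
        StructObjectInspector soi = (StructObjectInspector)
                TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(pruned);
        System.out.println(soi.getAllStructFieldRefs().size()); // 2: only the projected columns remain
    }
}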

Example 10 with TypeInfoUtils.getTypeInfosFromTypeString

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfosFromTypeString in project hive by apache.

The class TestLazyArrayMapStruct, method testLazyArray.

/**
 * Test the LazyArray class.
 */
public void testLazyArray() throws Throwable {
    try {
        // Array of Byte
        Text nullSequence = new Text("\\N");
        ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("array<tinyint>").get(0), new byte[] { (byte) 1 }, 0, nullSequence, false, (byte) 0);
        LazyArray b = (LazyArray) LazyFactory.createLazyObject(oi);
        byte[] data = new byte[] { '-', '1', 1, '\\', 'N', 1, '8' };
        TestLazyPrimitive.initLazyObject(b, data, 0, data.length);
        assertNull(b.getListElementObject(-1));
        assertEquals(new ByteWritable((byte) -1), ((LazyByte) b.getListElementObject(0)).getWritableObject());
        assertEquals(new ByteWritable((byte) -1), ((LazyByte) b.getList().get(0)).getWritableObject());
        assertNull(b.getListElementObject(1));
        assertNull(b.getList().get(1));
        assertEquals(new ByteWritable((byte) 8), ((LazyByte) b.getListElementObject(2)).getWritableObject());
        assertEquals(new ByteWritable((byte) 8), ((LazyByte) b.getList().get(2)).getWritableObject());
        assertNull(b.getListElementObject(3));
        assertEquals(3, b.getList().size());
        // Array of String
        oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils.getTypeInfosFromTypeString("array<string>").get(0), new byte[] { (byte) '\t' }, 0, nullSequence, false, (byte) 0);
        b = (LazyArray) LazyFactory.createLazyObject(oi);
        data = new byte[] { 'a', 'b', '\t', 'c', '\t', '\\', 'N', '\t', '\t', 'd' };
        // Note: the first and last bytes of the array are NOT used
        TestLazyPrimitive.initLazyObject(b, data, 1, data.length - 2);
        assertNull(b.getListElementObject(-1));
        assertEquals(new Text("b"), ((LazyString) b.getListElementObject(0)).getWritableObject());
        assertEquals(new Text("b"), ((LazyString) b.getList().get(0)).getWritableObject());
        assertEquals(new Text("c"), ((LazyString) b.getListElementObject(1)).getWritableObject());
        assertEquals(new Text("c"), ((LazyString) b.getList().get(1)).getWritableObject());
        assertNull((b.getListElementObject(2)));
        assertNull((b.getList().get(2)));
        assertEquals(new Text(""), ((LazyString) b.getListElementObject(3)).getWritableObject());
        assertEquals(new Text(""), ((LazyString) b.getList().get(3)).getWritableObject());
        assertEquals(new Text(""), ((LazyString) b.getListElementObject(4)).getWritableObject());
        assertEquals(new Text(""), ((LazyString) b.getList().get(4)).getWritableObject());
        assertNull((b.getListElementObject(5)));
        assertEquals(5, b.getList().size());
        // -- HIVE-4149
        b = (LazyArray) LazyFactory.createLazyObject(oi);
        data = new byte[] { 'a', '\t', '\\', 'N' };
        TestLazyPrimitive.initLazyObject(b, data, 0, data.length);
        assertEquals(new Text("a"), ((LazyString) b.getListElementObject(0)).getWritableObject());
        assertNull(b.getListElementObject(1));
        data = new byte[] { '\\', 'N', '\t', 'a' };
        TestLazyPrimitive.initLazyObject(b, data, 0, data.length);
        assertNull(b.getListElementObject(0));
        // read the same element twice: the second read must not return a stale cached value
        assertNull(b.getListElementObject(0));
        assertEquals(new Text("a"), ((LazyString) b.getListElementObject(1)).getWritableObject());
    } catch (Throwable e) {
        e.printStackTrace();
        throw e;
    }
}
Also used: StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), Text (org.apache.hadoop.io.Text), ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)
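
One detail worth spelling out, as a hedged aside rather than part of the test: the type string handed to getTypeInfosFromTypeString here parses to a single ListTypeInfo whose element type is tinyint, which is what LazyFactory needs in order to build the array inspector.

import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ArrayTypeStringSketch {
    public static void main(String[] args) {
        ListTypeInfo listType = (ListTypeInfo)
                TypeInfoUtils.getTypeInfosFromTypeString("array<tinyint>").get(0);
        // The element type is the tinyint primitive TypeInfo.
        System.out.println(listType.getListElementTypeInfo().equals(TypeInfoFactory.byteTypeInfo)); // true
        System.out.println(listType.getTypeName()); // array<tinyint>
    }
}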

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 23
ArrayList (java.util.ArrayList): 18
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 17
Text (org.apache.hadoop.io.Text): 14
LazyMapObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector): 11
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 9
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 9
Cell (org.apache.hadoop.hbase.Cell): 7
KeyValue (org.apache.hadoop.hbase.KeyValue): 7
Result (org.apache.hadoop.hbase.client.Result): 7
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 7
IntWritable (org.apache.hadoop.io.IntWritable): 7
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 6
LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector): 5
Test (org.junit.Test): 5
HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping): 4
IOException (java.io.IOException): 3
ColumnMapping (org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping): 2
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 2
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 2