Example 6 with TypeInfoFactory.getStructTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.

The class TestAccumuloSerDe, method testStructOfMapSerialization (see the standalone sketch after the example).

@Test
public void testStructOfMapSerialization() throws IOException, SerDeException {
    List<String> columns = Arrays.asList("row", "col");
    List<String> structColNames = Arrays.asList("map1", "map2");
    TypeInfo mapTypeInfo = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
    // struct<map1:map<string,string>,map2:map<string,string>>,string
    List<TypeInfo> types = Arrays.<TypeInfo>asList(TypeInfoFactory.getStructTypeInfo(structColNames, Arrays.asList(mapTypeInfo, mapTypeInfo)), TypeInfoFactory.stringTypeInfo);
    Properties tableProperties = new Properties();
    tableProperties.setProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,cf:cq");
    // Use the default separators [0, 1, 2, 3, ..., 7]
    tableProperties.setProperty(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columns));
    tableProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, Joiner.on(',').join(types));
    AccumuloSerDeParameters accumuloSerDeParams = new AccumuloSerDeParameters(new Configuration(), tableProperties, AccumuloSerDe.class.getSimpleName());
    LazySerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters();
    byte[] seps = serDeParams.getSeparators();
    // Build inspectors for struct<map1:map<string,string>,map2:map<string,string>>
    TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
    LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory.createLazyObjectInspector(stringTypeInfo, new byte[] { 0 }, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(stringOI, stringOI, seps[3], seps[4], serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazySimpleStructObjectInspector rowStructOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(structColNames, Arrays.<ObjectInspector>asList(mapOI, mapOI), (byte) seps[2], serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(columns, Arrays.asList(rowStructOI, stringOI), seps[1], serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar());
    AccumuloRowSerializer serializer = new AccumuloRowSerializer(0, serDeParams, accumuloSerDeParams.getColumnMappings(), new ColumnVisibility(), accumuloSerDeParams.getRowIdFactory());
    Map<String, String> map1 = new HashMap<String, String>(), map2 = new HashMap<String, String>();
    map1.put("key10", "value10");
    map1.put("key11", "value11");
    map2.put("key20", "value20");
    map2.put("key21", "value21");
    ByteArrayRef byteRef = new ByteArrayRef();
    // Default separators are 1-indexed (instead of 0-indexed), thus the separator at offset 1 is (byte) 2.
    // The separator for the hive row is \x02, for the row Id struct, \x03, and the maps \x04 and \x05.
    String accumuloRow = "key10\5value10\4key11\5value11\3key20\5value20\4key21\5value21";
    LazyStruct entireStruct = (LazyStruct) LazyFactory.createLazyObject(structOI);
    byteRef.setData((accumuloRow + "\2foo").getBytes());
    entireStruct.init(byteRef, 0, byteRef.getData().length);
    Mutation m = serializer.serialize(entireStruct, structOI);
    Assert.assertArrayEquals(accumuloRow.getBytes(), m.getRow());
    Assert.assertEquals(1, m.getUpdates().size());
    ColumnUpdate update = m.getUpdates().get(0);
    Assert.assertEquals("cf", new String(update.getColumnFamily()));
    Assert.assertEquals("cq", new String(update.getColumnQualifier()));
    Assert.assertEquals("foo", new String(update.getValue()));
    AccumuloHiveRow haRow = new AccumuloHiveRow(new String(m.getRow()));
    haRow.add("cf", "cq", "foo".getBytes());
    LazyAccumuloRow lazyAccumuloRow = new LazyAccumuloRow(structOI);
    lazyAccumuloRow.init(haRow, accumuloSerDeParams.getColumnMappings(), accumuloSerDeParams.getRowIdFactory());
    List<Object> objects = lazyAccumuloRow.getFieldsAsList();
    Assert.assertEquals(2, objects.size());
    Assert.assertEquals("foo", objects.get(1).toString());
    LazyStruct rowStruct = (LazyStruct) objects.get(0);
    List<Object> rowObjects = rowStruct.getFieldsAsList();
    Assert.assertEquals(2, rowObjects.size());
    LazyMap rowMap = (LazyMap) rowObjects.get(0);
    Map<?, ?> actualMap = rowMap.getMap();
    System.out.println("Actual map 1: " + actualMap);
    Map<String, String> actualStringMap = new HashMap<String, String>();
    for (Entry<?, ?> entry : actualMap.entrySet()) {
        actualStringMap.put(entry.getKey().toString(), entry.getValue().toString());
    }
    Assert.assertEquals(map1, actualStringMap);
    rowMap = (LazyMap) rowObjects.get(1);
    actualMap = rowMap.getMap();
    System.out.println("Actual map 2: " + actualMap);
    actualStringMap = new HashMap<String, String>();
    for (Entry<?, ?> entry : actualMap.entrySet()) {
        actualStringMap.put(entry.getKey().toString(), entry.getValue().toString());
    }
    Assert.assertEquals(map2, actualStringMap);
}
Also used : ColumnUpdate(org.apache.accumulo.core.data.ColumnUpdate) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) HashMap(java.util.HashMap) LazyString(org.apache.hadoop.hive.serde2.lazy.LazyString) Properties(java.util.Properties) AccumuloHiveRow(org.apache.hadoop.hive.accumulo.AccumuloHiveRow) ColumnVisibility(org.apache.accumulo.core.security.ColumnVisibility) LazyStruct(org.apache.hadoop.hive.serde2.lazy.LazyStruct) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) LazyAccumuloRow(org.apache.hadoop.hive.accumulo.LazyAccumuloRow) LazyMap(org.apache.hadoop.hive.serde2.lazy.LazyMap) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) Mutation(org.apache.accumulo.core.data.Mutation) Test(org.junit.Test)
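
A minimal standalone sketch of the type construction used above, assuming only the Hive serde2 typeinfo API (the class name and main method are illustrative, not part of TestAccumuloSerDe): it builds the same struct<map1:map<string,string>,map2:map<string,string>> TypeInfo and prints its type name.

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class StructOfMapsTypeSketch {
    public static void main(String[] args) {
        // Reuse one map<string,string> TypeInfo for both struct fields.
        TypeInfo mapType = TypeInfoFactory.getMapTypeInfo(
            TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
        List<String> fieldNames = Arrays.asList("map1", "map2");
        List<TypeInfo> fieldTypes = Arrays.asList(mapType, mapType);
        // getStructTypeInfo composes the nested struct type from parallel name/type lists.
        TypeInfo structType = TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypes);
        // Expected output: struct<map1:map<string,string>,map2:map<string,string>>
        System.out.println(structType.getTypeName());
    }
}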

Example 7 with TypeInfoFactory.getStructTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project carbondata by apache.

The class CarbonHiveRecordReader, method initialize (see the standalone sketch after the example).

public void initialize(InputSplit inputSplit, Configuration conf) throws IOException {
    // The input split can contain single HDFS block or multiple blocks, so firstly get all the
    // blocks and then set them in the query model.
    List<CarbonHiveInputSplit> splitList;
    if (inputSplit instanceof CarbonHiveInputSplit) {
        splitList = new ArrayList<>(1);
        splitList.add((CarbonHiveInputSplit) inputSplit);
    } else {
        throw new RuntimeException("unsupported input split type: " + inputSplit);
    }
    List<TableBlockInfo> tableBlockInfoList = CarbonHiveInputSplit.createBlocks(splitList);
    queryModel.setTableBlockInfos(tableBlockInfoList);
    readSupport.initialize(queryModel.getProjectionColumns(), queryModel.getAbsoluteTableIdentifier());
    try {
        carbonIterator = new ChunkRowIterator(queryExecutor.execute(queryModel));
    } catch (QueryExecutionException e) {
        throw new IOException(e.getMessage(), e.getCause());
    }
    if (valueObj == null) {
        valueObj = new ArrayWritable(Writable.class, new Writable[queryModel.getProjectionColumns().length]);
    }
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    List<TypeInfo> columnTypes;
    // Get column names and sort order
    final String colIds = conf.get("hive.io.file.readcolumn.ids");
    final String columnNameProperty = conf.get("hive.io.file.readcolumn.names");
    final String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    String[] arraySelectedColId = colIds.split(",");
    List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
    for (String anArrayColId : arraySelectedColId) {
        reqColTypes.add(columnTypes.get(Integer.parseInt(anArrayColId)));
    }
    // Create row related objects
    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, reqColTypes);
    this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
}
Also used : TableBlockInfo(org.apache.carbondata.core.datastore.block.TableBlockInfo) ChunkRowIterator(org.apache.carbondata.core.scan.result.iterator.ChunkRowIterator) ArrayList(java.util.ArrayList) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) IOException(java.io.IOException) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) QueryExecutionException(org.apache.carbondata.core.scan.executor.exception.QueryExecutionException) ArrayWritable(org.apache.hadoop.io.ArrayWritable)
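
The two column properties read above feed TypeInfoUtils.getTypeInfosFromTypeString, which also understands nested type strings. A small hedged sketch (all values illustrative, not taken from CarbonHiveRecordReader) showing the parse step followed by getStructTypeInfo:

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TypeStringParseSketch {
    public static void main(String[] args) {
        // Stand-in for the columns.types property; note the nested struct and array types.
        String columnTypeProperty = "int,string,struct<lat:double,lon:double>,array<bigint>";
        List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
        List<String> columnNames = Arrays.asList("id", "name", "location", "scores");
        // Combine the parsed types with the column names into one row-level struct type.
        TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        // Expected output: struct<id:int,name:string,location:struct<lat:double,lon:double>,scores:array<bigint>>
        System.out.println(rowTypeInfo.getTypeName());
    }
}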

Example 8 with TypeInfoFactory.getStructTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project carbondata by apache.

The class CarbonHiveSerDe, method initialize (see the standalone sketch after the example).

@Override
public void initialize(@Nullable Configuration configuration, Properties tbl) throws SerDeException {
    final TypeInfo rowTypeInfo;
    final List<String> columnNames;
    final List<String> reqColNames;
    final List<TypeInfo> columnTypes;
    // Get column names and sort order
    assert configuration != null;
    final String colIds = configuration.get("hive.io.file.readcolumn.ids");
    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    final String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }
    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }
    if (colIds != null) {
        reqColNames = new ArrayList<String>();
        String[] arraySelectedColId = colIds.split(",");
        List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
        for (String anArrayColId : arraySelectedColId) {
            reqColNames.add(columnNames.get(Integer.parseInt(anArrayColId)));
            reqColTypes.add(columnTypes.get(Integer.parseInt(anArrayColId)));
        }
        // Create row related objects
        rowTypeInfo = TypeInfoFactory.getStructTypeInfo(reqColNames, reqColTypes);
        this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
    } else {
        // Create row related objects
        rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
        this.objInspector = new CarbonObjectInspector((StructTypeInfo) rowTypeInfo);
        // Stats part
        serializedSize = 0;
        deserializedSize = 0;
        status = LAST_OPERATION.UNKNOWN;
    }
}
Also used : ArrayList(java.util.ArrayList) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)
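
The projection logic above (split the type string into TypeInfos, pick the requested column ids, then call getStructTypeInfo) can be exercised in isolation. A minimal sketch with hard-coded stand-ins for the table properties and read-column ids (all values and the class name are illustrative, not taken from CarbonHiveSerDe):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ProjectedStructTypeSketch {
    public static void main(String[] args) {
        // Stand-ins for serdeConstants.LIST_COLUMNS, LIST_COLUMN_TYPES and hive.io.file.readcolumn.ids.
        String columnNameProperty = "id,name,salary";
        String columnTypeProperty = "int,string,double";
        String colIds = "0,2";
        List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
        List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
        // Keep only the projected columns, in the order given by the id list.
        List<String> reqColNames = new ArrayList<String>();
        List<TypeInfo> reqColTypes = new ArrayList<TypeInfo>();
        for (String colId : colIds.split(",")) {
            int i = Integer.parseInt(colId);
            reqColNames.add(columnNames.get(i));
            reqColTypes.add(columnTypes.get(i));
        }
        StructTypeInfo rowType =
            (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(reqColNames, reqColTypes);
        // Expected output: struct<id:int,salary:double>
        System.out.println(rowType.getTypeName());
    }
}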

Example 9 with TypeInfoFactory.getStructTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.

The class TestKeyWrapperFactory, method setup (see the standalone sketch after the example).

@Before
public void setup() throws Exception {
    SessionState ss = new SessionState(new HiveConf());
    SessionState.setCurrentSessionState(ss);
    ArrayList<Text> col1 = new ArrayList<Text>();
    col1.add(new Text("0"));
    col1.add(new Text("1"));
    col1.add(new Text("2"));
    col1.add(new Text("3"));
    TypeInfo col1Type = TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo);
    ArrayList<Text> cola = new ArrayList<Text>();
    cola.add(new Text("a"));
    cola.add(new Text("b"));
    cola.add(new Text("c"));
    TypeInfo colaType = TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo);
    try {
        ArrayList<Object> data = new ArrayList<Object>();
        data.add(col1);
        data.add(cola);
        ArrayList<String> names = new ArrayList<String>();
        names.add("col1");
        names.add("cola");
        ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>();
        typeInfos.add(col1Type);
        typeInfos.add(colaType);
        TypeInfo dataType = TypeInfoFactory.getStructTypeInfo(names, typeInfos);
        InspectableObject r = new InspectableObject();
        ObjectInspector[] oi = new ObjectInspector[1];
        r.o = data;
        oi[0] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(dataType);
        try {
            // get an evaluator for a simple field expression
            ExprNodeDesc exprDesc = new ExprNodeColumnDesc(colaType, "cola", "", false);
            ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(exprDesc);
            ExprNodeEvaluator[] evals = new ExprNodeEvaluator[1];
            evals[0] = eval;
            ObjectInspector resultOI = eval.initialize(oi[0]);
            ObjectInspector[] resultOIs = new ObjectInspector[1];
            resultOIs[0] = resultOI;
            factory = new KeyWrapperFactory(evals, oi, resultOIs);
        } catch (Throwable e) {
            e.printStackTrace();
            throw e;
        }
    } catch (Throwable e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
Also used : SessionState(org.apache.hadoop.hive.ql.session.SessionState) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) HiveConf(org.apache.hadoop.hive.conf.HiveConf) InspectableObject(org.apache.hadoop.hive.serde2.objectinspector.InspectableObject) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) Before(org.junit.Before)
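
As a follow-up to the setup above, the struct TypeInfo returned by getStructTypeInfo can be turned into a standard writable object inspector and its fields enumerated. A minimal sketch assuming the same two list-of-string columns (the class name is illustrative, not part of TestKeyWrapperFactory):

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class StructInspectorSketch {
    public static void main(String[] args) {
        // Two columns of type array<string>, mirroring col1 and cola in the test above.
        TypeInfo listOfString = TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo);
        TypeInfo dataType = TypeInfoFactory.getStructTypeInfo(
            Arrays.asList("col1", "cola"),
            Arrays.asList(listOfString, listOfString));
        ObjectInspector oi = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(dataType);
        StructObjectInspector soi = (StructObjectInspector) oi;
        // Expected: two fields, col1 and cola, each reported as array<string>.
        for (StructField field : soi.getAllStructFieldRefs()) {
            System.out.println(field.getFieldName() + " : "
                + field.getFieldObjectInspector().getTypeName());
        }
    }
}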

Example 10 with TypeInfoFactory.getStructTypeInfo

Use of org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getStructTypeInfo in project hive by apache.

The class VectorMapOperator, method internalSetChildren (see the standalone sketch after the example).

/*
   * Create information for vector map operator.
   * The member oneRootOperator has been set.
   */
private void internalSetChildren(Configuration hconf) throws Exception {
    // The setupPartitionContextVars uses the prior read type to flush the prior deserializerBatch,
    // so set it here to none.
    currentReadType = VectorMapOperatorReadType.NONE;
    batchContext = conf.getVectorizedRowBatchCtx();
    /*
     * Use a different batch for vectorized Input File Format readers so they can do their work
     * overlapped with work of the row collection that vector/row deserialization does.  This allows
     * the partitions to mix modes (e.g. for us to flush the previously batched rows on file change).
     */
    vectorizedInputFileFormatBatch = batchContext.createVectorizedRowBatch();
    conf.setVectorizedRowBatch(vectorizedInputFileFormatBatch);
    /*
     * This batch is used by vector/row deserializer readers.
     */
    deserializerBatch = batchContext.createVectorizedRowBatch();
    batchCounter = 0;
    dataColumnCount = batchContext.getDataColumnCount();
    partitionColumnCount = batchContext.getPartitionColumnCount();
    partitionValues = new Object[partitionColumnCount];
    virtualColumnCount = batchContext.getVirtualColumnCount();
    rowIdentifierColumnNum = batchContext.findVirtualColumnNum(VirtualColumn.ROWID);
    hasRowIdentifier = (rowIdentifierColumnNum != -1);
    dataColumnNums = batchContext.getDataColumnNums();
    Preconditions.checkState(dataColumnNums != null);
    // Form a truncated boolean include array for our vector/row deserializers.
    determineDataColumnsToIncludeTruncated();
    /*
     * Create table related objects
     */
    final String[] rowColumnNames = batchContext.getRowColumnNames();
    final TypeInfo[] rowColumnTypeInfos = batchContext.getRowColumnTypeInfos();
    tableStructTypeInfo = TypeInfoFactory.getStructTypeInfo(Arrays.asList(rowColumnNames), Arrays.asList(rowColumnTypeInfos));
    tableStandardStructObjectInspector = (StandardStructObjectInspector) TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);
    tableRowTypeInfos = batchContext.getRowColumnTypeInfos();
    /*
     * NOTE: We do not alter the projectedColumns / projectionSize of the batches to just be
     * the included columns (+ partition columns).
     *
     * For now, we need to model the object inspector rows because there are still several
     * vectorized operators that use them.
     *
     * We need to continue to model the Object[] as having null objects for not included columns
     * until the following has been fixed:
     *    o When we have to output a STRUCT for AVG we switch to row GroupBy operators.
     *    o Some variations of VectorMapOperator, VectorReduceSinkOperator, VectorFileSinkOperator
     *      use the row super class to process rows.
     */
    /*
     * The Vectorizer class enforces that there is only one TableScanOperator, so
     * we don't need the more complicated multiple root operator mapping that MapOperator has.
     */
    fileToPartitionContextMap = new HashMap<String, VectorPartitionContext>();
    // Temporary map so we only create one partition context entry.
    HashMap<PartitionDesc, VectorPartitionContext> partitionContextMap = new HashMap<PartitionDesc, VectorPartitionContext>();
    for (Map.Entry<Path, ArrayList<String>> entry : conf.getPathToAliases().entrySet()) {
        Path path = entry.getKey();
        PartitionDesc partDesc = conf.getPathToPartitionInfo().get(path);
        VectorPartitionContext vectorPartitionContext;
        if (!partitionContextMap.containsKey(partDesc)) {
            vectorPartitionContext = createAndInitPartitionContext(partDesc, hconf);
            partitionContextMap.put(partDesc, vectorPartitionContext);
        } else {
            vectorPartitionContext = partitionContextMap.get(partDesc);
        }
        fileToPartitionContextMap.put(path.toString(), vectorPartitionContext);
    }
    // Create list of one.
    List<Operator<? extends OperatorDesc>> children = new ArrayList<Operator<? extends OperatorDesc>>();
    children.add(oneRootOperator);
    setChildOperators(children);
}
Also used : Path(org.apache.hadoop.fs.Path) TableScanOperator(org.apache.hadoop.hive.ql.exec.TableScanOperator) Operator(org.apache.hadoop.hive.ql.exec.Operator) AbstractMapOperator(org.apache.hadoop.hive.ql.exec.AbstractMapOperator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PartitionDesc(org.apache.hadoop.hive.ql.plan.PartitionDesc) VectorPartitionDesc(org.apache.hadoop.hive.ql.plan.VectorPartitionDesc) HashMap(java.util.HashMap) Map(java.util.Map) OperatorDesc(org.apache.hadoop.hive.ql.plan.OperatorDesc)
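
The table-level pattern above (parallel arrays of row column names and TypeInfos combined into a StructTypeInfo, then wrapped in a standard writable object inspector) reduces to a few lines. A hedged sketch with illustrative column names and types standing in for the values the batch context would supply:

import java.util.Arrays;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class TableStructSketch {
    public static void main(String[] args) {
        // Stand-ins for batchContext.getRowColumnNames() and batchContext.getRowColumnTypeInfos().
        String[] rowColumnNames = { "key", "value" };
        TypeInfo[] rowColumnTypeInfos = { TypeInfoFactory.stringTypeInfo, TypeInfoFactory.longTypeInfo };
        TypeInfo tableStructTypeInfo = TypeInfoFactory.getStructTypeInfo(
            Arrays.asList(rowColumnNames), Arrays.asList(rowColumnTypeInfos));
        StandardStructObjectInspector tableOI = (StandardStructObjectInspector)
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(tableStructTypeInfo);
        // Expected: struct<key:string,value:bigint> with two struct fields.
        System.out.println(tableStructTypeInfo.getTypeName());
        System.out.println(tableOI.getAllStructFieldRefs().size() + " fields");
    }
}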

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 17
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 11
ArrayList (java.util.ArrayList): 7
ArrayWritableObjectInspector (org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector): 5
HashMap (java.util.HashMap): 4
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 3
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 3
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 3
Map (java.util.Map): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 2
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 2
BSONWritable (com.mongodb.hadoop.io.BSONWritable): 1
IOException (java.io.IOException): 1
Field (java.lang.reflect.Field): 1
ParameterizedType (java.lang.reflect.ParameterizedType): 1
EnumMap (java.util.EnumMap): 1
LinkedList (java.util.LinkedList): 1
List (java.util.List): 1
Properties (java.util.Properties): 1