Search in sources :

Example 6 with MapColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in the Apache Hive project.

From the class VectorizedMapColumnReader, method readBatch:

@Override
public void readBatch(int total, ColumnVector column, TypeInfo columnType) throws IOException {
    MapColumnVector mapVector = (MapColumnVector) column;
    MapTypeInfo mapType = (MapTypeInfo) columnType;
    // Wrap the map's key and value element types as list types so the two
    // child readers can decode them as repeated (list) columns.
    ListTypeInfo keysAsListType = new ListTypeInfo();
    keysAsListType.setListElementTypeInfo(mapType.getMapKeyTypeInfo());
    ListTypeInfo valuesAsListType = new ListTypeInfo();
    valuesAsListType.setListElementTypeInfo(mapType.getMapValueTypeInfo());
    // Scratch list vectors that receive the decoded keys and values.
    ListColumnVector keysList = new ListColumnVector();
    ListColumnVector valuesList = new ListColumnVector();
    keyColumnReader.readBatch(total, keysList, keysAsListType);
    valueColumnReader.readBatch(total, valuesList, valuesAsListType);
    // Stitch the two list vectors into the map vector. Null flags, offsets,
    // lengths and child count are taken from the key side; the result only
    // repeats when both sides repeat.
    mapVector.keys = keysList.child;
    mapVector.values = valuesList.child;
    mapVector.isNull = keysList.isNull;
    mapVector.offsets = keysList.offsets;
    mapVector.lengths = keysList.lengths;
    mapVector.childCount = keysList.childCount;
    mapVector.isRepeating = keysList.isRepeating && valuesList.isRepeating;
}
Also used : MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) ListColumnVector(org.apache.hadoop.hive.ql.exec.vector.ListColumnVector) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)

Example 7 with MapColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in the Apache Hive project.

From the class BatchToRowReader, method nextMap:

private Map<Object, Object> nextMap(ColumnVector vector, int row, MapTypeInfo schema, Object previous) {
    // A repeating vector stores its single value in slot 0.
    if (vector.isRepeating) {
        row = 0;
    }
    // A null map cell yields a null result.
    if (!vector.noNulls && vector.isNull[row]) {
        return null;
    }
    MapColumnVector map = (MapColumnVector) vector;
    int offset = (int) map.offsets[row];
    int length = (int) map.lengths[row];
    TypeInfo keyType = schema.getMapKeyTypeInfo();
    TypeInfo valueType = schema.getMapValueTypeInfo();
    LinkedHashMap<Object, Object> result;
    if (previous != null && previous.getClass() == LinkedHashMap.class) {
        result = (LinkedHashMap<Object, Object>) previous;
        // Reuse only the container: reusing the key and value objects
        // themselves would require even more allocations, so take the easy
        // and safe approach of clearing and refilling.
        result.clear();
    } else {
        result = new LinkedHashMap<Object, Object>(length);
    }
    for (int i = 0; i < length; ++i) {
        Object k = nextValue(map.keys, offset + i, keyType, null);
        Object v = nextValue(map.values, offset + i, valueType, null);
        result.put(k, v);
    }
    return result;
}
Also used : MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) LinkedHashMap(java.util.LinkedHashMap)

Example 8 with MapColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in the Apache Hive project.

From the class TestVectorExpressionWriters, method testMapLong:

@SuppressWarnings("unchecked")
private void testMapLong() throws HiveException {
    // Two long vectors of 5 entries back the map's keys and values.
    LongColumnVector kcv = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false, 5, new Random(10));
    LongColumnVector vcv = VectorizedRowGroupGenUtil.generateLongColumnVector(true, false, 5, new Random(10));
    MapColumnVector cv = new MapColumnVector(3, kcv, vcv);
    cv.init();
    // Row 0 covers entries [0, 2); row 1 covers entries [2, 5).
    cv.offsets[0] = 0;
    cv.lengths[0] = 2;
    cv.offsets[1] = 2;
    cv.lengths[1] = 3;
    // Keys 0..4 map to values 5..9.
    for (int i = 0; i < 5; i++) {
        kcv.vector[i] = i;
        kcv.isNull[i] = false;
    }
    kcv.noNulls = true;
    for (int i = 0; i < 5; i++) {
        vcv.vector[i] = 5 + i;
        vcv.isNull[i] = false;
    }
    vcv.noNulls = true;
    // Row 2 is a null map.
    cv.isNull[0] = false;
    cv.isNull[1] = false;
    cv.isNull[2] = true;
    cv.noNulls = false;
    SettableMapObjectInspector mapOI = ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.writableIntObjectInspector, PrimitiveObjectInspectorFactory.writableIntObjectInspector);
    VectorExpressionWriter vew = VectorExpressionWriterFactory.genVectorExpressionWritable(mapOI);
    // setValue path: rows 0 and 1 are written into caller-provided maps.
    Map<Object, Object> values1 = (Map<Object, Object>) mapOI.create();
    vew.setValue(values1, cv, 0);
    Map<Object, Object> values2 = (Map<Object, Object>) mapOI.create();
    vew.setValue(values2, cv, 1);
    TestCase.assertEquals(2, values1.size());
    TestCase.assertEquals(3, values2.size());
    for (int i = 0; i < values1.size(); i++) {
        IntWritable key = new IntWritable(i);
        IntWritable w = (IntWritable) values1.get(key);
        TestCase.assertEquals(5 + i, w.get());
    }
    for (int i = 0; i < values2.size(); i++) {
        IntWritable key = new IntWritable(2 + i);
        IntWritable w = (IntWritable) values2.get(key);
        TestCase.assertEquals(5 + 2 + i, w.get());
    }
    // writeValue path: the writer allocates the result maps itself.
    Map<Object, Object> values3 = (Map<Object, Object>) vew.writeValue(cv, 0);
    TestCase.assertEquals(2, values3.size());
    // Bug fix: verify the freshly written values3 map. The original loop
    // re-checked values1, leaving writeValue's contents unvalidated.
    for (int i = 0; i < values3.size(); i++) {
        IntWritable key = new IntWritable(i);
        IntWritable w = (IntWritable) values3.get(key);
        TestCase.assertEquals(5 + i, w.get());
    }
    Map<Object, Object> values4 = (Map<Object, Object>) vew.writeValue(cv, 1);
    TestCase.assertEquals(3, values4.size());
    // Bug fix: verify values4 rather than re-checking values2.
    for (int i = 0; i < values4.size(); i++) {
        IntWritable key = new IntWritable(2 + i);
        IntWritable w = (IntWritable) values4.get(key);
        TestCase.assertEquals(5 + 2 + i, w.get());
    }
    // Row 2 is a null map, so writeValue must return null.
    Map<Object, Object> values5 = (Map<Object, Object>) vew.writeValue(cv, 2);
    TestCase.assertNull(values5);
}
Also used : Random(java.util.Random) MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) SettableMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector) Map(java.util.Map) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) IntWritable(org.apache.hadoop.io.IntWritable)

Example 9 with MapColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in the Apache Hive project.

From the class VectorUDFMapIndexBaseScalar, method evaluate:

@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
    // Return immediately if the batch is empty.
    final int n = batch.size;
    if (n == 0) {
        return;
    }
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    ColumnVector outV = batch.cols[outputColumnNum];
    MapColumnVector mapV = (MapColumnVector) batch.cols[inputColumnNum[0]];
    ColumnVector valuesV = mapV.values;
    int[] sel = batch.selected;
    boolean[] mapIsNull = mapV.isNull;
    boolean[] outputIsNull = outV.isNull;
    // We do not need to do a column reset since we are carefully changing the output.
    outV.isRepeating = false;
    // Repeating input: looking up the key in row 0 answers every row.
    if (mapV.isRepeating) {
        if (mapV.noNulls || !mapIsNull[0]) {
            final int repeatedMapIndex = findScalarInMap(mapV, 0);
            if (repeatedMapIndex == -1) {
                // Key not present in the map: the result is NULL.
                outV.isNull[0] = true;
                outV.noNulls = false;
            } else {
                outV.isNull[0] = false;
                outV.setElement(0, repeatedMapIndex, valuesV);
            }
        } else {
            outV.isNull[0] = true;
            outV.noNulls = false;
        }
        outV.isRepeating = true;
        return;
    }
    /*
     * Individual row processing for MAP vector with scalar constant key value.
     * (Fixed comment: the original said "LIST vector ... INDEX value".)
     */
    if (mapV.noNulls) {
        if (batch.selectedInUse) {
            if (!outV.noNulls) {
                for (int j = 0; j < n; j++) {
                    final int i = sel[j];
                    final int mapIndex = findScalarInMap(mapV, i);
                    if (mapIndex == -1) {
                        outV.isNull[i] = true;
                        outV.noNulls = false;
                    } else {
                        outV.isNull[i] = false;
                        outV.setElement(i, mapIndex, valuesV);
                    }
                }
            } else {
                // Output currently has no nulls, so only missing keys need
                // their isNull flag set.
                for (int j = 0; j < n; j++) {
                    final int i = sel[j];
                    final int mapIndex = findScalarInMap(mapV, i);
                    if (mapIndex == -1) {
                        outV.isNull[i] = true;
                        outV.noNulls = false;
                    } else {
                        outV.setElement(i, mapIndex, valuesV);
                    }
                }
            }
        } else {
            if (!outV.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outV.isNull, false);
                outV.noNulls = true;
            }
            for (int i = 0; i < n; i++) {
                // (Removed unused local: the original read mapV.lengths[i]
                // into longListLength and never used it.)
                final int mapIndex = findScalarInMap(mapV, i);
                if (mapIndex == -1) {
                    outV.isNull[i] = true;
                    outV.noNulls = false;
                } else {
                    outV.setElement(i, mapIndex, valuesV);
                }
            }
        }
    } else /* there are NULLs in the MAP */
    {
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (!mapIsNull[i]) {
                    final int mapIndex = findScalarInMap(mapV, i);
                    if (mapIndex == -1) {
                        outV.isNull[i] = true;
                        outV.noNulls = false;
                    } else {
                        outV.isNull[i] = false;
                        outV.setElement(i, mapIndex, valuesV);
                    }
                } else {
                    // NULL map row propagates to a NULL output row.
                    outputIsNull[i] = true;
                    outV.noNulls = false;
                }
            }
        } else {
            for (int i = 0; i != n; i++) {
                if (!mapIsNull[i]) {
                    final int mapIndex = findScalarInMap(mapV, i);
                    if (mapIndex == -1) {
                        outV.isNull[i] = true;
                        outV.noNulls = false;
                    } else {
                        outV.isNull[i] = false;
                        outV.setElement(i, mapIndex, valuesV);
                    }
                } else {
                    outputIsNull[i] = true;
                    outV.noNulls = false;
                }
            }
        }
    }
}
Also used : MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector)

Example 10 with MapColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in the Apache Hive project.

From the class VectorUDFMapIndexDecimalScalar, method findScalarInMap:

@Override
public int findScalarInMap(MapColumnVector mapColumnVector, int mapBatchIndex) {
    // Linear scan of this row's key entries [start, start + len).
    final int start = (int) mapColumnVector.offsets[mapBatchIndex];
    final int len = (int) mapColumnVector.lengths[mapBatchIndex];
    ColumnVector keys = mapColumnVector.keys;
    if (keys instanceof DecimalColumnVector) {
        HiveDecimalWritable[] decimalKeys = ((DecimalColumnVector) keys).vector;
        for (int pos = start; pos < start + len; pos++) {
            if (decimalKeys[pos].compareTo(key) == 0) {
                return pos;
            }
        }
        return -1;
    }
    // For some strange reason we receive a double column vector...
    // The way we do VectorExpressionDescriptor may be inadequate in this case...
    double[] doubleKeys = ((DoubleColumnVector) keys).vector;
    for (int pos = start; pos < start + len; pos++) {
        if (doubleKeys[pos] == doubleKey) {
            return pos;
        }
    }
    // Key not found in this row's map.
    return -1;
}
Also used : DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) DecimalColumnVector(org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)

Aggregations

MapColumnVector (org.apache.hadoop.hive.ql.exec.vector.MapColumnVector)11 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)3 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)3 LinkedHashMap (java.util.LinkedHashMap)2 Configuration (org.apache.hadoop.conf.Configuration)2 ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector)2 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)2 VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)2 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)2 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)2 ArrayList (java.util.ArrayList)1 Map (java.util.Map)1 Random (java.util.Random)1 GenericRowData (org.apache.flink.table.data.GenericRowData)1 RowData (org.apache.flink.table.data.RowData)1 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)1 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)1 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)1 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)1 SettableMapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector)1