Example 1 with MapColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in project hive by apache.

the class RecordReaderImpl method nextMap.

static Map<Object, Object> nextMap(ColumnVector vector, int row, TypeDescription schema, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        MapColumnVector map = (MapColumnVector) vector;
        int length = (int) map.lengths[row];
        int offset = (int) map.offsets[row];
        TypeDescription keyType = schema.getChildren().get(0);
        TypeDescription valueType = schema.getChildren().get(1);
        LinkedHashMap<Object, Object> result;
        if (previous == null || previous.getClass() != LinkedHashMap.class) {
            result = new LinkedHashMap<Object, Object>(length);
        } else {
            result = (LinkedHashMap<Object, Object>) previous;
            // I couldn't think of a good way to reuse the keys and value objects
            // without even more allocations, so take the easy and safe approach.
            result.clear();
        }
        for (int e = 0; e < length; ++e) {
            result.put(nextValue(map.keys, e + offset, keyType, null), nextValue(map.values, e + offset, valueType, null));
        }
        return result;
    } else {
        return null;
    }
}
Also used : MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) TypeDescription(org.apache.orc.TypeDescription) LinkedHashMap(java.util.LinkedHashMap)
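
The method above reads one row of a MapColumnVector back into a Java LinkedHashMap. Going the other way, when producing a file, means filling in the offsets, lengths and child vectors yourself. A minimal sketch, assuming a map<bigint,bigint> column at position 0 of the batch; appendMapRow is an illustrative helper, not part of the code above:

import java.util.Map;

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class MapVectorWriteSketch {

    // Appends one map<bigint,bigint> row to the batch; the map column is assumed to be cols[0].
    static void appendMapRow(VectorizedRowBatch batch, Map<Long, Long> value) {
        int row = batch.size++;
        MapColumnVector map = (MapColumnVector) batch.cols[0];
        LongColumnVector keys = (LongColumnVector) map.keys;
        LongColumnVector values = (LongColumnVector) map.values;
        // All rows of the batch share one flat child array; offsets/lengths slice it per row.
        map.offsets[row] = map.childCount;
        map.lengths[row] = value.size();
        map.childCount += value.size();
        keys.ensureSize(map.childCount, true);
        values.ensureSize(map.childCount, true);
        int e = (int) map.offsets[row];
        for (Map.Entry<Long, Long> entry : value.entrySet()) {
            keys.vector[e] = entry.getKey();
            values.vector[e] = entry.getValue();
            e++;
        }
    }
}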

Example 2 with MapColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in project hive by apache.

the class TestVectorizedMapColumnReader method testMapRead.

private void testMapRead(boolean isDictionaryEncoding, String type, int elementNum) throws Exception {
    Configuration conf = new Configuration();
    setTypeConfiguration(type, conf);
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
    VectorizedParquetRecordReader reader = createTestParquetReader(getSchema(type), conf);
    VectorizedRowBatch previous = reader.createValue();
    int row = 0;
    int index = 0;
    try {
        while (reader.next(NullWritable.get(), previous)) {
            MapColumnVector mapVector = (MapColumnVector) previous.cols[0];
            // isRepeating is only expected when the offsets array has length 1.
            assertEquals((mapVector.offsets.length == 1), mapVector.isRepeating);
            for (int i = 0; i < mapVector.offsets.length; i++) {
                if (row == elementNum) {
                    assertEquals(i, mapVector.offsets.length - 1);
                    break;
                }
                long start = mapVector.offsets[i];
                long length = mapVector.lengths[i];
                boolean isNull = isNull(row);
                if (isNull) {
                    assertEquals(mapVector.isNull[i], true);
                } else {
                    for (long j = 0; j < length; j++) {
                        assertValue(type, mapVector.keys, isDictionaryEncoding, index, (int) (start + j));
                        assertValue(type, mapVector.values, isDictionaryEncoding, index, (int) (start + j));
                        index++;
                    }
                }
                row++;
            }
        }
        assertEquals("It doesn't exit at expected position", elementNum, row);
    } finally {
        reader.close();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) Configuration(org.apache.hadoop.conf.Configuration) MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) VectorizedParquetRecordReader(org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)
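
The assertions above walk the offsets and lengths arrays of the MapColumnVector that the Parquet reader filled. The same traversal pattern applies to any consumer of a vectorized map column; a minimal sketch, assuming a map<int,int> column whose keys and values are LongColumnVectors (dumpMaps is an illustrative helper, not part of the test):

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;

public class MapVectorScanSketch {

    // Prints every key/value pair of a map<int,int> column for the given number of rows.
    static void dumpMaps(MapColumnVector mapVector, int rowCount) {
        LongColumnVector keys = (LongColumnVector) mapVector.keys;
        LongColumnVector values = (LongColumnVector) mapVector.values;
        for (int row = 0; row < rowCount; row++) {
            // A repeating vector stores its single logical value at index 0.
            int r = mapVector.isRepeating ? 0 : row;
            if (!mapVector.noNulls && mapVector.isNull[r]) {
                System.out.println("row " + row + ": NULL");
                continue;
            }
            long start = mapVector.offsets[r];
            long length = mapVector.lengths[r];
            StringBuilder sb = new StringBuilder("row " + row + ": {");
            for (long j = 0; j < length; j++) {
                int e = (int) (start + j);
                sb.append(keys.vector[e]).append('=').append(values.vector[e]).append(' ');
            }
            System.out.println(sb.append('}'));
        }
    }
}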

Example 3 with MapColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in project hive by apache.

the class TestVectorizedMapColumnReader method testRepeateMapRead.

private void testRepeateMapRead(int elementNum, boolean isNull) throws Exception {
    Configuration conf = new Configuration();
    conf.set(IOConstants.COLUMNS, "map_int32_for_repeat_test");
    conf.set(IOConstants.COLUMNS_TYPES, "map<int,int>");
    conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0");
    String schema = "message hive_schema {\n" + "  repeated group map_int32_for_repeat_test (MAP_KEY_VALUE) {\n" + "    required int32 key;\n" + "    optional int32 value;\n" + "  }\n" + "}\n";
    VectorizedParquetRecordReader reader = createTestParquetReader(schema, conf);
    VectorizedRowBatch previous = reader.createValue();
    int row = 0;
    try {
        while (reader.next(NullWritable.get(), previous)) {
            MapColumnVector mapVector = (MapColumnVector) previous.cols[0];
            assertTrue(mapVector.isRepeating);
            assertEquals(isNull, mapVector.isNull[0]);
            for (int i = 0; i < mapVector.offsets.length; i++) {
                if (row == elementNum) {
                    assertEquals(i, mapVector.offsets.length - 1);
                    break;
                }
                row++;
            }
        }
        assertEquals("It doesn't exit at expected position", elementNum, row);
    } finally {
        reader.close();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) Configuration(org.apache.hadoop.conf.Configuration) MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) VectorizedParquetRecordReader(org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader)
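
This test exercises the case where the reader marks the whole batch as repeating, i.e. every row carries the same map (possibly NULL). To make the asserted state concrete, here is a minimal sketch of building such a vector by hand, assuming int keys and values; repeatedMap is an illustrative helper, not part of the test:

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;

public class RepeatingMapSketch {

    // Builds a MapColumnVector in which every row carries the same map<int,int> value,
    // mirroring the state the test above asserts on (isRepeating == true).
    static MapColumnVector repeatedMap(int batchSize, long[] keys, long[] values) {
        MapColumnVector map = new MapColumnVector(batchSize,
                new LongColumnVector(keys.length), new LongColumnVector(values.length));
        map.isRepeating = true;
        map.noNulls = true;
        // Only entry 0 matters when isRepeating is set.
        map.offsets[0] = 0;
        map.lengths[0] = keys.length;
        map.childCount = keys.length;
        System.arraycopy(keys, 0, ((LongColumnVector) map.keys).vector, 0, keys.length);
        System.arraycopy(values, 0, ((LongColumnVector) map.values).vector, 0, values.length);
        return map;
    }
}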

Example 4 with MapColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in project hive by apache.

the class VectorUDFMapIndexBaseCol method evaluate.

@Override
public void evaluate(VectorizedRowBatch batch) throws HiveException {
    // return immediately if batch is empty
    final int n = batch.size;
    if (n == 0) {
        return;
    }
    if (childExpressions != null) {
        super.evaluateChildren(batch);
    }
    ColumnVector outV = batch.cols[outputColumnNum];
    MapColumnVector mapV = (MapColumnVector) batch.cols[inputColumnNum[0]];
    // indexColumnVector includes the keys of Map
    indexColumnVector = batch.cols[inputColumnNum[1]];
    ColumnVector valuesV = mapV.values;
    int[] sel = batch.selected;
    boolean[] indexIsNull = indexColumnVector.isNull;
    boolean[] mapIsNull = mapV.isNull;
    boolean[] outputIsNull = outV.isNull;
    // We do not need to do a column reset since we are carefully changing the output.
    outV.isRepeating = false;
    if (indexColumnVector.isRepeating) {
        /*
       * Repeated index or repeated NULL index.
       */
        if (indexColumnVector.noNulls || !indexIsNull[0]) {
            /*
         * Same INDEX for entire batch.
         */
            if (mapV.isRepeating) {
                if (mapV.noNulls || !mapIsNull[0]) {
                    final int repeatedMapIndex = findInMap(indexColumnVector, 0, mapV, 0);
                    if (repeatedMapIndex == -1) {
                        outV.isNull[0] = true;
                        outV.noNulls = false;
                    } else {
                        outV.isNull[0] = false;
                        outV.setElement(0, repeatedMapIndex, valuesV);
                    }
                } else {
                    outputIsNull[0] = true;
                    outV.noNulls = false;
                }
                outV.isRepeating = true;
                return;
            }
            /*
         * Individual row processing for the MAP vector with a *repeated* INDEX value.
         */
            if (mapV.noNulls) {
                if (batch.selectedInUse) {
                    if (!outV.noNulls) {
                        for (int j = 0; j < n; j++) {
                            final int i = sel[j];
                            final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
                            if (mapIndex == -1) {
                                outV.isNull[i] = true;
                                outV.noNulls = false;
                            } else {
                                outV.isNull[i] = false;
                                outV.setElement(i, mapIndex, valuesV);
                            }
                        }
                    } else {
                        for (int j = 0; j < n; j++) {
                            final int i = sel[j];
                            final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
                            if (mapIndex == -1) {
                                outV.isNull[i] = true;
                                outV.noNulls = false;
                            } else {
                                outV.setElement(i, mapIndex, valuesV);
                            }
                        }
                    }
                } else {
                    if (!outV.noNulls) {
                        // Assume it is almost always a performance win to fill all of isNull so we can
                        // safely reset noNulls.
                        Arrays.fill(outputIsNull, false);
                        outV.noNulls = true;
                    }
                    for (int i = 0; i < n; i++) {
                        final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
                        if (mapIndex == -1) {
                            outV.isNull[i] = true;
                            outV.noNulls = false;
                        } else {
                            outV.setElement(i, mapIndex, valuesV);
                        }
                    }
                }
            } else /* there are NULLs in the MAP */
            {
                if (batch.selectedInUse) {
                    for (int j = 0; j != n; j++) {
                        int i = sel[j];
                        if (!mapIsNull[i]) {
                            final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
                            if (mapIndex == -1) {
                                outV.isNull[i] = true;
                                outV.noNulls = false;
                            } else {
                                outV.isNull[i] = false;
                                outV.setElement(i, mapIndex, valuesV);
                            }
                        } else {
                            outputIsNull[i] = true;
                            outV.noNulls = false;
                        }
                    }
                } else {
                    for (int i = 0; i != n; i++) {
                        if (!mapIsNull[i]) {
                            final int mapIndex = findInMap(indexColumnVector, 0, mapV, i);
                            if (mapIndex == -1) {
                                outV.isNull[i] = true;
                                outV.noNulls = false;
                            } else {
                                outV.isNull[i] = false;
                                outV.setElement(i, mapIndex, valuesV);
                            }
                        } else {
                            outputIsNull[i] = true;
                            outV.noNulls = false;
                        }
                    }
                }
            }
        } else {
            outputIsNull[0] = true;
            outV.noNulls = false;
            outV.isRepeating = true;
        }
        return;
    }
    if (mapV.isRepeating) {
        if (mapV.noNulls || !mapIsNull[0]) {
            if (indexColumnVector.noNulls) {
                if (batch.selectedInUse) {
                    if (!outV.noNulls) {
                        for (int j = 0; j != n; j++) {
                            final int i = sel[j];
                            final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
                            if (mapIndex == -1) {
                                outV.isNull[i] = true;
                                outV.noNulls = false;
                            } else {
                                outV.isNull[i] = false;
                                outV.setElement(i, mapIndex, valuesV);
                            }
                        }
                    } else {
                        for (int j = 0; j != n; j++) {
                            final int i = sel[j];
                            final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
                            if (mapIndex == -1) {
                                outV.isNull[i] = true;
                                outV.noNulls = false;
                            } else {
                                outV.setElement(i, mapIndex, valuesV);
                            }
                        }
                    }
                } else {
                    if (!outV.noNulls) {
                        // Assume it is almost always a performance win to fill all of isNull so we can
                        // safely reset noNulls.
                        Arrays.fill(outputIsNull, false);
                        outV.noNulls = true;
                    }
                    for (int i = 0; i != n; i++) {
                        final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
                        if (mapIndex == -1) {
                            outV.isNull[i] = true;
                            outV.noNulls = false;
                        } else {
                            outV.setElement(i, mapIndex, valuesV);
                        }
                    }
                }
            } else /* there are NULLs in the inputColVector */
            {
                if (batch.selectedInUse) {
                    for (int j = 0; j != n; j++) {
                        int i = sel[j];
                        if (!indexIsNull[i]) {
                            final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
                            if (mapIndex == -1) {
                                outV.isNull[i] = true;
                                outV.noNulls = false;
                            } else {
                                outV.isNull[i] = false;
                                outV.setElement(i, mapIndex, valuesV);
                            }
                        } else {
                            outputIsNull[i] = true;
                            outV.noNulls = false;
                        }
                    }
                } else {
                    for (int i = 0; i != n; i++) {
                        if (!indexIsNull[i]) {
                            final int mapIndex = findInMap(indexColumnVector, i, mapV, 0);
                            if (mapIndex == -1) {
                                outV.isNull[i] = true;
                                outV.noNulls = false;
                            } else {
                                outV.isNull[i] = false;
                                outV.setElement(i, mapIndex, valuesV);
                            }
                        } else {
                            outputIsNull[i] = true;
                            outV.noNulls = false;
                        }
                    }
                }
            }
        } else {
            outputIsNull[0] = true;
            outV.noNulls = false;
            outV.isRepeating = true;
        }
        return;
    }
    // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    /*
     * Individual row processing for INDEX vectors and MAP vectors.
     */
    final boolean listNoNulls = mapV.noNulls;
    if (indexColumnVector.noNulls) {
        if (batch.selectedInUse) {
            if (!outV.noNulls) {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    if (listNoNulls || !mapIsNull[i]) {
                        final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
                        if (mapIndex == -1) {
                            outV.isNull[i] = true;
                            outV.noNulls = false;
                        } else {
                            outV.isNull[i] = false;
                            outV.setElement(i, mapIndex, valuesV);
                        }
                    } else {
                        outputIsNull[i] = true;
                        outV.noNulls = false;
                    }
                }
            } else {
                for (int j = 0; j != n; j++) {
                    final int i = sel[j];
                    if (listNoNulls || !mapIsNull[i]) {
                        final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
                        if (mapIndex == -1) {
                            outV.isNull[i] = true;
                            outV.noNulls = false;
                        } else {
                            outV.setElement(i, mapIndex, valuesV);
                        }
                    } else {
                        outputIsNull[i] = true;
                        outV.noNulls = false;
                    }
                }
            }
        } else {
            if (!outV.noNulls) {
                // Assume it is almost always a performance win to fill all of isNull so we can
                // safely reset noNulls.
                Arrays.fill(outputIsNull, false);
                outV.noNulls = true;
            }
            for (int i = 0; i != n; i++) {
                if (listNoNulls || !mapIsNull[i]) {
                    final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
                    if (mapIndex == -1) {
                        outV.isNull[i] = true;
                        outV.noNulls = false;
                    } else {
                        outV.setElement(i, mapIndex, valuesV);
                    }
                } else {
                    outputIsNull[i] = true;
                    outV.noNulls = false;
                }
            }
        }
    } else /* there are NULLs in the inputColVector */
    {
        if (batch.selectedInUse) {
            for (int j = 0; j != n; j++) {
                int i = sel[j];
                if (!indexIsNull[i]) {
                    if (listNoNulls || !mapIsNull[i]) {
                        final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
                        if (mapIndex == -1) {
                            outV.isNull[i] = true;
                            outV.noNulls = false;
                        } else {
                            outV.isNull[i] = false;
                            outV.setElement(i, mapIndex, valuesV);
                        }
                    } else {
                        outputIsNull[i] = true;
                        outV.noNulls = false;
                    }
                } else {
                    outputIsNull[i] = true;
                    outV.noNulls = false;
                }
            }
        } else {
            for (int i = 0; i != n; i++) {
                if (!indexIsNull[i]) {
                    if (listNoNulls || !mapIsNull[i]) {
                        final int mapIndex = findInMap(indexColumnVector, i, mapV, i);
                        if (mapIndex == -1) {
                            outV.isNull[i] = true;
                            outV.noNulls = false;
                        } else {
                            outV.isNull[i] = false;
                            outV.setElement(i, mapIndex, valuesV);
                        }
                    } else {
                        outputIsNull[i] = true;
                        outV.noNulls = false;
                    }
                } else {
                    outputIsNull[i] = true;
                    outV.noNulls = false;
                }
            }
        }
    }
}
Also used : MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector)
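
findInMap is implemented elsewhere in this class hierarchy and is not shown above; it returns the position of the requested key inside the flattened keys/values child vectors, or -1 when absent. As a rough illustration of what that lookup amounts to for bigint keys, a minimal sketch (findLongKeyInMap is an illustrative name, not the Hive API):

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;

public class MapIndexLookupSketch {

    // Linear scan over the entries of one map row; returns the child-vector position of the
    // matching key, or -1 when the key is not present in that row's map.
    static int findLongKeyInMap(MapColumnVector mapV, int mapRow, long wantedKey) {
        long[] keys = ((LongColumnVector) mapV.keys).vector;
        long start = mapV.offsets[mapRow];
        long length = mapV.lengths[mapRow];
        for (long j = 0; j < length; j++) {
            int e = (int) (start + j);
            if (keys[e] == wantedKey) {
                return e;
            }
        }
        return -1;
    }
}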

Example 5 with MapColumnVector

use of org.apache.hadoop.hive.ql.exec.vector.MapColumnVector in project flink by apache.

the class OrcBulkRowDataWriterTest method getResults.

private static List<RowData> getResults(Reader reader) throws IOException {
    List<RowData> results = new ArrayList<>();
    RecordReader recordReader = reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    while (recordReader.nextBatch(batch)) {
        BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0];
        LongColumnVector intVector = (LongColumnVector) batch.cols[1];
        ListColumnVector listVector = (ListColumnVector) batch.cols[2];
        MapColumnVector mapVector = (MapColumnVector) batch.cols[3];
        for (int r = 0; r < batch.size; r++) {
            GenericRowData readRowData = new GenericRowData(4);
            readRowData.setField(0, readStringData(stringVector, r));
            readRowData.setField(1, readInt(intVector, r));
            readRowData.setField(2, readList(listVector, r));
            readRowData.setField(3, readMap(mapVector, r));
            results.add(readRowData);
        }
    }
    // Close the reader only after all batches have been consumed.
    recordReader.close();
    return results;
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) ListColumnVector(org.apache.hadoop.hive.ql.exec.vector.ListColumnVector) MapColumnVector(org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) RecordReader(org.apache.orc.RecordReader) ArrayList(java.util.ArrayList) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)
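
readMap, readList and the other helpers are defined elsewhere in the test, so the conversion from MapColumnVector to Flink's MapData is not shown. A minimal sketch of one possible readMap, assuming a map<bigint,bigint> column (readLongMap is an illustrative name, not the Flink API):

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.flink.table.data.GenericMapData;
import org.apache.flink.table.data.MapData;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;

public class ReadMapSketch {

    // Converts one row of a map<bigint,bigint> column into Flink's MapData, or null for a NULL map.
    static MapData readLongMap(MapColumnVector mapVector, int row) {
        int r = mapVector.isRepeating ? 0 : row;
        if (!mapVector.noNulls && mapVector.isNull[r]) {
            return null;
        }
        long[] keys = ((LongColumnVector) mapVector.keys).vector;
        long[] values = ((LongColumnVector) mapVector.values).vector;
        Map<Long, Long> result = new LinkedHashMap<>();
        for (long j = 0; j < mapVector.lengths[r]; j++) {
            int e = (int) (mapVector.offsets[r] + j);
            result.put(keys[e], values[e]);
        }
        return new GenericMapData(result);
    }
}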

Aggregations

MapColumnVector (org.apache.hadoop.hive.ql.exec.vector.MapColumnVector) 11
ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector) 3
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) 3
LinkedHashMap (java.util.LinkedHashMap) 2
Configuration (org.apache.hadoop.conf.Configuration) 2
ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector) 2
LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) 2
VectorizedParquetRecordReader (org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader) 2
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) 2
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) 2
ArrayList (java.util.ArrayList) 1
Map (java.util.Map) 1
Random (java.util.Random) 1
GenericRowData (org.apache.flink.table.data.GenericRowData) 1
RowData (org.apache.flink.table.data.RowData) 1
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) 1
DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector) 1
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector) 1
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) 1
SettableMapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector) 1