Example 1 with ListColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.ListColumnVector in project hive by apache.

The class BatchToRowReader, method nextList:

private ArrayList<Object> nextList(ColumnVector vector, int row, ListTypeInfo schema, Object previous) {
    // A repeating vector keeps its single logical value at entry 0.
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        ArrayList<Object> result;
        // Recycle the previously materialized row object when its type matches.
        if (previous == null || previous.getClass() != ArrayList.class) {
            result = new ArrayList<>();
        } else {
            result = (ArrayList<Object>) previous;
        }
        ListColumnVector list = (ListColumnVector) vector;
        int length = (int) list.lengths[row];
        int offset = (int) list.offsets[row];
        result.ensureCapacity(length);
        int oldLength = result.size();
        int idx = 0;
        TypeInfo childType = schema.getListElementTypeInfo();
        while (idx < length && idx < oldLength) {
            result.set(idx, nextValue(list.child, offset + idx, childType, result.get(idx)));
            idx += 1;
        }
        if (length < oldLength) {
            // The recycled list is longer than the new row's list; drop the tail.
            for (int i = oldLength - 1; i >= length; --i) {
                result.remove(i);
            }
        }
        } else if (oldLength < length) {
            while (idx < length) {
                result.add(nextValue(list.child, offset + idx, childType, null));
                idx += 1;
            }
        }
        return result;
    } else {
        return null;
    }
}
Also used: ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector), ArrayList (java.util.ArrayList), MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo), ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo), StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo), VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo), CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo)
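
For orientation, here is a minimal, self-contained sketch (not part of the Hive source) of the layout that nextList walks: row r's elements are the slice of the child vector from offsets[r] to offsets[r] + lengths[r] - 1, so materializing a row is just an indexed copy out of child.

import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class ListVectorLayoutDemo {
    public static void main(String[] args) {
        // Three rows packed back to back in one child vector: [10, 11], [], [12, 13, 14].
        LongColumnVector child = new LongColumnVector(8);
        long[] values = {10, 11, 12, 13, 14};
        for (int i = 0; i < values.length; i++) {
            child.vector[i] = values[i];
        }
        ListColumnVector list = new ListColumnVector(3, child);
        list.offsets[0] = 0; list.lengths[0] = 2;
        list.offsets[1] = 2; list.lengths[1] = 0;
        list.offsets[2] = 2; list.lengths[2] = 3;
        list.childCount = 5;
        for (int row = 0; row < 3; row++) {
            StringBuilder sb = new StringBuilder("row " + row + ": [");
            for (int i = 0; i < list.lengths[row]; i++) {
                if (i > 0) {
                    sb.append(", ");
                }
                sb.append(child.vector[(int) list.offsets[row] + i]);
            }
            System.out.println(sb.append(']'));
        }
    }
}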

Example 2 with ListColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.ListColumnVector in project hive by apache.

The class RecordReaderImpl, method copyListColumn:

void copyListColumn(ColumnVector destination, ColumnVector source, int sourceOffset, int length) {
    ListColumnVector castedSource = (ListColumnVector) source;
    ListColumnVector castedDestination = (ListColumnVector) destination;
    castedDestination.isRepeating = castedSource.isRepeating;
    castedDestination.noNulls = castedSource.noNulls;
    if (source.isRepeating) {
        // A repeating source is fully described by entry 0.
        castedDestination.isNull[0] = castedSource.isNull[0];
        castedDestination.offsets[0] = 0;
        castedDestination.lengths[0] = castedSource.lengths[0];
        copyColumn(castedDestination.child, castedSource.child, (int) castedSource.offsets[0], (int) castedSource.lengths[0]);
    } else {
        if (!castedSource.noNulls) {
            for (int r = 0; r < length; ++r) {
                castedDestination.isNull[r] = castedSource.isNull[sourceOffset + r];
            }
        }
        // Track the span of child elements referenced by the copied rows.
        int minOffset = Integer.MAX_VALUE;
        int maxOffset = Integer.MIN_VALUE;
        for (int r = 0; r < length; ++r) {
            int childOffset = (int) castedSource.offsets[r + sourceOffset];
            int childLength = (int) castedSource.lengths[r + sourceOffset];
            castedDestination.offsets[r] = childOffset;
            castedDestination.lengths[r] = childLength;
            minOffset = Math.min(minOffset, childOffset);
            maxOffset = Math.max(maxOffset, childOffset + childLength);
        }
        if (minOffset <= maxOffset) {
            castedDestination.childCount = maxOffset - minOffset + 1;
            copyColumn(castedDestination.child, castedSource.child, minOffset, castedDestination.childCount);
        } else {
            castedDestination.childCount = 0;
        }
    }
}
Also used: ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector)
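
A quick sketch (again, not from the Hive source) of the repeating case this method special-cases: when isRepeating is set, entry 0 describes every logical row, which is why both copyListColumn and nextList only ever consult offsets[0] and lengths[0] for such vectors.

import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

public class RepeatingListDemo {
    public static void main(String[] args) {
        LongColumnVector child = new LongColumnVector(2);
        child.vector[0] = 7;
        child.vector[1] = 8;
        ListColumnVector list = new ListColumnVector(1024, child);
        list.isRepeating = true; // every logical row is the same list [7, 8]
        list.offsets[0] = 0;
        list.lengths[0] = 2;
        list.childCount = 2;
        // Any row index collapses to entry 0, mirroring the
        // "if (vector.isRepeating) row = 0;" guard in nextList.
        int row = list.isRepeating ? 0 : 500;
        System.out.println("row 500 -> [" + child.vector[(int) list.offsets[row]] + ", "
                + child.vector[(int) list.offsets[row] + 1] + "]");
    }
}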

Example 3 with ListColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.ListColumnVector in project hive by apache.

The class RecordReaderImpl, method nextList:

static ArrayList<Object> nextList(ColumnVector vector, int row, TypeDescription schema, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        ArrayList<Object> result;
        if (previous == null || previous.getClass() != ArrayList.class) {
            result = new ArrayList<>();
        } else {
            result = (ArrayList<Object>) previous;
        }
        ListColumnVector list = (ListColumnVector) vector;
        int length = (int) list.lengths[row];
        int offset = (int) list.offsets[row];
        result.ensureCapacity(length);
        int oldLength = result.size();
        int idx = 0;
        TypeDescription childType = schema.getChildren().get(0);
        while (idx < length && idx < oldLength) {
            result.set(idx, nextValue(list.child, offset + idx, childType, result.get(idx)));
            idx += 1;
        }
        if (length < oldLength) {
            // Trim the recycled list in one call rather than element by element.
            result.subList(length, result.size()).clear();
        } else if (oldLength < length) {
            while (idx < length) {
                result.add(nextValue(list.child, offset + idx, childType, null));
                idx += 1;
            }
        }
        return result;
    } else {
        return null;
    }
}
Also used: ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector), ArrayList (java.util.ArrayList), TypeDescription (org.apache.orc.TypeDescription)
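
The two nextList variants are otherwise identical; they differ in how the element type is looked up (Hive's ListTypeInfo.getListElementTypeInfo() versus ORC's TypeDescription.getChildren().get(0)) and in how the recycled list is trimmed. A small sketch of both type lookups, using the public factory methods of each API:

import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.orc.TypeDescription;

public class ElementTypeLookup {
    public static void main(String[] args) {
        // Hive TypeInfo route (Example 1): list<string> -> string
        ListTypeInfo hiveList =
                (ListTypeInfo) TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo);
        TypeInfo hiveElement = hiveList.getListElementTypeInfo();
        System.out.println(hiveElement.getTypeName()); // string

        // ORC TypeDescription route (this example): array<string> -> string
        TypeDescription orcList = TypeDescription.createList(TypeDescription.createString());
        TypeDescription orcElement = orcList.getChildren().get(0);
        System.out.println(orcElement); // string
    }
}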

Example 4 with ListColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.ListColumnVector in project druid by druid-io.

The class DruidOrcInputFormatTest, method makeOrcFile:

private File makeOrcFile() throws IOException {
    final File dir = temporaryFolder.newFolder();
    final File testOrc = new File(dir, "test.orc");
    TypeDescription schema = TypeDescription.createStruct()
            .addField("timestamp", TypeDescription.createString())
            .addField("col1", TypeDescription.createString())
            .addField("col2", TypeDescription.createList(TypeDescription.createString()))
            .addField("val1", TypeDescription.createFloat());
    Configuration conf = new Configuration();
    Writer writer = OrcFile.createWriter(
            new Path(testOrc.getPath()),
            OrcFile.writerOptions(conf)
                    .setSchema(schema)
                    .stripeSize(100000)
                    .bufferSize(10000)
                    .compress(CompressionKind.ZLIB)
                    .version(OrcFile.Version.CURRENT));
    VectorizedRowBatch batch = schema.createRowBatch();
    batch.size = 1;
    ((BytesColumnVector) batch.cols[0]).setRef(0, timestamp.getBytes(), 0, timestamp.length());
    ((BytesColumnVector) batch.cols[1]).setRef(0, col1.getBytes(), 0, col1.length());
    ListColumnVector listColumnVector = (ListColumnVector) batch.cols[2];
    listColumnVector.childCount = col2.length;
    // Row 0's list spans all of col2; offsets[0] defaults to 0 and lengths[0]
    // matches col2.length (three strings in this test).
    listColumnVector.lengths[0] = 3;
    for (int idx = 0; idx < col2.length; idx++) {
        ((BytesColumnVector) listColumnVector.child).setRef(idx, col2[idx].getBytes(), 0, col2[idx].length());
    }
    ((DoubleColumnVector) batch.cols[3]).vector[0] = val1;
    writer.addRowBatch(batch);
    writer.close();
    return testOrc;
}
Also used: Path (org.apache.hadoop.fs.Path), VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector), Configuration (org.apache.hadoop.conf.Configuration), ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), TypeDescription (org.apache.orc.TypeDescription), OrcFile (org.apache.orc.OrcFile), File (java.io.File), Writer (org.apache.orc.Writer)
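
To sanity-check the written file, the sketch below reads it back with the ORC core reader API (OrcFile.createReader, Reader.rows, RecordReader.nextBatch). The "test.orc" path is a placeholder for the temporary file the test creates; the reader itself is an illustration, not part of the Druid test.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;

public class ReadListBack {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Reader reader = OrcFile.createReader(new Path("test.orc"), OrcFile.readerOptions(conf));
        VectorizedRowBatch batch = reader.getSchema().createRowBatch();
        try (RecordReader rows = reader.rows()) {
            while (rows.nextBatch(batch)) {
                ListColumnVector col2 = (ListColumnVector) batch.cols[2];
                BytesColumnVector elems = (BytesColumnVector) col2.child;
                for (int r = 0; r < batch.size; r++) {
                    int row = col2.isRepeating ? 0 : r;
                    int offset = (int) col2.offsets[row];
                    for (int i = 0; i < col2.lengths[row]; i++) {
                        int e = offset + i;
                        System.out.println(new String(elems.vector[e], elems.start[e], elems.length[e]));
                    }
                }
            }
        }
    }
}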

Aggregations

ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector): 4 examples
ArrayList (java.util.ArrayList): 2 examples
TypeDescription (org.apache.orc.TypeDescription): 2 examples
File (java.io.File): 1 example
Configuration (org.apache.hadoop.conf.Configuration): 1 example
Path (org.apache.hadoop.fs.Path): 1 example
BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector): 1 example
DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector): 1 example
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 1 example
CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo): 1 example
ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo): 1 example
MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo): 1 example
PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo): 1 example
StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo): 1 example
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 1 example
UnionTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo): 1 example
VarcharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo): 1 example
OrcFile (org.apache.orc.OrcFile): 1 example
Writer (org.apache.orc.Writer): 1 example