Search in sources :

Example 1 with LongColumnVector

use of org.apache.orc.storage.ql.exec.vector.LongColumnVector in project flink by apache.

the class OrcColumnarRowSplitReaderNoHiveTest method prepareReadFileWithTypes.

/**
 * Writes an ORC file at {@code file} holding a single batch of {@code rowSize} rows with the
 * schema {@code struct<f0:float,f1:double,f2:timestamp,f3:tinyint,f4:smallint>}.
 *
 * <p>Rows {@code 0 .. rowSize - 2} carry the row index in every column (the timestamp column
 * uses {@code toTimestamp(i)}); the last row is marked null in all five columns.
 *
 * @param file path of the ORC file to create
 * @param rowSize number of rows to write, including the trailing all-null row
 * @throws IOException if the writer cannot be created, written to, or closed
 */
@Override
protected void prepareReadFileWithTypes(String file, int rowSize) throws IOException {
    // NOTE: orc has field name information, so name should be same as orc
    TypeDescription schema = TypeDescription.fromString("struct<" + "f0:float," + "f1:double," + "f2:timestamp," + "f3:tinyint," + "f4:smallint" + ">");
    org.apache.hadoop.fs.Path filePath = new org.apache.hadoop.fs.Path(file);
    Configuration conf = new Configuration();
    Writer writer = OrcFile.createWriter(filePath, OrcFile.writerOptions(conf).setSchema(schema));
    // Close the writer even when filling or appending the batch fails, so the
    // underlying file handle is not leaked.
    try {
        VectorizedRowBatch batch = schema.createRowBatch(rowSize);
        DoubleColumnVector col0 = (DoubleColumnVector) batch.cols[0];
        DoubleColumnVector col1 = (DoubleColumnVector) batch.cols[1];
        TimestampColumnVector col2 = (TimestampColumnVector) batch.cols[2];
        LongColumnVector col3 = (LongColumnVector) batch.cols[3];
        LongColumnVector col4 = (LongColumnVector) batch.cols[4];
        // Every column contains a null (the last row), so noNulls must be cleared.
        col0.noNulls = false;
        col1.noNulls = false;
        col2.noNulls = false;
        col3.noNulls = false;
        col4.noNulls = false;
        // Populate all rows except the last one, which is reserved for the null row.
        for (int i = 0; i < rowSize - 1; i++) {
            col0.vector[i] = i;
            col1.vector[i] = i;
            Timestamp timestamp = toTimestamp(i);
            col2.time[i] = timestamp.getTime();
            col2.nanos[i] = timestamp.getNanos();
            col3.vector[i] = i;
            col4.vector[i] = i;
        }
        // Mark the final row as null in every column.
        col0.isNull[rowSize - 1] = true;
        col1.isNull[rowSize - 1] = true;
        col2.isNull[rowSize - 1] = true;
        col3.isNull[rowSize - 1] = true;
        col4.isNull[rowSize - 1] = true;
        batch.size = rowSize;
        writer.addRowBatch(batch);
        batch.reset();
    } finally {
        writer.close();
    }
}
Also used : TimestampColumnVector(org.apache.orc.storage.ql.exec.vector.TimestampColumnVector) DoubleColumnVector(org.apache.orc.storage.ql.exec.vector.DoubleColumnVector) Configuration(org.apache.hadoop.conf.Configuration) Timestamp(java.sql.Timestamp) VectorizedRowBatch(org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch) TypeDescription(org.apache.orc.TypeDescription) Writer(org.apache.orc.Writer) LongColumnVector(org.apache.orc.storage.ql.exec.vector.LongColumnVector)

Example 2 with LongColumnVector

use of org.apache.orc.storage.ql.exec.vector.LongColumnVector in project incubator-gobblin by apache.

the class GobblinBaseOrcWriter method removeRefOfColumnVectorChild.

/**
 * Releases the data arrays referenced by the given {@link ColumnVector}, which must not be null.
 *
 * <p>Struct, list, map and union vectors are walked recursively through their children. For
 * long, double, bytes and decimal vectors the backing array fields are set to {@code null}.
 * Any other vector type is left untouched.
 */
private void removeRefOfColumnVectorChild(ColumnVector cv) {
    // Composite vectors: descend into each child and stop.
    if (cv instanceof StructColumnVector) {
        for (ColumnVector field : ((StructColumnVector) cv).fields) {
            removeRefOfColumnVectorChild(field);
        }
        return;
    }
    if (cv instanceof ListColumnVector) {
        removeRefOfColumnVectorChild(((ListColumnVector) cv).child);
        return;
    }
    if (cv instanceof MapColumnVector) {
        MapColumnVector mapVector = (MapColumnVector) cv;
        removeRefOfColumnVectorChild(mapVector.keys);
        removeRefOfColumnVectorChild(mapVector.values);
        return;
    }
    if (cv instanceof UnionColumnVector) {
        for (ColumnVector variant : ((UnionColumnVector) cv).fields) {
            removeRefOfColumnVectorChild(variant);
        }
        return;
    }

    // Leaf vectors: drop the references to their backing arrays.
    if (cv instanceof LongColumnVector) {
        LongColumnVector longVector = (LongColumnVector) cv;
        longVector.vector = null;
        return;
    }
    if (cv instanceof DoubleColumnVector) {
        DoubleColumnVector doubleVector = (DoubleColumnVector) cv;
        doubleVector.vector = null;
        return;
    }
    if (cv instanceof BytesColumnVector) {
        BytesColumnVector bytesVector = (BytesColumnVector) cv;
        bytesVector.vector = null;
        bytesVector.start = null;
        bytesVector.length = null;
        return;
    }
    if (cv instanceof DecimalColumnVector) {
        DecimalColumnVector decimalVector = (DecimalColumnVector) cv;
        decimalVector.vector = null;
    }
}
Also used : DecimalColumnVector(org.apache.orc.storage.ql.exec.vector.DecimalColumnVector) DoubleColumnVector(org.apache.orc.storage.ql.exec.vector.DoubleColumnVector) ListColumnVector(org.apache.orc.storage.ql.exec.vector.ListColumnVector) MapColumnVector(org.apache.orc.storage.ql.exec.vector.MapColumnVector) StructColumnVector(org.apache.orc.storage.ql.exec.vector.StructColumnVector) UnionColumnVector(org.apache.orc.storage.ql.exec.vector.UnionColumnVector) BytesColumnVector(org.apache.orc.storage.ql.exec.vector.BytesColumnVector) LongColumnVector(org.apache.orc.storage.ql.exec.vector.LongColumnVector) ListColumnVector(org.apache.orc.storage.ql.exec.vector.ListColumnVector) UnionColumnVector(org.apache.orc.storage.ql.exec.vector.UnionColumnVector) BytesColumnVector(org.apache.orc.storage.ql.exec.vector.BytesColumnVector) StructColumnVector(org.apache.orc.storage.ql.exec.vector.StructColumnVector) DecimalColumnVector(org.apache.orc.storage.ql.exec.vector.DecimalColumnVector) LongColumnVector(org.apache.orc.storage.ql.exec.vector.LongColumnVector) ColumnVector(org.apache.orc.storage.ql.exec.vector.ColumnVector) MapColumnVector(org.apache.orc.storage.ql.exec.vector.MapColumnVector) DoubleColumnVector(org.apache.orc.storage.ql.exec.vector.DoubleColumnVector)

Example 3 with LongColumnVector

use of org.apache.orc.storage.ql.exec.vector.LongColumnVector in project flink by apache.

the class AbstractOrcNoHiveVector method createLongVector.

/**
 * Creates a {@link LongColumnVector} of capacity {@code batchSize} that represents a single
 * constant for the whole batch.
 *
 * @param batchSize capacity passed to the {@link LongColumnVector} constructor
 * @param value the constant; {@code null} yields a repeating null vector, otherwise it must be
 *     a {@link Number} whose {@code longValue()} is used to fill the vector
 * @return the newly created vector
 * @throws ClassCastException if {@code value} is non-null and not a {@link Number}
 */
private static LongColumnVector createLongVector(int batchSize, Object value) {
    final LongColumnVector vector = new LongColumnVector(batchSize);
    if (value != null) {
        long constant = ((Number) value).longValue();
        vector.fill(constant);
        vector.isNull[0] = false;
        return vector;
    }
    // Null constant: flag slot 0 as null and mark the vector as repeating.
    vector.noNulls = false;
    vector.isNull[0] = true;
    vector.isRepeating = true;
    return vector;
}
Also used : LongColumnVector(org.apache.orc.storage.ql.exec.vector.LongColumnVector)

Aggregations

LongColumnVector (org.apache.orc.storage.ql.exec.vector.LongColumnVector)3 DoubleColumnVector (org.apache.orc.storage.ql.exec.vector.DoubleColumnVector)2 Timestamp (java.sql.Timestamp)1 Configuration (org.apache.hadoop.conf.Configuration)1 TypeDescription (org.apache.orc.TypeDescription)1 Writer (org.apache.orc.Writer)1 BytesColumnVector (org.apache.orc.storage.ql.exec.vector.BytesColumnVector)1 ColumnVector (org.apache.orc.storage.ql.exec.vector.ColumnVector)1 DecimalColumnVector (org.apache.orc.storage.ql.exec.vector.DecimalColumnVector)1 ListColumnVector (org.apache.orc.storage.ql.exec.vector.ListColumnVector)1 MapColumnVector (org.apache.orc.storage.ql.exec.vector.MapColumnVector)1 StructColumnVector (org.apache.orc.storage.ql.exec.vector.StructColumnVector)1 TimestampColumnVector (org.apache.orc.storage.ql.exec.vector.TimestampColumnVector)1 UnionColumnVector (org.apache.orc.storage.ql.exec.vector.UnionColumnVector)1 VectorizedRowBatch (org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch)1