
Example 61 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class TestInputOutputFormat, method testColumnProjectionWithAcid.

/**
   * Test column projection when using ACID.
   */
@Test
public void testColumnProjectionWithAcid() throws Exception {
    Path baseDir = new Path(workDir, "base_00100");
    testFilePath = new Path(baseDir, "bucket_00000");
    fs.mkdirs(baseDir);
    fs.delete(testFilePath, true);
    TypeDescription fileSchema = TypeDescription.fromString("struct<operation:int," + "originalTransaction:bigint,bucket:int,rowId:bigint," + "currentTransaction:bigint," + "row:struct<a:int,b:struct<c:int>,d:string>>");
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).fileSystem(fs).setSchema(fileSchema).compress(org.apache.orc.CompressionKind.NONE));
    VectorizedRowBatch batch = fileSchema.createRowBatch(1000);
    batch.size = 1000;
    StructColumnVector scv = (StructColumnVector) batch.cols[5];
    // operation
    batch.cols[0].isRepeating = true;
    ((LongColumnVector) batch.cols[0]).vector[0] = 0;
    // original transaction
    batch.cols[1].isRepeating = true;
    ((LongColumnVector) batch.cols[1]).vector[0] = 1;
    // bucket
    batch.cols[2].isRepeating = true;
    ((LongColumnVector) batch.cols[2]).vector[0] = 0;
    // current transaction
    batch.cols[4].isRepeating = true;
    ((LongColumnVector) batch.cols[4]).vector[0] = 1;
    LongColumnVector lcv = (LongColumnVector) ((StructColumnVector) scv.fields[1]).fields[0];
    for (int r = 0; r < 1000; r++) {
        // row id
        ((LongColumnVector) batch.cols[3]).vector[r] = r;
        // a
        ((LongColumnVector) scv.fields[0]).vector[r] = r * 42;
        // b.c
        lcv.vector[r] = r * 10001;
        // d
        ((BytesColumnVector) scv.fields[2]).setVal(r, Integer.toHexString(r).getBytes(StandardCharsets.UTF_8));
    }
    writer.addRowBatch(batch);
    writer.addUserMetadata(OrcRecordUpdater.ACID_KEY_INDEX_NAME, ByteBuffer.wrap("0,0,999".getBytes(StandardCharsets.UTF_8)));
    writer.close();
    long fileLength = fs.getFileStatus(testFilePath).getLen();
    // test with same schema with include
    conf.set(ValidTxnList.VALID_TXNS_KEY, "100:99:");
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "a,b,d");
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "int,struct<c:int>,string");
    conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
    OrcSplit split = new OrcSplit(testFilePath, null, 0, fileLength, new String[0], null, false, true, new ArrayList<AcidInputFormat.DeltaMetaData>(), fileLength, fileLength);
    OrcInputFormat inputFormat = new OrcInputFormat();
    AcidInputFormat.RowReader<OrcStruct> reader = inputFormat.getReader(split, new AcidInputFormat.Options(conf));
    int record = 0;
    RecordIdentifier id = reader.createKey();
    OrcStruct struct = reader.createValue();
    while (reader.next(id, struct)) {
        assertEquals("id " + record, record, id.getRowId());
        assertEquals("bucket " + record, 0, id.getBucketId());
        assertEquals("trans " + record, 1, id.getTransactionId());
        assertEquals("a " + record, 42 * record, ((IntWritable) struct.getFieldValue(0)).get());
        assertEquals(null, struct.getFieldValue(1));
        assertEquals("d " + record, Integer.toHexString(record), struct.getFieldValue(2).toString());
        record += 1;
    }
    assertEquals(1000, record);
    reader.close();
    // test with schema evolution and include
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "a,b,d,f");
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "int,struct<c:int,e:string>,string,int");
    conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
    conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2,3");
    split = new OrcSplit(testFilePath, null, 0, fileLength, new String[0], null, false, true, new ArrayList<AcidInputFormat.DeltaMetaData>(), fileLength, fileLength);
    inputFormat = new OrcInputFormat();
    reader = inputFormat.getReader(split, new AcidInputFormat.Options(conf));
    record = 0;
    id = reader.createKey();
    struct = reader.createValue();
    while (reader.next(id, struct)) {
        assertEquals("id " + record, record, id.getRowId());
        assertEquals("bucket " + record, 0, id.getBucketId());
        assertEquals("trans " + record, 1, id.getTransactionId());
        assertEquals("a " + record, 42 * record, ((IntWritable) struct.getFieldValue(0)).get());
        assertEquals(null, struct.getFieldValue(1));
        assertEquals("d " + record, Integer.toHexString(record), struct.getFieldValue(2).toString());
        assertEquals("f " + record, null, struct.getFieldValue(3));
        record += 1;
    }
    assertEquals(1000, record);
    reader.close();
}
Also used : Path (org.apache.hadoop.fs.Path), ArrayList (java.util.ArrayList), AcidInputFormat (org.apache.hadoop.hive.ql.io.AcidInputFormat), RecordIdentifier (org.apache.hadoop.hive.ql.io.RecordIdentifier), VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), StructColumnVector (org.apache.hadoop.hive.ql.exec.vector.StructColumnVector), BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), TypeDescription (org.apache.orc.TypeDescription), RecordWriter (org.apache.hadoop.mapred.RecordWriter), LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector), Test (org.junit.Test)
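For reference, the setVal calls that fill column d above are the core BytesColumnVector write pattern: the bytes are copied into the vector's managed buffer, so the caller may reuse its own array. A minimal standalone sketch (hypothetical schema and batch size, not part of the Hive test; imports as in the list above):

TypeDescription schema = TypeDescription.fromString("struct<d:string>");
VectorizedRowBatch batch = schema.createRowBatch(8);
BytesColumnVector d = (BytesColumnVector) batch.cols[0];
for (int r = 0; r < 8; ++r) {
    // setVal copies the byte range into the vector's buffer
    d.setVal(r, Integer.toHexString(r).getBytes(StandardCharsets.UTF_8));
}
batch.size = 8;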

Example 62 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class RecordReaderImpl, method nextBinary.

static BytesWritable nextBinary(ColumnVector vector, int row, Object previous) {
    if (vector.isRepeating) {
        // a repeating vector stores its single value at index 0
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        BytesWritable result;
        if (previous == null || previous.getClass() != BytesWritable.class) {
            result = new BytesWritable();
        } else {
            // reuse the previous writable to avoid an allocation per row
            result = (BytesWritable) previous;
        }
        BytesColumnVector bytes = (BytesColumnVector) vector;
        result.set(bytes.vector[row], bytes.start[row], bytes.length[row]);
        return result;
    } else {
        return null;
    }
}
Also used : BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), BytesWritable (org.apache.hadoop.io.BytesWritable)
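A hypothetical call site (the surrounding loop and column index are assumptions, not taken from RecordReaderImpl), showing how previous threads the writable through so it is reused rather than reallocated per row:

Object previous = null;
for (int r = 0; r < batch.size; ++r) {
    BytesWritable value = nextBinary(batch.cols[0], r, previous);
    // value is null for a null row; otherwise it is typically the same reused instance
    previous = value;
}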

Example 63 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class RecordReaderImpl, method copyBytesColumn.

void copyBytesColumn(ColumnVector destination, ColumnVector source, int sourceOffset, int length) {
    BytesColumnVector castedSource = (BytesColumnVector) source;
    BytesColumnVector castedDestination = (BytesColumnVector) destination;
    castedDestination.isRepeating = castedSource.isRepeating;
    castedDestination.noNulls = castedSource.noNulls;
    if (source.isRepeating) {
        // fast path: a repeating vector stores its single value at index 0
        castedDestination.isNull[0] = castedSource.isNull[0];
        if (!castedSource.isNull[0]) {
            castedDestination.setVal(0, castedSource.vector[0], castedSource.start[0], castedSource.length[0]);
        }
    } else {
        if (!castedSource.noNulls) {
            // nulls present: copy the per-row null flag before each value
            for (int r = 0; r < length; ++r) {
                castedDestination.isNull[r] = castedSource.isNull[sourceOffset + r];
                if (!castedDestination.isNull[r]) {
                    castedDestination.setVal(r, castedSource.vector[sourceOffset + r], castedSource.start[sourceOffset + r], castedSource.length[sourceOffset + r]);
                }
            }
        } else {
            for (int r = 0; r < length; ++r) {
                castedDestination.setVal(r, castedSource.vector[sourceOffset + r], castedSource.start[sourceOffset + r], castedSource.length[sourceOffset + r]);
            }
        }
    }
}
Also used : BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)
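A hypothetical call site inside RecordReaderImpl (batch names, offset, and length are assumptions); both columns must come from batches built over the same string or binary schema so the destination buffers exist:

// copy rows 10..109 of the source column into rows 0..99 of the destination
copyBytesColumn(destBatch.cols[0], sourceBatch.cols[0], 10, 100);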

Example 64 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class RecordReaderImpl, method nextVarchar.

static HiveVarcharWritable nextVarchar(ColumnVector vector, int row, int size, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        HiveVarcharWritable result;
        if (previous == null || previous.getClass() != HiveVarcharWritable.class) {
            result = new HiveVarcharWritable();
        } else {
            result = (HiveVarcharWritable) previous;
        }
        BytesColumnVector bytes = (BytesColumnVector) vector;
        result.set(bytes.toString(row), size);
        return result;
    } else {
        return null;
    }
}
Also used : BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), HiveVarcharWritable (org.apache.hadoop.hive.serde2.io.HiveVarcharWritable)
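A hypothetical call (the column index and size of 10 are assumptions): size is the declared maximum length of the varchar(n) type, which HiveVarcharWritable.set enforces by truncating longer values:

// for a varchar(10) column: values longer than 10 characters are truncated
HiveVarcharWritable value = nextVarchar(batch.cols[0], r, 10, null);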

Example 65 with BytesColumnVector

Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.

From the class RecordReaderImpl, method nextChar.

static HiveCharWritable nextChar(ColumnVector vector, int row, int size, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        HiveCharWritable result;
        if (previous == null || previous.getClass() != HiveCharWritable.class) {
            result = new HiveCharWritable();
        } else {
            result = (HiveCharWritable) previous;
        }
        BytesColumnVector bytes = (BytesColumnVector) vector;
        result.set(bytes.toString(row), size);
        return result;
    } else {
        return null;
    }
}
Also used : BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector), HiveCharWritable (org.apache.hadoop.hive.serde2.io.HiveCharWritable)
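The char variant mirrors the varchar one; the difference lies in the writable: HiveCharWritable pads the stored value with trailing spaces up to size, whereas the varchar writable only truncates. A hypothetical call:

// for a char(10) column: "abc" reads back padded to 10 characters
HiveCharWritable value = nextChar(batch.cols[0], r, 10, null);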

Aggregations

BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)124 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)66 Test (org.junit.Test)50 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)44 TestVectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.TestVectorizedRowBatch)12 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)10 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)8 TimestampColumnVector (org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector)8 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)8 Text (org.apache.hadoop.io.Text)8 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)6 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 Path (org.apache.hadoop.fs.Path)4 JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)4 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)4 TypeDescription (org.apache.orc.TypeDescription)4 UnsupportedEncodingException (java.io.UnsupportedEncodingException)3 ParseException (java.text.ParseException)3 Random (java.util.Random)3