Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
From the class TestInputOutputFormat, the method testColumnProjectionWithAcid:
/**
* Test column projection when using ACID.
*/
@Test
public void testColumnProjectionWithAcid() throws Exception {
  Path baseDir = new Path(workDir, "base_00100");
  testFilePath = new Path(baseDir, "bucket_00000");
  fs.mkdirs(baseDir);
  fs.delete(testFilePath, true);
  TypeDescription fileSchema = TypeDescription.fromString(
      "struct<operation:int," +
          "originalTransaction:bigint,bucket:int,rowId:bigint," +
          "currentTransaction:bigint," +
          "row:struct<a:int,b:struct<c:int>,d:string>>");
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .fileSystem(fs)
          .setSchema(fileSchema)
          .compress(org.apache.orc.CompressionKind.NONE));
  VectorizedRowBatch batch = fileSchema.createRowBatch(1000);
  batch.size = 1000;
  StructColumnVector scv = (StructColumnVector) batch.cols[5];
  // operation
  batch.cols[0].isRepeating = true;
  ((LongColumnVector) batch.cols[0]).vector[0] = 0;
  // original transaction
  batch.cols[1].isRepeating = true;
  ((LongColumnVector) batch.cols[1]).vector[0] = 1;
  // bucket
  batch.cols[2].isRepeating = true;
  ((LongColumnVector) batch.cols[2]).vector[0] = 0;
  // current transaction
  batch.cols[4].isRepeating = true;
  ((LongColumnVector) batch.cols[4]).vector[0] = 1;
  LongColumnVector lcv =
      (LongColumnVector) ((StructColumnVector) scv.fields[1]).fields[0];
  for (int r = 0; r < 1000; r++) {
    // row id
    ((LongColumnVector) batch.cols[3]).vector[r] = r;
    // a
    ((LongColumnVector) scv.fields[0]).vector[r] = r * 42;
    // b.c
    lcv.vector[r] = r * 10001;
    // d
    ((BytesColumnVector) scv.fields[2]).setVal(r,
        Integer.toHexString(r).getBytes(StandardCharsets.UTF_8));
  }
  writer.addRowBatch(batch);
  writer.addUserMetadata(OrcRecordUpdater.ACID_KEY_INDEX_NAME,
      ByteBuffer.wrap("0,0,999".getBytes(StandardCharsets.UTF_8)));
  writer.close();
  long fileLength = fs.getFileStatus(testFilePath).getLen();
  // test with same schema with include
  conf.set(ValidTxnList.VALID_TXNS_KEY, "100:99:");
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "a,b,d");
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "int,struct<c:int>,string");
  conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
  OrcSplit split = new OrcSplit(testFilePath, null, 0, fileLength,
      new String[0], null, false, true,
      new ArrayList<AcidInputFormat.DeltaMetaData>(), fileLength, fileLength);
  OrcInputFormat inputFormat = new OrcInputFormat();
  AcidInputFormat.RowReader<OrcStruct> reader =
      inputFormat.getReader(split, new AcidInputFormat.Options(conf));
  int record = 0;
  RecordIdentifier id = reader.createKey();
  OrcStruct struct = reader.createValue();
  while (reader.next(id, struct)) {
    assertEquals("id " + record, record, id.getRowId());
    assertEquals("bucket " + record, 0, id.getBucketId());
    assertEquals("trans " + record, 1, id.getTransactionId());
    assertEquals("a " + record, 42 * record,
        ((IntWritable) struct.getFieldValue(0)).get());
    assertEquals(null, struct.getFieldValue(1));
    assertEquals("d " + record, Integer.toHexString(record),
        struct.getFieldValue(2).toString());
    record += 1;
  }
  assertEquals(1000, record);
  reader.close();
  // test with schema evolution and include
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "a,b,d,f");
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "int,struct<c:int,e:string>,string,int");
  conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2,3");
  split = new OrcSplit(testFilePath, null, 0, fileLength,
      new String[0], null, false, true,
      new ArrayList<AcidInputFormat.DeltaMetaData>(), fileLength, fileLength);
  inputFormat = new OrcInputFormat();
  reader = inputFormat.getReader(split, new AcidInputFormat.Options(conf));
  record = 0;
  id = reader.createKey();
  struct = reader.createValue();
  while (reader.next(id, struct)) {
    assertEquals("id " + record, record, id.getRowId());
    assertEquals("bucket " + record, 0, id.getBucketId());
    assertEquals("trans " + record, 1, id.getTransactionId());
    assertEquals("a " + record, 42 * record,
        ((IntWritable) struct.getFieldValue(0)).get());
    assertEquals(null, struct.getFieldValue(1));
    assertEquals("d " + record, Integer.toHexString(record),
        struct.getFieldValue(2).toString());
    assertEquals("f " + record, null, struct.getFieldValue(3));
    record += 1;
  }
  assertEquals(1000, record);
  reader.close();
}
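The BytesColumnVector usage in this test is the setVal call inside the population loop, which copies each hex string into the column's shared byte buffer. As a stand-alone illustration (a sketch, not code from the Hive source; the helper name fillWithHexStrings is hypothetical), the same population pattern looks like this:

static void fillWithHexStrings(BytesColumnVector bcv, int rows) {
  // allocate the shared buffer that setVal copies into
  bcv.initBuffer();
  for (int r = 0; r < rows; r++) {
    byte[] value = Integer.toHexString(r).getBytes(StandardCharsets.UTF_8);
    // setVal copies the bytes; setRef(r, value, 0, value.length) would keep a reference instead
    bcv.setVal(r, value, 0, value.length);
  }
}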
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
From the class RecordReaderImpl, the method nextBinary:
static BytesWritable nextBinary(ColumnVector vector, int row, Object previous) {
  if (vector.isRepeating) {
    row = 0;
  }
  if (vector.noNulls || !vector.isNull[row]) {
    BytesWritable result;
    if (previous == null || previous.getClass() != BytesWritable.class) {
      result = new BytesWritable();
    } else {
      result = (BytesWritable) previous;
    }
    BytesColumnVector bytes = (BytesColumnVector) vector;
    result.set(bytes.vector[row], bytes.start[row], bytes.length[row]);
    return result;
  } else {
    return null;
  }
}
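A hypothetical caller (not part of the Hive source, and it would need package access since nextBinary is package-private) passes the previously returned writable back in, so non-null rows refill the same BytesWritable instead of allocating a new object each time:

BytesWritable reused = null;
for (int r = 0; r < batch.size; ++r) {
  reused = nextBinary(batch.cols[0], r, reused);
  // null for a null cell; otherwise a BytesWritable holding row r's bytes
}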
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
From the class RecordReaderImpl, the method copyBytesColumn:
void copyBytesColumn(ColumnVector destination, ColumnVector source, int sourceOffset, int length) {
  BytesColumnVector castedSource = (BytesColumnVector) source;
  BytesColumnVector castedDestination = (BytesColumnVector) destination;
  castedDestination.isRepeating = castedSource.isRepeating;
  castedDestination.noNulls = castedSource.noNulls;
  if (source.isRepeating) {
    castedDestination.isNull[0] = castedSource.isNull[0];
    if (!castedSource.isNull[0]) {
      castedDestination.setVal(0, castedSource.vector[0],
          castedSource.start[0], castedSource.length[0]);
    }
  } else {
    if (!castedSource.noNulls) {
      for (int r = 0; r < length; ++r) {
        castedDestination.isNull[r] = castedSource.isNull[sourceOffset + r];
        if (!castedDestination.isNull[r]) {
          castedDestination.setVal(r, castedSource.vector[sourceOffset + r],
              castedSource.start[sourceOffset + r], castedSource.length[sourceOffset + r]);
        }
      }
    } else {
      for (int r = 0; r < length; ++r) {
        castedDestination.setVal(r, castedSource.vector[sourceOffset + r],
            castedSource.start[sourceOffset + r], castedSource.length[sourceOffset + r]);
      }
    }
  }
}
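A hedged usage sketch (not from the Hive source; copyBytesColumn is an instance method, so this would have to run inside the same class): copying the first batch.size entries of a string column into a scratch vector. It assumes both vectors were sized for the batch and that the destination buffer has been initialized, since setVal copies into that buffer:

BytesColumnVector scratch = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
scratch.initBuffer();  // allocate the buffer that setVal copies into
copyBytesColumn(scratch, batch.cols[0], 0, batch.size);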
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
From the class RecordReaderImpl, the method nextVarchar:
static HiveVarcharWritable nextVarchar(ColumnVector vector, int row, int size, Object previous) {
  if (vector.isRepeating) {
    row = 0;
  }
  if (vector.noNulls || !vector.isNull[row]) {
    HiveVarcharWritable result;
    if (previous == null || previous.getClass() != HiveVarcharWritable.class) {
      result = new HiveVarcharWritable();
    } else {
      result = (HiveVarcharWritable) previous;
    }
    BytesColumnVector bytes = (BytesColumnVector) vector;
    result.set(bytes.toString(row), size);
    return result;
  } else {
    return null;
  }
}
Use of org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector in project hive by apache.
From the class RecordReaderImpl, the method nextChar:
static HiveCharWritable nextChar(ColumnVector vector, int row, int size, Object previous) {
  if (vector.isRepeating) {
    row = 0;
  }
  if (vector.noNulls || !vector.isNull[row]) {
    HiveCharWritable result;
    if (previous == null || previous.getClass() != HiveCharWritable.class) {
      result = new HiveCharWritable();
    } else {
      result = (HiveCharWritable) previous;
    }
    BytesColumnVector bytes = (BytesColumnVector) vector;
    result.set(bytes.toString(row), size);
    return result;
  } else {
    return null;
  }
}
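nextVarchar and nextChar differ only in the writable they fill: both decode the row through BytesColumnVector.toString(row) and let the writable enforce the declared length. A hypothetical caller (not in the Hive source) reading a varchar(10) column could look like this:

HiveVarcharWritable prev = null;
for (int r = 0; r < batch.size; ++r) {
  prev = nextVarchar(batch.cols[0], r, 10, prev);
  // null for a null cell; otherwise the row's string, truncated to at most 10 characters
}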