Use of org.apache.hadoop.io.BinaryComparable in project hive by apache.
The class LazyBinarySerDe, method deserialize.
/**
 * Deserialize a table record to a lazybinary struct.
 */
@Override
public Object deserialize(Writable field) throws SerDeException {
  if (byteArrayRef == null) {
    byteArrayRef = new ByteArrayRef();
  }
  BinaryComparable b = (BinaryComparable) field;
  if (b.getLength() == 0) {
    return null;
  }
  byteArrayRef.setData(b.getBytes());
  cachedLazyBinaryStruct.init(byteArrayRef, 0, b.getLength());
  lastOperationSerialize = false;
  lastOperationDeserialize = true;
  return cachedLazyBinaryStruct;
}
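A caller-side sketch of how this method is typically used; it is hedged, not taken from Hive's sources. The serde is assumed to be an already-initialized LazyBinarySerDe and writable a BytesWritable produced by an upstream reader. The point it illustrates is that the same cached LazyBinaryStruct is returned on every call, so the row must be read (or copied) before the next deserialize().

// Sketch only: 'serde' and 'writable' are assumed to exist and be set up elsewhere.
Object row = serde.deserialize(writable);
if (row != null) {
  // Inspect the reused struct before deserialize() is called again.
  StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
  List<Object> fieldValues = soi.getStructFieldsDataAsList(row);
}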
Use of org.apache.hadoop.io.BinaryComparable in project hive by apache.
The class LazySimpleSerDe, method doDeserialize.
/**
 * Deserialize a row from the Writable to a LazyObject.
 *
 * @param field
 *          the Writable that contains the data
 * @return The deserialized row Object.
 * @see org.apache.hadoop.hive.serde2.AbstractSerDe#deserialize(Writable)
 */
@Override
public Object doDeserialize(Writable field) throws SerDeException {
  if (byteArrayRef == null) {
    byteArrayRef = new ByteArrayRef();
  }
  BinaryComparable b = (BinaryComparable) field;
  byteArrayRef.setData(b.getBytes());
  cachedLazyStruct.init(byteArrayRef, 0, b.getLength());
  lastOperationSerialize = false;
  lastOperationDeserialize = true;
  return cachedLazyStruct;
}
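One reason the cast to BinaryComparable works for both SerDes: Text (the usual LazySimpleSerDe input) and BytesWritable (the usual LazyBinarySerDe input) both extend BinaryComparable, so getBytes() and getLength() expose the raw buffer without copying. A minimal sketch using only standard Hadoop classes, with no Hive context assumed:

// Both Writable key types expose the same zero-copy view of their bytes.
BinaryComparable fromText = new Text("1\t2\t3");
BinaryComparable fromBytes = new BytesWritable(new byte[] { 1, 2, 3 });
// getBytes() may return a backing array longer than the valid data, so callers
// must respect getLength(), exactly as init(byteArrayRef, 0, b.getLength()) does above.
byte[] raw = fromText.getBytes();
int validLength = fromText.getLength();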
Use of org.apache.hadoop.io.BinaryComparable in project hadoop by apache.
The class TestBinaryPartitioner, method testCustomOffsets.
@Test
public void testCustomOffsets() {
  Configuration conf = new Configuration();
  BinaryComparable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
  BinaryComparable key2 = new BytesWritable(new byte[] { 6, 2, 3, 7, 8 });
  BinaryPartitioner.setOffsets(conf, 1, -3);
  BinaryPartitioner<?> partitioner =
      ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
  int partition1 = partitioner.getPartition(key1, null, 10);
  int partition2 = partitioner.getPartition(key2, null, 10);
  assertEquals(partition1, partition2);
  BinaryPartitioner.setOffsets(conf, 1, 2);
  partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
  partition1 = partitioner.getPartition(key1, null, 10);
  partition2 = partitioner.getPartition(key2, null, 10);
  assertEquals(partition1, partition2);
  BinaryPartitioner.setOffsets(conf, -4, -3);
  partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
  partition1 = partitioner.getPartition(key1, null, 10);
  partition2 = partitioner.getPartition(key2, null, 10);
  assertEquals(partition1, partition2);
}
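For reference, a hedged sketch of the byte slice those offsets select (it mirrors BinaryPartitioner's documented offset semantics rather than quoting its source): a negative offset counts from the end of the key, so setOffsets(conf, 1, -3) on the 5-byte keys above selects indices 1..2, the bytes { 2, 3 } that key1 and key2 share, which is why all three configurations in the test yield equal partitions.

// Hedged sketch of the selected range and an assumed hash-based partition choice.
byte[] bytes = key1.getBytes();
int length = key1.getLength();
int left = 1;                       // non-negative offsets count from the start
int right = length + (-3);          // negative offsets count from the end: index 2 here
int hash = WritableComparator.hashBytes(bytes, left, right - left + 1);
int partition = (hash & Integer.MAX_VALUE) % 10;   // 10 = numPartitions used in the test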
Use of org.apache.hadoop.io.BinaryComparable in project hadoop by apache.
The class TestBinaryPartitioner, method testLowerBound.
@Test
public void testLowerBound() {
  Configuration conf = new Configuration();
  BinaryPartitioner.setLeftOffset(conf, 0);
  BinaryPartitioner<?> partitioner =
      ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
  BinaryComparable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
  BinaryComparable key2 = new BytesWritable(new byte[] { 6, 2, 3, 4, 5 });
  int partition1 = partitioner.getPartition(key1, null, 10);
  int partition2 = partitioner.getPartition(key2, null, 10);
  assertTrue(partition1 != partition2);
}
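A hedged sketch of the usual job wiring for this partitioner; the job name, mapper/reducer setup, and input/output paths are omitted and assumed to be configured elsewhere.

// Sketch only: route keys by their raw bytes starting at the first byte, as the
// lower-bound test does. The right offset keeps its default, which includes the
// rest of the key.
Job job = Job.getInstance(new Configuration(), "binary-partitioned");   // name is arbitrary
job.setPartitionerClass(BinaryPartitioner.class);
BinaryPartitioner.setLeftOffset(job.getConfiguration(), 0);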
Use of org.apache.hadoop.io.BinaryComparable in project hive by apache.
The class VectorMapOperator, method process.
@Override
public void process(Writable value) throws HiveException {
  // A mapper can span multiple files/partitions.
  // The VectorPartitionContext needs to be changed if the input file has changed.
  ExecMapperContext context = getExecContext();
  if (context != null && context.inputFileChanged()) {
    // The child operators clean up if the input file has changed.
    cleanUpInputFileChanged();
  }
  if (!oneRootOperator.getDone()) {

    /*
     * Three different kinds of vectorized reading are supported:
     *
     * 1) Read the Vectorized Input File Format which returns VectorizedRowBatch as the row.
     *
     * 2) Read using VectorDeserializeRow to deserialize each row into the VectorizedRowBatch.
     *
     * 3) Read using the regular partition deserializer to get the row object and assign it
     *    into the VectorizedRowBatch with VectorAssignRow.
     */
    try {
      if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
        if (!deliverVectorizedRowBatch(value)) {
          // Operator tree is now done.
          return;
        }
      } else if (value instanceof VectorizedRowBatch) {

        /*
         * Clear out any rows we may have processed in row-mode for the current partition.
         */
        if (!flushDeserializerBatch()) {
          // Operator tree is now done.
          return;
        }
        if (!deliverVectorizedRowBatch(value)) {
          // Operator tree is now done.
          return;
        }
      } else {

        /*
         * We have "regular" single rows from the Input File Format reader that we will need
         * to deserialize.
         */
        Preconditions.checkState(currentReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE
            || currentReadType == VectorMapOperatorReadType.ROW_DESERIALIZE);
        if (deserializerBatch.size == deserializerBatch.DEFAULT_SIZE) {
          numRows += deserializerBatch.size;

          /*
           * Feed the current full batch to the operator tree.
           */
          batchCounter++;
          oneRootOperator.process(deserializerBatch, 0);

          /**
           * Only reset the current data columns. Not any data columns defaulted to NULL
           * because they are not present in the partition, and not partition columns.
           */
          for (int c = 0; c < currentDataColumnCount; c++) {
            ColumnVector colVector = deserializerBatch.cols[c];
            if (colVector != null) {
              colVector.reset();
              colVector.init();
            }
          }
          deserializerBatch.selectedInUse = false;
          deserializerBatch.size = 0;
          deserializerBatch.endOfFile = false;
          if (oneRootOperator.getDone()) {
            setDone(true);
            return;
          }
        }

        /*
         * Do the {vector|row} deserialization of the one row into the VectorizedRowBatch.
         */
        switch (currentReadType) {
        case VECTOR_DESERIALIZE:
          {
            BinaryComparable binComp = (BinaryComparable) value;
            currentDeserializeRead.set(binComp.getBytes(), 0, binComp.getLength());

            // Deserialize and append the new row using the current batch size as the index.
            try {
              currentVectorDeserializeRow.deserialize(deserializerBatch, deserializerBatch.size++);
            } catch (Exception e) {
              throw new HiveException("\nDeserializeRead detail: "
                  + currentVectorDeserializeRow.getDetailedReadPositionString(), e);
            }
          }
          break;
        case ROW_DESERIALIZE:
          {
            Object deserialized = currentPartDeserializer.deserialize(value);

            // Note: Regardless of what the Input File Format returns, we have determined
            // with VectorAppendRow.initConversion that only currentDataColumnCount columns
            // have values we want.
            //
            // Any extra columns needed by the table schema were set to repeating null
            // in the batch by setupPartitionContextVars.

            // Convert the input row to standard objects.
            List<Object> standardObjects = new ArrayList<Object>();
            ObjectInspectorUtils.copyToStandardObject(standardObjects, deserialized,
                currentPartRawRowObjectInspector, ObjectInspectorCopyOption.WRITABLE);
            if (standardObjects.size() < currentDataColumnCount) {
              throw new HiveException("Input File Format returned row with too few columns");
            }

            // Append the deserialized standard object row using the current batch size
            // as the index.
            currentVectorAssign.assignRow(deserializerBatch, deserializerBatch.size++,
                standardObjects, currentDataColumnCount);
          }
          break;
        default:
          throw new RuntimeException("Unexpected vector MapOperator read type " +
              currentReadType.name());
        }
      }
    } catch (Exception e) {
      throw new HiveException("Hive Runtime Error while processing row ", e);
    }
  }
}
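The VECTOR_DESERIALIZE and ROW_DESERIALIZE paths above share one accumulate-and-flush pattern: rows are appended to a VectorizedRowBatch until it is full, the batch is handed to the operator tree, and the vectors are cleared. A stripped-down sketch of that pattern follows; numColumns, rowSource, appendRow and forward are placeholders standing in for the Hive machinery above, not real APIs.

// Placeholder sketch of the batching loop; only VectorizedRowBatch is a real class here.
VectorizedRowBatch batch = new VectorizedRowBatch(numColumns);
for (Writable value : rowSource) {
  if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
    forward(batch);               // hand the full batch to the operator tree
    batch.reset();                // clear column vectors and reset size to 0
  }
  appendRow(batch, value);        // deserialize one row at index batch.size++
}
if (batch.size > 0) {
  forward(batch);                 // flush the final partial batch at end of input
}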