Example 6 with BinaryComparable

Use of org.apache.hadoop.io.BinaryComparable in project hive by apache.

The class LazyBinarySerDe, method deserialize.

/**
 * Deserialize a table record to a lazybinary struct.
 */
@Override
public Object deserialize(Writable field) throws SerDeException {
    if (byteArrayRef == null) {
        byteArrayRef = new ByteArrayRef();
    }
    BinaryComparable b = (BinaryComparable) field;
    if (b.getLength() == 0) {
        // A zero-length value deserializes to a NULL row.
        return null;
    }
    // Point the cached struct at the value's backing bytes; nothing is copied.
    byteArrayRef.setData(b.getBytes());
    cachedLazyBinaryStruct.init(byteArrayRef, 0, b.getLength());
    lastOperationSerialize = false;
    lastOperationDeserialize = true;
    return cachedLazyBinaryStruct;
}
Also used : BinaryComparable(org.apache.hadoop.io.BinaryComparable) ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)
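To make the calling convention concrete, here is a minimal round-trip sketch, assuming the two-argument initialize(Configuration, Properties) entry point found in Hive 2.x/3.x; the two-column schema and the class name LazyBinaryRoundTrip are illustrative, not taken from the source above.

import java.util.Arrays;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;

public class LazyBinaryRoundTrip {
    public static void main(String[] args) throws Exception {
        // Describe a two-column table: (id int, name string).
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
        LazyBinarySerDe serDe = new LazyBinarySerDe();
        serDe.initialize(new Configuration(), tbl);
        // A standard (eager) object inspector describing the row we serialize.
        StructObjectInspector rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("id", "name"),
                Arrays.<ObjectInspector>asList(
                        PrimitiveObjectInspectorFactory.javaIntObjectInspector,
                        PrimitiveObjectInspectorFactory.javaStringObjectInspector));
        Writable bytes = serDe.serialize(Arrays.<Object>asList(7, "seven"), rowOI);
        // Hand the serialized bytes straight back; deserialize() returns the
        // cached LazyBinaryStruct shown in the example above.
        Object row = serDe.deserialize(bytes);
        StructObjectInspector lazyOI = (StructObjectInspector) serDe.getObjectInspector();
        System.out.println(lazyOI.getStructFieldsDataAsList(row));   // e.g. [7, seven]
        // A zero-length value deserializes to a NULL row.
        System.out.println(serDe.deserialize(new BytesWritable()));  // null
    }
}

The round trip works because serialize() returns a BytesWritable, which extends BinaryComparable and therefore satisfies the cast inside deserialize().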

Example 7 with BinaryComparable

Use of org.apache.hadoop.io.BinaryComparable in project hive by apache.

The class LazySimpleSerDe, method doDeserialize.

/**
 * Deserialize a row from the Writable to a LazyObject.
 *
 * @param field
 *          the Writable that contains the data
 * @return The deserialized row Object.
 * @see  org.apache.hadoop.hive.serde2.AbstractSerDe#deserialize(Writable)
 */
@Override
public Object doDeserialize(Writable field) throws SerDeException {
    if (byteArrayRef == null) {
        byteArrayRef = new ByteArrayRef();
    }
    // Both Text and BytesWritable extend BinaryComparable, so either works here.
    BinaryComparable b = (BinaryComparable) field;
    byteArrayRef.setData(b.getBytes());
    cachedLazyStruct.init(byteArrayRef, 0, b.getLength());
    lastOperationSerialize = false;
    lastOperationDeserialize = true;
    return cachedLazyStruct;
}
Also used : BinaryComparable(org.apache.hadoop.io.BinaryComparable)
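Text also extends BinaryComparable, so the same entry point accepts plain delimited text. A minimal sketch, assuming the default \001 field delimiter and the same two-argument initialize(...) as above; the schema and the class name LazySimpleTextRow are illustrative.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Text;

public class LazySimpleTextRow {
    public static void main(String[] args) throws Exception {
        Properties tbl = new Properties();
        tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
        tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
        LazySimpleSerDe serDe = new LazySimpleSerDe();
        serDe.initialize(new Configuration(), tbl);
        // The public deserialize() delegates to doDeserialize() above; a Text
        // value satisfies the BinaryComparable cast just like a BytesWritable.
        Object row = serDe.deserialize(new Text("7\u0001seven"));
        StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
        System.out.println(oi.getStructFieldsDataAsList(row));   // e.g. [7, seven]
    }
}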

Example 8 with BinaryComparable

Use of org.apache.hadoop.io.BinaryComparable in project hadoop by apache.

The class TestBinaryPartitioner, method testCustomOffsets.

@Test
public void testCustomOffsets() {
    Configuration conf = new Configuration();
    // The two keys agree only at indices 1 and 2 (bytes {2, 3}).
    BinaryComparable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    BinaryComparable key2 = new BytesWritable(new byte[] { 6, 2, 3, 7, 8 });
    // Negative offsets count from the end, so (1, -3) selects indices 1..2 here.
    BinaryPartitioner.setOffsets(conf, 1, -3);
    BinaryPartitioner<?> partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
    int partition1 = partitioner.getPartition(key1, null, 10);
    int partition2 = partitioner.getPartition(key2, null, 10);
    assertEquals(partition1, partition2);
    // (1, 2) selects the same index range 1..2, this time with explicit offsets.
    BinaryPartitioner.setOffsets(conf, 1, 2);
    partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
    partition1 = partitioner.getPartition(key1, null, 10);
    partition2 = partitioner.getPartition(key2, null, 10);
    assertEquals(partition1, partition2);
    // (-4, -3) selects indices 1..2 as well, both counted from the end.
    BinaryPartitioner.setOffsets(conf, -4, -3);
    partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
    partition1 = partitioner.getPartition(key1, null, 10);
    partition2 = partitioner.getPartition(key2, null, 10);
    assertEquals(partition1, partition2);
}
Also used : BinaryComparable(org.apache.hadoop.io.BinaryComparable) Configuration(org.apache.hadoop.conf.Configuration) BytesWritable(org.apache.hadoop.io.BytesWritable) Test(org.junit.Test)
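The offsets behave as this test expects because of how the partitioner hashes a byte range of the key. Below is a hedged paraphrase of BinaryPartitioner's partition function, written from its documented behavior rather than copied from the source: negative offsets wrap around from the end of the key, and the selected range is hashed and reduced modulo the partition count.

import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.WritableComparator;

public class OffsetHashSketch {
    // Paraphrase of BinaryPartitioner.getPartition; not the actual Hadoop source.
    static int partitionFor(BinaryComparable key, int leftOffset, int rightOffset,
            int numPartitions) {
        int length = key.getLength();
        // Wrap negative offsets: -1 means the last byte, -3 the third-from-last.
        int left = (leftOffset + length) % length;
        int right = (rightOffset + length) % length;
        // Hash only the selected range of the key's backing bytes.
        int hash = WritableComparator.hashBytes(key.getBytes(), left, right - left + 1);
        return (hash & Integer.MAX_VALUE) % numPartitions;
    }
}

With key1 = {1, 2, 3, 4, 5} and offsets (1, -3), this hashes indices 1..2, which both test keys share, so all three assertions above compare equal partitions.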

Example 9 with BinaryComparable

Use of org.apache.hadoop.io.BinaryComparable in project hadoop by apache.

The class TestBinaryPartitioner, method testLowerBound.

@Test
public void testLowerBound() {
    Configuration conf = new Configuration();
    // With the left offset at 0 (and the default right offset of -1), the
    // entire key is hashed, so a difference in the first byte matters.
    BinaryPartitioner.setLeftOffset(conf, 0);
    BinaryPartitioner<?> partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
    BinaryComparable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    BinaryComparable key2 = new BytesWritable(new byte[] { 6, 2, 3, 4, 5 });
    int partition1 = partitioner.getPartition(key1, null, 10);
    int partition2 = partitioner.getPartition(key2, null, 10);
    assertTrue(partition1 != partition2);
}
Also used : BinaryComparable(org.apache.hadoop.io.BinaryComparable) Configuration(org.apache.hadoop.conf.Configuration) BytesWritable(org.apache.hadoop.io.BytesWritable) Test(org.junit.Test)
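For symmetry, here is a sketch of the complementary case: pinning only the right offset at -1 so the key is hashed through its final byte. It mirrors the test above but is not quoted from the Hadoop test class.

@Test
public void testUpperBoundSketch() {
    Configuration conf = new Configuration();
    // Hash through the last byte of the key (offset -1, counted from the end).
    BinaryPartitioner.setRightOffset(conf, -1);
    BinaryPartitioner<?> partitioner = ReflectionUtils.newInstance(BinaryPartitioner.class, conf);
    BinaryComparable key1 = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
    BinaryComparable key2 = new BytesWritable(new byte[] { 1, 2, 3, 4, 6 });
    int partition1 = partitioner.getPartition(key1, null, 10);
    int partition2 = partitioner.getPartition(key2, null, 10);
    // With these particular inputs, the differing final byte lands the keys
    // in different partitions.
    assertTrue(partition1 != partition2);
}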

Example 10 with BinaryComparable

Use of org.apache.hadoop.io.BinaryComparable in project hive by apache.

The class VectorMapOperator, method process.

@Override
public void process(Writable value) throws HiveException {
    // A mapper can span multiple files/partitions.
    // The VectorPartitionContext needs to be changed if the input file has changed.
    ExecMapperContext context = getExecContext();
    if (context != null && context.inputFileChanged()) {
        // Let the child operators clean up, since the input file has changed.
        cleanUpInputFileChanged();
    }
    }
    if (!oneRootOperator.getDone()) {
        /*
         * Three different kinds of vectorized reading are supported:
         *
         *   1) Read the vectorized input file format, which returns a VectorizedRowBatch
         *      as the row.
         *
         *   2) Read using VectorDeserializeRow to deserialize each row into the
         *      VectorizedRowBatch.
         *
         *   3) Read using the regular partition deserializer to get the row object, and
         *      assign the row object into the VectorizedRowBatch with VectorAssignRow.
         */
        try {
            if (currentReadType == VectorMapOperatorReadType.VECTORIZED_INPUT_FILE_FORMAT) {
                if (!deliverVectorizedRowBatch(value)) {
                    // Operator tree is now done.
                    return;
                }
            } else if (value instanceof VectorizedRowBatch) {
                /*
                 * Clear out any rows we may have processed in row-mode for the current
                 * partition.
                 */
                if (!flushDeserializerBatch()) {
                    // Operator tree is now done.
                    return;
                }
                if (!deliverVectorizedRowBatch(value)) {
                    // Operator tree is now done.
                    return;
                }
            } else {
                /*
                 * We have a "regular" single row from the input file format reader that we
                 * will need to deserialize.
                 */
                Preconditions.checkState(currentReadType == VectorMapOperatorReadType.VECTOR_DESERIALIZE || currentReadType == VectorMapOperatorReadType.ROW_DESERIALIZE);
                if (deserializerBatch.size == deserializerBatch.DEFAULT_SIZE) {
                    numRows += deserializerBatch.size;
                    /*
                     * Feed the current full batch to the operator tree.
                     */
                    batchCounter++;
                    oneRootOperator.process(deserializerBatch, 0);
                    /*
                     * Only reset the current data columns. Leave alone the columns defaulted
                     * to repeating NULL (they are absent from this partition) and the
                     * partition columns.
                     */
                    for (int c = 0; c < currentDataColumnCount; c++) {
                        ColumnVector colVector = deserializerBatch.cols[c];
                        if (colVector != null) {
                            colVector.reset();
                            colVector.init();
                        }
                    }
                    deserializerBatch.selectedInUse = false;
                    deserializerBatch.size = 0;
                    deserializerBatch.endOfFile = false;
                    if (oneRootOperator.getDone()) {
                        setDone(true);
                        return;
                    }
                }
                /*
                 * Do the {vector|row} deserialization of the one row into the
                 * VectorizedRowBatch.
                 */
                switch (currentReadType) {
                    case VECTOR_DESERIALIZE:
                        {
                            BinaryComparable binComp = (BinaryComparable) value;
                            currentDeserializeRead.set(binComp.getBytes(), 0, binComp.getLength());
                            // Deserialize and append new row using the current batch size as the index.
                            try {
                                currentVectorDeserializeRow.deserialize(deserializerBatch, deserializerBatch.size++);
                            } catch (Exception e) {
                                throw new HiveException("\nDeserializeRead detail: " + currentVectorDeserializeRow.getDetailedReadPositionString(), e);
                            }
                        }
                        break;
                    case ROW_DESERIALIZE:
                        {
                            Object deserialized = currentPartDeserializer.deserialize(value);
                            // Note: Regardless of what the Input File Format returns, we have determined
                            // with VectorAppendRow.initConversion that only currentDataColumnCount columns
                            // have values we want.
                            // 
                            // Any extra columns needed by the table schema were set to repeating null
                            // in the batch by setupPartitionContextVars.
                            // Convert input row to standard objects.
                            List<Object> standardObjects = new ArrayList<Object>();
                            ObjectInspectorUtils.copyToStandardObject(standardObjects, deserialized, currentPartRawRowObjectInspector, ObjectInspectorCopyOption.WRITABLE);
                            if (standardObjects.size() < currentDataColumnCount) {
                                throw new HiveException("Input File Format returned row with too few columns");
                            }
                            // Append the deserialized standard object row using the current batch size
                            // as the index.
                            currentVectorAssign.assignRow(deserializerBatch, deserializerBatch.size++, standardObjects, currentDataColumnCount);
                        }
                        break;
                    default:
                        throw new RuntimeException("Unexpected vector MapOperator read type " + currentReadType.name());
                }
            }
        } catch (Exception e) {
            throw new HiveException("Hive Runtime Error while processing row ", e);
        }
    }
}
Also used : ExecMapperContext(org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext) BinaryComparable(org.apache.hadoop.io.BinaryComparable) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
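The heart of both deserializing branches is a fill-then-flush cycle on the shared VectorizedRowBatch. Here is a condensed sketch of that cycle, using only fields and calls that appear in the method above; the Consumer stands in for oneRootOperator.process(batch, 0) and the class name BatchCycleSketch is illustrative.

import java.util.function.Consumer;

import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BatchCycleSketch {
    // Flush the batch downstream once it reaches capacity, then reset it so the
    // next row can be appended at index batch.size.
    static void maybeFlush(VectorizedRowBatch batch, int dataColumnCount,
            Consumer<VectorizedRowBatch> downstream) {
        if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
            downstream.accept(batch);
            // Reset only the data columns; columns holding repeating NULLs or
            // partition values keep their state, exactly as in the example above.
            for (int c = 0; c < dataColumnCount; c++) {
                ColumnVector colVector = batch.cols[c];
                if (colVector != null) {
                    colVector.reset();
                    colVector.init();
                }
            }
            batch.selectedInUse = false;
            batch.size = 0;
            batch.endOfFile = false;
        }
    }
}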

Aggregations

BinaryComparable (org.apache.hadoop.io.BinaryComparable)10 Configuration (org.apache.hadoop.conf.Configuration)4 BytesWritable (org.apache.hadoop.io.BytesWritable)4 Test (org.junit.Test)4 IOException (java.io.IOException)2 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)2 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)2 ArrayList (java.util.ArrayList)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 ExecMapperContext (org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext)1 ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)1 RawComparator (org.apache.hadoop.io.RawComparator)1 Job (org.apache.hadoop.mapreduce.Job)1