
Example 6 with DeserializeRead

Use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.

The class VerifyFast, method verifyDeserializeRead.

public static void verifyDeserializeRead(DeserializeRead deserializeRead, PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException {
    boolean isNull;
    isNull = !deserializeRead.readNextField();
    if (isNull) {
        if (writable != null) {
            TestCase.fail("Field reports null but object is not null (class " + writable.getClass().getName() + ", " + writable.toString() + ")");
        }
        return;
    } else if (writable == null) {
        TestCase.fail("Field report not null but object is null");
    }
    switch(primitiveTypeInfo.getPrimitiveCategory()) {
        case BOOLEAN:
            {
                boolean value = deserializeRead.currentBoolean;
                if (!(writable instanceof BooleanWritable)) {
                    TestCase.fail("Boolean expected writable not Boolean");
                }
                boolean expected = ((BooleanWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case BYTE:
            {
                byte value = deserializeRead.currentByte;
                if (!(writable instanceof ByteWritable)) {
                    TestCase.fail("Byte expected writable not Byte");
                }
                byte expected = ((ByteWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")");
                }
            }
            break;
        case SHORT:
            {
                short value = deserializeRead.currentShort;
                if (!(writable instanceof ShortWritable)) {
                    TestCase.fail("Short expected writable not Short");
                }
                short expected = ((ShortWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case INT:
            {
                int value = deserializeRead.currentInt;
                if (!(writable instanceof IntWritable)) {
                    TestCase.fail("Integer expected writable not Integer");
                }
                int expected = ((IntWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case LONG:
            {
                long value = deserializeRead.currentLong;
                if (!(writable instanceof LongWritable)) {
                    TestCase.fail("Long expected writable not Long");
                }
                long expected = ((LongWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case FLOAT:
            {
                float value = deserializeRead.currentFloat;
                if (!(writable instanceof FloatWritable)) {
                    TestCase.fail("Float expected writable not Float");
                }
                float expected = ((FloatWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case DOUBLE:
            {
                double value = deserializeRead.currentDouble;
                if (!(writable instanceof DoubleWritable)) {
                    TestCase.fail("Double expected writable not Double");
                }
                double expected = ((DoubleWritable) writable).get();
                if (value != expected) {
                    TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")");
                }
            }
            break;
        case STRING:
            {
                byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                Text text = new Text(stringBytes);
                String string = text.toString();
                String expected = ((Text) writable).toString();
                if (!string.equals(expected)) {
                    TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')");
                }
            }
            break;
        case CHAR:
            {
                byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                Text text = new Text(stringBytes);
                String string = text.toString();
                HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength());
                HiveChar expected = ((HiveCharWritable) writable).getHiveChar();
                if (!hiveChar.equals(expected)) {
                    TestCase.fail("Char field mismatch (expected '" + expected + "' found '" + hiveChar + "')");
                }
            }
            break;
        case VARCHAR:
            {
                byte[] stringBytes = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                Text text = new Text(stringBytes);
                String string = text.toString();
                HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
                HiveVarchar expected = ((HiveVarcharWritable) writable).getHiveVarchar();
                if (!hiveVarchar.equals(expected)) {
                    TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')");
                }
            }
            break;
        case DECIMAL:
            {
                HiveDecimal value = deserializeRead.currentHiveDecimalWritable.getHiveDecimal();
                if (value == null) {
                    TestCase.fail("Decimal field evaluated to NULL");
                }
                HiveDecimal expected = ((HiveDecimalWritable) writable).getHiveDecimal();
                if (!value.equals(expected)) {
                    DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo;
                    int precision = decimalTypeInfo.getPrecision();
                    int scale = decimalTypeInfo.getScale();
                    TestCase.fail("Decimal field mismatch (expected " + expected.toString() + " found " + value.toString() + ") precision " + precision + ", scale " + scale);
                }
            }
            break;
        case DATE:
            {
                Date value = deserializeRead.currentDateWritable.get();
                Date expected = ((DateWritable) writable).get();
                if (!value.equals(expected)) {
                    TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                }
            }
            break;
        case TIMESTAMP:
            {
                Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp();
                Timestamp expected = ((TimestampWritable) writable).getTimestamp();
                if (!value.equals(expected)) {
                    TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                }
            }
            break;
        case INTERVAL_YEAR_MONTH:
            {
                HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth();
                HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth();
                if (!value.equals(expected)) {
                    TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                }
            }
            break;
        case INTERVAL_DAY_TIME:
            {
                HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime();
                HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime();
                if (!value.equals(expected)) {
                    TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")");
                }
            }
            break;
        case BINARY:
            {
                byte[] byteArray = Arrays.copyOfRange(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength);
                BytesWritable bytesWritable = (BytesWritable) writable;
                byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
                if (byteArray.length != expected.length) {
                    TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
                }
                for (int b = 0; b < byteArray.length; b++) {
                    if (byteArray[b] != expected[b]) {
                        TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")");
                    }
                }
            }
            break;
        default:
            throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory());
    }
}
Also used : VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Text(org.apache.hadoop.io.Text) BytesWritable(org.apache.hadoop.io.BytesWritable) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) Timestamp(java.sql.Timestamp) Date(java.sql.Date) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveIntervalYearMonth(org.apache.hadoop.hive.common.type.HiveIntervalYearMonth) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveIntervalDayTime(org.apache.hadoop.hive.common.type.HiveIntervalDayTime)
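
For context, here is a minimal sketch of a round trip that exercises this verifier. It is not a verbatim Hive test: it assumes the BinarySortableSerializeWrite/BinarySortableDeserializeRead pair and the VerifyFast.serializeWrite helper shown in Examples 8 and 10 on this page, and the single int column is a hypothetical choice.

// Sketch: serialize one int field with the fast API, then verify it with
// verifyDeserializeRead. Runs inside a test method that declares IOException.
PrimitiveTypeInfo intTypeInfo = TypeInfoFactory.intTypeInfo;
Output output = new Output();
BinarySortableSerializeWrite serializeWrite = new BinarySortableSerializeWrite(1);
serializeWrite.set(output);
VerifyFast.serializeWrite(serializeWrite, intTypeInfo, new IntWritable(42));

BinarySortableDeserializeRead deserializeRead =
        new BinarySortableDeserializeRead(new PrimitiveTypeInfo[] { intTypeInfo },
                /* useExternalBuffer */ false);
deserializeRead.set(output.getData(), 0, output.getLength());
VerifyFast.verifyDeserializeRead(deserializeRead, intTypeInfo, new IntWritable(42));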

Example 7 with DeserializeRead

Use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.

The class VectorDeserializeOrcWriter, method writeOneRow.

@Override
public void writeOneRow(Writable row) throws IOException {
    if (sourceBatch.size == VectorizedRowBatch.DEFAULT_SIZE) {
        flushBatch();
    }
    BinaryComparable binComp = (BinaryComparable) row;
    deserializeRead.set(binComp.getBytes(), 0, binComp.getLength());
    // Deserialize and append new row using the current batch size as the index.
    try {
        // Not using ByRef now since it's unsafe for text readers. Might be safe for others.
        vectorDeserializeRow.deserialize(sourceBatch, sourceBatch.size++);
    } catch (Exception e) {
        throw new IOException("DeserializeRead detail: " + vectorDeserializeRow.getDetailedReadPositionString(), e);
    }
}
Also used : BinaryComparable(org.apache.hadoop.io.BinaryComparable) IOException(java.io.IOException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
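
The flushBatch method called above is not part of this excerpt. A plausible sketch, assuming the class holds an org.apache.orc.Writer field (the name orcWriter is hypothetical), is:

// Hypothetical sketch of flushBatch (not shown in this excerpt): hand the
// filled batch to the ORC writer and reset it for the next set of rows.
private void flushBatch() throws IOException {
    if (sourceBatch.size > 0) {
        orcWriter.addRowBatch(sourceBatch); // assumed org.apache.orc.Writer field
        sourceBatch.reset();
    }
}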

Example 8 with DeserializeRead

Use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.

The class TestBinarySortableFast, method testBinarySortableFast.

private void testBinarySortableFast(SerdeRandomRowSource source, Object[][] rows, boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, boolean ascending, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
        }
    }
    int writeColumnCount = columnCount;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    }
    BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
    // Try to serialize
    // One Writable per row.
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    int[][] perFieldWriteLengthsArray = new int[rowCount][];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        binarySortableSerializeWrite.set(output);
        int[] perFieldWriteLengths = new int[columnCount];
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfos[index], writable);
            perFieldWriteLengths[index] = output.getLength();
        }
        perFieldWriteLengthsArray[i] = perFieldWriteLengths;
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
        if (i > 0) {
            int compareResult = serializeWriteBytes[i - 1].compareTo(serializeWriteBytes[i]);
            if ((compareResult < 0 && !ascending) || (compareResult > 0 && ascending)) {
                System.out.println("Test failed in " + (ascending ? "ascending" : "descending") + " order with " + (i - 1) + " and " + i);
                System.out.println("serialized data [" + (i - 1) + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
                System.out.println("serialized data [" + i + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
                fail("Sort order of serialized " + (i - 1) + " and " + i + " are reversed!");
            }
        }
    }
    // Try to deserialize, using DeserializeRead, the Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        BinarySortableDeserializeRead binarySortableDeserializeRead =
                new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                binarySortableDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
        }
        /*
         * Clip off one byte and expect to get an EOFException on the last written field.
         */
        BinarySortableDeserializeRead binarySortableDeserializeRead2 =
                new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);
        // One fewer byte than was written.
        binarySortableDeserializeRead2.set(bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1);
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            if (index == writeColumnCount - 1) {
                boolean threw = false;
                try {
                    VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
                } catch (EOFException e) {
                    //          debugDetailedReadPositionString = binarySortableDeserializeRead2.getDetailedReadPositionString();
                    //          debugStackTrace = e.getStackTrace();
                    threw = true;
                }
                TestCase.assertTrue(threw);
            } else {
                if (useIncludeColumns && !columnsToInclude[index]) {
                    binarySortableDeserializeRead2.skipNextField();
                } else {
                    VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
                }
            }
        }
    }
    // Try to deserialize, using the SerDe class, the Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        // Note that regular SerDe doesn't tolerate fewer columns.
        List<Object> deserializedRow;
        if (doWriteFewerColumns) {
            deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
        } else {
            deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
        }
        Object[] row = rows[i];
        for (int index = 0; index < writeColumnCount; index++) {
            Object expected = row[index];
            Object object = deserializedRow.get(index);
            if (expected == null || object == null) {
                if (expected != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
                }
            } else {
                if (!object.equals(expected)) {
                    fail("SerDe deserialized value does not match (expected " + expected.getClass().getName() + " " + expected.toString() + ", actual " + object.getClass().getName() + " " + object.toString() + ")");
                }
            }
        }
    }
    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];
    // Serialize using the SerDe; these rows are deserialized below using DeserializeRead.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // Since SerDe reuses memory, we will need to make a copy.
        BytesWritable serialized;
        if (doWriteFewerColumns) {
            serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
        } else {
            serialized = (BytesWritable) serde.serialize(row, rowOI);
        }
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(serialized);
        byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        byte[] serializeWriteExpected = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
        if (!Arrays.equals(serDeOutput, serializeWriteExpected)) {
            int mismatchPos = -1;
            if (serDeOutput.length != serializeWriteExpected.length) {
                for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) {
                    if (serDeOutput[b] != serializeWriteExpected[b]) {
                        mismatchPos = b;
                        break;
                    }
                }
                fail("Different byte array lengths: serDeOutput.length " + serDeOutput.length + ", serializeWriteExpected.length " + serializeWriteExpected.length + " mismatchPos " + mismatchPos + " perFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]));
            }
            List<Integer> differentPositions = new ArrayList();
            for (int b = 0; b < serDeOutput.length; b++) {
                if (serDeOutput[b] != serializeWriteExpected[b]) {
                    differentPositions.add(b);
                }
            }
            if (differentPositions.size() > 0) {
                List<String> serializeWriteExpectedFields = new ArrayList<String>();
                List<String> serDeFields = new ArrayList<String>();
                int f = 0;
                int lastBegin = 0;
                for (int b = 0; b < serDeOutput.length; b++) {
                    int writeLength = perFieldWriteLengthsArray[i][f];
                    if (b + 1 == writeLength) {
                        serializeWriteExpectedFields.add(displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
                        serDeFields.add(displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
                        f++;
                        lastBegin = b + 1;
                    }
                }
                fail("SerializeWrite and SerDe serialization does not match at positions " + differentPositions.toString() + "\n(SerializeWrite: " + serializeWriteExpectedFields.toString() + "\nSerDe: " + serDeFields.toString() + "\nperFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]) + "\nprimitiveTypeInfos " + Arrays.toString(primitiveTypeInfos) + "\nrow " + Arrays.toString(row));
            }
        }
        serdeBytes[i] = bytesWritable;
    }
    // Try to deserialize, using DeserializeRead, the Writable row objects created by the SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        BinarySortableDeserializeRead binarySortableDeserializeRead =
                new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */ false, columnSortOrderIsDesc);
        BytesWritable bytesWritable = serdeBytes[i];
        binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
                binarySortableDeserializeRead.skipNextField();
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
            }
        }
        if (writeColumnCount == columnCount) {
            TestCase.assertTrue(binarySortableDeserializeRead.isEndOfInputReached());
        }
    }
}
Also used : BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) ArrayList(java.util.ArrayList) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) EOFException(java.io.EOFException)
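
The sort-order property this test asserts can be seen in isolation with a short sketch. It assumes the single-argument BinarySortableSerializeWrite(int fieldCount) constructor used in Example 10 (all columns ascending) and the writeInt method of the fast SerializeWrite API.

// Sketch: two single-int rows serialized in ascending order; the serialized
// byte images must compare the same way as the values themselves.
BinarySortableSerializeWrite sw = new BinarySortableSerializeWrite(1);
Output out1 = new Output();
sw.set(out1);
sw.writeInt(17);
Output out2 = new Output();
sw.set(out2);
sw.writeInt(42);
BytesWritable b1 = new BytesWritable();
b1.set(out1.getData(), 0, out1.getLength());
BytesWritable b2 = new BytesWritable();
b2.set(out2.getData(), 0, out2.getLength());
TestCase.assertTrue(b1.compareTo(b2) < 0); // 17 sorts before 42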

Example 9 with DeserializeRead

Use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.

The class VectorDeserializeRow, method convertRowColumn.

/**
   * Convert one row column value that is the current value in deserializeRead.
   *
   * We deserialize into a writable and then pass that writable to an instance of VectorAssignRow
   * to convert the writable to the target data type and assign it into the VectorizedRowBatch.
   *
   * @param batch
   * @param batchIndex
   * @param logicalColumnIndex
   * @throws IOException
   */
private void convertRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) throws IOException {
    final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
    Writable convertSourceWritable = convertSourceWritables[logicalColumnIndex];
    switch(sourceCategories[logicalColumnIndex]) {
        case PRIMITIVE:
            {
                switch(sourcePrimitiveCategories[logicalColumnIndex]) {
                    case VOID:
                        convertSourceWritable = null;
                        break;
                    case BOOLEAN:
                        ((BooleanWritable) convertSourceWritable).set(deserializeRead.currentBoolean);
                        break;
                    case BYTE:
                        ((ByteWritable) convertSourceWritable).set(deserializeRead.currentByte);
                        break;
                    case SHORT:
                        ((ShortWritable) convertSourceWritable).set(deserializeRead.currentShort);
                        break;
                    case INT:
                        ((IntWritable) convertSourceWritable).set(deserializeRead.currentInt);
                        break;
                    case LONG:
                        ((LongWritable) convertSourceWritable).set(deserializeRead.currentLong);
                        break;
                    case TIMESTAMP:
                        ((TimestampWritable) convertSourceWritable).set(deserializeRead.currentTimestampWritable);
                        break;
                    case DATE:
                        ((DateWritable) convertSourceWritable).set(deserializeRead.currentDateWritable);
                        break;
                    case FLOAT:
                        ((FloatWritable) convertSourceWritable).set(deserializeRead.currentFloat);
                        break;
                    case DOUBLE:
                        ((DoubleWritable) convertSourceWritable).set(deserializeRead.currentDouble);
                        break;
                    case BINARY:
                        if (deserializeRead.currentBytes == null) {
                            LOG.info("null binary entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                        }
                        ((BytesWritable) convertSourceWritable).set(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength);
                        break;
                    case STRING:
                        if (deserializeRead.currentBytes == null) {
                            throw new RuntimeException("null string entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                        }
                        // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
                        ((Text) convertSourceWritable).set(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength);
                        break;
                    case VARCHAR:
                        {
                            // Truncate to the varchar's maximum length; note this conversion path still creates a Java String.
                            if (deserializeRead.currentBytes == null) {
                                throw new RuntimeException("null varchar entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.truncate(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength, maxLengths[logicalColumnIndex]);
                            ((HiveVarcharWritable) convertSourceWritable).set(new String(deserializeRead.currentBytes, deserializeRead.currentBytesStart, adjustedLength, Charsets.UTF_8), -1);
                        }
                        break;
                    case CHAR:
                        {
                            // Right-trim and truncate to the char type's maximum length; note this conversion path still creates a Java String.
                            if (deserializeRead.currentBytes == null) {
                                throw new RuntimeException("null char entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
                            }
                            int adjustedLength = StringExpr.rightTrimAndTruncate(deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesLength, maxLengths[logicalColumnIndex]);
                            ((HiveCharWritable) convertSourceWritable).set(new String(deserializeRead.currentBytes, deserializeRead.currentBytesStart, adjustedLength, Charsets.UTF_8), -1);
                        }
                        break;
                    case DECIMAL:
                        ((HiveDecimalWritable) convertSourceWritable).set(deserializeRead.currentHiveDecimalWritable);
                        break;
                    case INTERVAL_YEAR_MONTH:
                        ((HiveIntervalYearMonthWritable) convertSourceWritable).set(deserializeRead.currentHiveIntervalYearMonthWritable);
                        break;
                    case INTERVAL_DAY_TIME:
                        ((HiveIntervalDayTimeWritable) convertSourceWritable).set(deserializeRead.currentHiveIntervalDayTimeWritable);
                        break;
                    default:
                        throw new RuntimeException("Primitive category " + sourcePrimitiveCategories[logicalColumnIndex] + " not supported");
                }
            }
            break;
        default:
            throw new RuntimeException("Category " + sourceCategories[logicalColumnIndex] + " not supported");
    }
    /*
     * Convert our source object we just read into the target object and store that in the
     * VectorizedRowBatch.
     */
    convertVectorAssignRow.assignConvertRowColumn(batch, batchIndex, logicalColumnIndex, convertSourceWritable);
}
Also used : ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) LongWritable(org.apache.hadoop.io.LongWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) HiveIntervalYearMonthWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable) HiveIntervalDayTimeWritable(org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) HiveVarcharWritable(org.apache.hadoop.hive.serde2.io.HiveVarcharWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable)
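
For context, the driving pattern around this private method is visible in Examples 7 and 10 above: the caller binds the serialized row bytes, then VectorDeserializeRow walks the fields and, for columns that need conversion, ends up in convertRowColumn.

// From the callers above: bind the serialized bytes, then deserialize into the
// next free slot of the batch; convertRowColumn runs per converted column.
vectorDeserializeRow.setBytes(output.getData(), 0, output.getLength());
vectorDeserializeRow.deserialize(batch, batch.size++);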

Example 10 with DeserializeRead

Use of org.apache.hadoop.hive.serde2.fast.DeserializeRead in project hive by apache.

The class TestVectorSerDeRow, method testVectorDeserializeRow.

void testVectorDeserializeRow(Random r, SerializationType serializationType, boolean alternate1, boolean alternate2, boolean useExternalBuffer) throws HiveException, IOException, SerDeException {
    String[] emptyScratchTypeNames = new String[0];
    VectorRandomRowSource source = new VectorRandomRowSource();
    source.init(r);
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
    batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    // junk the destination for the 1st pass
    for (ColumnVector cv : batch.cols) {
        Arrays.fill(cv.isNull, true);
    }
    PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
    int fieldCount = source.typeNames().size();
    DeserializeRead deserializeRead;
    SerializeWrite serializeWrite;
    switch(serializationType) {
        case BINARY_SORTABLE:
            boolean useColumnSortOrderIsDesc = alternate1;
            if (!useColumnSortOrderIsDesc) {
                deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer);
                serializeWrite = new BinarySortableSerializeWrite(fieldCount);
            } else {
                boolean[] columnSortOrderIsDesc = new boolean[fieldCount];
                for (int i = 0; i < fieldCount; i++) {
                    columnSortOrderIsDesc[i] = r.nextBoolean();
                }
                deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer, columnSortOrderIsDesc);
                byte[] columnNullMarker = new byte[fieldCount];
                byte[] columnNotNullMarker = new byte[fieldCount];
                for (int i = 0; i < fieldCount; i++) {
                    if (columnSortOrderIsDesc[i]) {
                        // Descending
                        // Null last (default for descending order)
                        columnNullMarker[i] = BinarySortableSerDe.ZERO;
                        columnNotNullMarker[i] = BinarySortableSerDe.ONE;
                    } else {
                        // Ascending
                        // Null first (default for ascending order)
                        columnNullMarker[i] = BinarySortableSerDe.ZERO;
                        columnNotNullMarker[i] = BinarySortableSerDe.ONE;
                    }
                }
                serializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
            }
            boolean useBinarySortableCharsNeedingEscape = alternate2;
            if (useBinarySortableCharsNeedingEscape) {
                source.addBinarySortableAlphabets();
            }
            break;
        case LAZY_BINARY:
            deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer);
            serializeWrite = new LazyBinarySerializeWrite(fieldCount);
            break;
        case LAZY_SIMPLE:
            {
                StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
                Configuration conf = new Configuration();
                Properties tbl = new Properties();
                tbl.setProperty(serdeConstants.FIELD_DELIM, "\t");
                tbl.setProperty(serdeConstants.LINE_DELIM, "\n");
                byte separator = (byte) '\t';
                boolean useLazySimpleEscapes = alternate1;
                if (useLazySimpleEscapes) {
                    tbl.setProperty(serdeConstants.QUOTE_CHAR, "'");
                    String escapeString = "\\";
                    tbl.setProperty(serdeConstants.ESCAPE_CHAR, escapeString);
                }
                LazySerDeParameters lazySerDeParams = getSerDeParams(conf, tbl, rowObjectInspector);
                if (useLazySimpleEscapes) {
                    // LazySimple seems to throw away everything but \n and \r.
                    boolean[] needsEscape = lazySerDeParams.getNeedsEscape();
                    StringBuilder sb = new StringBuilder();
                    if (needsEscape['\n']) {
                        sb.append('\n');
                    }
                    if (needsEscape['\r']) {
                        sb.append('\r');
                    }
                    // for (int i = 0; i < needsEscape.length; i++) {
                    //  if (needsEscape[i]) {
                    //    sb.append((char) i);
                    //  }
                    // }
                    String needsEscapeStr = sb.toString();
                    if (needsEscapeStr.length() > 0) {
                        source.addEscapables(needsEscapeStr);
                    }
                }
                deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer, separator, lazySerDeParams);
                serializeWrite = new LazySimpleSerializeWrite(fieldCount, separator, lazySerDeParams);
            }
            break;
        default:
            throw new Error("Unknown serialization type " + serializationType);
    }
    VectorDeserializeRow vectorDeserializeRow = new VectorDeserializeRow(deserializeRead);
    vectorDeserializeRow.init();
    // junk the destination for the 1st pass
    for (ColumnVector cv : batch.cols) {
        Arrays.fill(cv.isNull, true);
        cv.noNulls = false;
    }
    VectorExtractRow vectorExtractRow = new VectorExtractRow();
    vectorExtractRow.init(source.typeNames());
    Object[][] randomRows = source.randomRows(100000);
    int firstRandomRowIndex = 0;
    for (int i = 0; i < randomRows.length; i++) {
        Object[] row = randomRows[i];
        Output output = serializeRow(row, source, serializeWrite);
        vectorDeserializeRow.setBytes(output.getData(), 0, output.getLength());
        try {
            vectorDeserializeRow.deserialize(batch, batch.size);
        } catch (Exception e) {
            throw new HiveException("\nDeserializeRead details: " + vectorDeserializeRow.getDetailedReadPositionString(), e);
        }
        batch.size++;
        if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
            examineBatch(batch, vectorExtractRow, primitiveTypeInfos, randomRows, firstRandomRowIndex);
            firstRandomRowIndex = i + 1;
            batch.reset();
        }
    }
    if (batch.size > 0) {
        examineBatch(batch, vectorExtractRow, primitiveTypeInfos, randomRows, firstRandomRowIndex);
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) Properties(java.util.Properties) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) DeserializeRead(org.apache.hadoop.hive.serde2.fast.DeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
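
The serializeRow helper invoked in the loop above is not part of this excerpt. A plausible reconstruction, assuming it mirrors the per-field VerifyFast.serializeWrite loop from Example 8, is:

// Hypothetical sketch of serializeRow (not shown in this excerpt): write every
// column of one random row with the chosen SerializeWrite implementation.
private Output serializeRow(Object[] row, VectorRandomRowSource source,
        SerializeWrite serializeWrite) throws IOException {
    Output output = new Output();
    serializeWrite.set(output);
    PrimitiveTypeInfo[] typeInfos = source.primitiveTypeInfos();
    for (int i = 0; i < row.length; i++) {
        VerifyFast.serializeWrite(serializeWrite, typeInfos[i], (Writable) row[i]);
    }
    return output;
}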

Aggregations

BytesWritable (org.apache.hadoop.io.BytesWritable): 7 uses
Output (org.apache.hadoop.hive.serde2.ByteStream.Output): 5 uses
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 4 uses
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 4 uses
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 4 uses
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 4 uses
FloatWritable (org.apache.hadoop.io.FloatWritable): 4 uses
IntWritable (org.apache.hadoop.io.IntWritable): 4 uses
LongWritable (org.apache.hadoop.io.LongWritable): 4 uses
Writable (org.apache.hadoop.io.Writable): 4 uses
Timestamp (java.sql.Timestamp): 3 uses
HiveChar (org.apache.hadoop.hive.common.type.HiveChar): 3 uses
HiveDecimal (org.apache.hadoop.hive.common.type.HiveDecimal): 3 uses
HiveIntervalDayTime (org.apache.hadoop.hive.common.type.HiveIntervalDayTime): 3 uses
HiveIntervalYearMonth (org.apache.hadoop.hive.common.type.HiveIntervalYearMonth): 3 uses
HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar): 3 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 3 uses
BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead): 3 uses
BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite): 3 uses
LazySimpleDeserializeRead (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead): 3 uses