Search in sources :

Example 11 with BinarySortableSerializeWrite

Use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite in project hive by apache.

the class MapJoinTestConfig method loadTableContainerData.

/**
 * Loads the small-table rows of {@code testData} into {@code mapJoinTableContainer}.
 *
 * Keys are serialized with a {@code BinarySortableSerializeWrite} and, when the test data
 * has small-table values, values are serialized with a {@code LazyBinarySerializeWrite}.
 * Each serialized key/value pair is handed to {@code mapJoinTableContainer.putRow}.
 * Rows are added in rounds: in a given round, every key that still has a value at index
 * {@code round} contributes one entry.
 *
 * NOTE(review): this excerpt has lost its closing braces and several statements (the calls
 * that bind the Output buffers to the serialize writers, the loop exit and the round
 * increment are not visible) -- confirm against the upstream source before relying on it.
 *
 * @param testDesc supplies the key and value type infos used for serialization
 * @param testData supplies the small-table keys, per-key value counts and values
 * @param mapJoinTableContainer destination container that receives the rows
 */
private static void loadTableContainerData(MapJoinTestDescription testDesc, MapJoinTestData testData, MapJoinTableContainer mapJoinTableContainer) throws IOException, SerDeException, HiveException {
    // The value writer and its buffer are only created when the test data carries values.
    LazyBinarySerializeWrite valueSerializeWrite = null;
    Output valueOutput = null;
    if (testData.smallTableValues != null) {
        valueSerializeWrite = new LazyBinarySerializeWrite(testDesc.smallTableValueTypeInfos.length);
        valueOutput = new Output();
    // These writables are reused for every putRow call below.
    BytesWritable valueBytesWritable = new BytesWritable();
    BytesWritable keyBytesWritable = new BytesWritable();
    BinarySortableSerializeWrite keySerializeWrite = new BinarySortableSerializeWrite(testDesc.bigTableKeyTypeInfos.length);
    Output keyOutput = new Output();
    int round = 0;
    boolean atLeastOneValueAdded = false;
    while (true) {
        for (Entry<RowTestObjects, Integer> testRowEntry : testData.smallTableKeyHashMap.entrySet()) {
            final int smallTableKeyIndex = testRowEntry.getValue();
            final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex);
            // A key takes part in this round only if it has more than `round` values.
            boolean addEntry = round + 1 <= valueCount;
            if (addEntry) {
                atLeastOneValueAdded = true;
                RowTestObjects valueRow = null;
                if (testData.smallTableValues != null) {
                    ArrayList<RowTestObjects> valueList = testData.smallTableValues.get(smallTableKeyIndex);
                    valueRow = valueList.get(round);
                Object[] smallTableKey = testRowEntry.getKey().getRow();
                // Serialize every key column in order.
                for (int index = 0; index < testDesc.bigTableKeyTypeInfos.length; index++) {
                    Writable keyWritable = (Writable) smallTableKey[index];
                    VerifyFastRow.serializeWrite(keySerializeWrite, (PrimitiveTypeInfo) testDesc.bigTableKeyTypeInfos[index], keyWritable);
                keyBytesWritable.set(keyOutput.getData(), 0, keyOutput.getLength());
                if (valueRow == null) {
                    // Empty value.
                    mapJoinTableContainer.putRow(keyBytesWritable, valueBytesWritable);
                } else {
                    Object[] smallTableValue = valueRow.getRow();
                    // Serialize every value column in order.
                    for (int index = 0; index < testDesc.smallTableValueTypeInfos.length; index++) {
                        Writable valueWritable = (Writable) smallTableValue[index];
                        VerifyFastRow.serializeWrite(valueSerializeWrite, (PrimitiveTypeInfo) testDesc.smallTableValueTypeInfos[index], valueWritable);
                    valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
                    mapJoinTableContainer.putRow(keyBytesWritable, valueBytesWritable);
        // NOTE(review): presumably the loop exits here when there are no values or when the
        // round added nothing; the exit statement itself is not visible in this excerpt.
        if (testData.smallTableValues == null || !atLeastOneValueAdded) {
        atLeastOneValueAdded = false;
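Also used : LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) RowTestObjects(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects) Output(org.apache.hadoop.hive.serde2.ByteStream.Output)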

Example 12 with BinarySortableSerializeWrite

Use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite in project hive by apache.

the class TestVectorMapJoinFastRowHashMap method addAndVerifyRows.

/**
 * Adds every row of {@code rows} to {@code map} as a value and then verifies the map
 * against {@code verifyTable}.
 *
 * Each value row is serialized with a {@code LazyBinarySerializeWrite}. For each value the
 * method either generates a new random key (serialized with a
 * {@code BinarySortableSerializeWrite}) or reuses a random key already held by
 * {@code verifyTable}; the key/value bytes are recorded in {@code verifyTable} and put
 * into {@code map}.
 *
 * NOTE(review): this excerpt has lost its closing braces and some statements (for example
 * the call that binds keyOutput to keySerializeWrite is not visible) -- confirm against
 * the upstream source.
 *
 * @param valueSource provides the type infos of the value columns
 * @param rows value rows to add
 * @param map hash table under test
 * @param hashTableKeyType passed through to the final verification
 * @param verifyTable reference table that records what was added
 * @param keyTypeNames type-name strings of the key columns
 * @param doClipping passed through to the final verification
 * @param useExactBytes passed through to the final verification
 */
private void addAndVerifyRows(VectorRandomRowSource valueSource, Object[][] rows, VectorMapJoinFastHashTable map, HashTableKeyType hashTableKeyType, VerifyFastRowHashMap verifyTable, String[] keyTypeNames, boolean doClipping, boolean useExactBytes) throws HiveException, IOException, SerDeException {
    final int keyCount = keyTypeNames.length;
    PrimitiveTypeInfo[] keyPrimitiveTypeInfos = new PrimitiveTypeInfo[keyCount];
    PrimitiveCategory[] keyPrimitiveCategories = new PrimitiveCategory[keyCount];
    ArrayList<ObjectInspector> keyPrimitiveObjectInspectorList = new ArrayList<ObjectInspector>(keyCount);
    // Resolve each key type name into its PrimitiveTypeInfo and PrimitiveCategory.
    for (int i = 0; i < keyCount; i++) {
        PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(keyTypeNames[i]);
        keyPrimitiveTypeInfos[i] = primitiveTypeInfo;
        PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
        keyPrimitiveCategories[i] = primitiveCategory;
    // Every key column: not descending, null marker ZERO, not-null marker ONE.
    boolean[] keyColumnSortOrderIsDesc = new boolean[keyCount];
    Arrays.fill(keyColumnSortOrderIsDesc, false);
    byte[] keyColumnNullMarker = new byte[keyCount];
    Arrays.fill(keyColumnNullMarker, BinarySortableSerDe.ZERO);
    byte[] keyColumnNotNullMarker = new byte[keyCount];
    Arrays.fill(keyColumnNotNullMarker, BinarySortableSerDe.ONE);
    BinarySortableSerializeWrite keySerializeWrite = new BinarySortableSerializeWrite(keyColumnSortOrderIsDesc, keyColumnNullMarker, keyColumnNotNullMarker);
    TypeInfo[] valueTypeInfos = valueSource.typeInfos();
    final int columnCount = valueTypeInfos.length;
    SerializeWrite valueSerializeWrite = new LazyBinarySerializeWrite(columnCount);
    final int count = rows.length;
    for (int i = 0; i < count; i++) {
        Object[] valueRow = rows[i];
        // A fresh output buffer per row; the serialized bytes are copied out below.
        Output valueOutput = new Output();
        ((LazyBinarySerializeWrite) valueSerializeWrite).set(valueOutput);
        for (int index = 0; index < columnCount; index++) {
            VerifyFastRow.serializeWrite(valueSerializeWrite, valueTypeInfos[index], valueRow[index]);
        byte[] value = Arrays.copyOf(valueOutput.getData(), valueOutput.getLength());
        // Add a new key or add a value to an existing key?
        byte[] key;
        // A new key is forced while verifyTable is still empty.
        if (random.nextBoolean() || verifyTable.getCount() == 0) {
            Object[] keyRow = VectorRandomRowSource.randomWritablePrimitiveRow(keyCount, random, keyPrimitiveTypeInfos);
            Output keyOutput = new Output();
            for (int index = 0; index < keyCount; index++) {
                VerifyFastRow.serializeWrite(keySerializeWrite, keyPrimitiveTypeInfos[index], keyRow[index]);
            key = Arrays.copyOf(keyOutput.getData(), keyOutput.getLength());
            verifyTable.add(key, keyRow, value, valueRow);
        } else {
            key = verifyTable.addRandomExisting(value, valueRow, random);
        // Serialize keyRow into key bytes.
        BytesWritable keyWritable = new BytesWritable(key);
        BytesWritable valueWritable = new BytesWritable(value);
        map.putRow(keyWritable, valueWritable);
    // verifyTable.verify(map);
    verifyTable.verify(map, hashTableKeyType, valueTypeInfos, doClipping, useExactBytes, random);
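Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite)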

Example 13 with BinarySortableSerializeWrite

Use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite in project hive by apache.

the class TestVectorSerDeRow method testVectorDeserializeRow.

/**
 * Round-trips random rows through a SerializeWrite / DeserializeRead pair of the requested
 * {@code serializationType} and checks the rows deserialized into a VectorizedRowBatch.
 *
 * 100000 random rows are generated; each is serialized with {@code serializeRow}, fed to a
 * {@code VectorDeserializeRow}, and full batches (plus any final partial batch) are handed
 * to {@code examineBatch}.
 *
 * NOTE(review): this excerpt has lost its closing braces and several statements (switch
 * {@code break}s, the {@code default:} label, the bodies of some {@code if} blocks, and
 * the batch size updates are not visible) -- confirm against the upstream source.
 *
 * @param r random source used to pick per-column sort order
 * @param serializationType which serialization format to exercise
 * @param alternate1 BINARY_SORTABLE: use explicit per-column sort order;
 *                   LAZY_SIMPLE: configure quote/escape characters
 * @param alternate2 BINARY_SORTABLE: read as useBinarySortableCharsNeedingEscape
 * @param useExternalBuffer passed to the DeserializeRead constructors
 */
void testVectorDeserializeRow(Random r, SerializationType serializationType, boolean alternate1, boolean alternate2, boolean useExternalBuffer) throws HiveException, IOException, SerDeException {
    String[] emptyScratchTypeNames = new String[0];
    VectorRandomRowSource source = new VectorRandomRowSource();
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
    batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    // junk the destination for the 1st pass
    for (ColumnVector cv : batch.cols) {
        Arrays.fill(cv.isNull, true);
    PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
    int fieldCount = source.typeNames().size();
    DeserializeRead deserializeRead;
    SerializeWrite serializeWrite;
    // Pick the reader/writer pair that matches the requested serialization format.
    switch(serializationType) {
        case BINARY_SORTABLE:
            boolean useColumnSortOrderIsDesc = alternate1;
            if (!useColumnSortOrderIsDesc) {
                deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer);
                serializeWrite = new BinarySortableSerializeWrite(fieldCount);
            } else {
                // Randomly choose ascending or descending for each column.
                boolean[] columnSortOrderIsDesc = new boolean[fieldCount];
                for (int i = 0; i < fieldCount; i++) {
                    columnSortOrderIsDesc[i] = r.nextBoolean();
                deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer, columnSortOrderIsDesc);
                byte[] columnNullMarker = new byte[fieldCount];
                byte[] columnNotNullMarker = new byte[fieldCount];
                // Both branches below assign the same marker bytes (ZERO for null, ONE for not-null).
                for (int i = 0; i < fieldCount; i++) {
                    if (columnSortOrderIsDesc[i]) {
                        // Descending
                        // Null last (default for descending order)
                        columnNullMarker[i] = BinarySortableSerDe.ZERO;
                        columnNotNullMarker[i] = BinarySortableSerDe.ONE;
                    } else {
                        // Ascending
                        // Null first (default for ascending order)
                        columnNullMarker[i] = BinarySortableSerDe.ZERO;
                        columnNotNullMarker[i] = BinarySortableSerDe.ONE;
                serializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
            boolean useBinarySortableCharsNeedingEscape = alternate2;
            // NOTE(review): the body of this if is not visible in the excerpt.
            if (useBinarySortableCharsNeedingEscape) {
        case LAZY_BINARY:
            deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer);
            serializeWrite = new LazyBinarySerializeWrite(fieldCount);
        case LAZY_SIMPLE:
                StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
                Configuration conf = new Configuration();
                Properties tbl = new Properties();
                // Tab-separated fields, newline-separated rows.
                tbl.setProperty(serdeConstants.FIELD_DELIM, "\t");
                tbl.setProperty(serdeConstants.LINE_DELIM, "\n");
                byte separator = (byte) '\t';
                boolean useLazySimpleEscapes = alternate1;
                if (useLazySimpleEscapes) {
                    tbl.setProperty(serdeConstants.QUOTE_CHAR, "'");
                    String escapeString = "\\";
                    tbl.setProperty(serdeConstants.ESCAPE_CHAR, escapeString);
                LazySerDeParameters lazySerDeParams = getSerDeParams(conf, tbl, rowObjectInspector);
                if (useLazySimpleEscapes) {
                    // LazySimple seems to throw away everything but \n and \r.
                    boolean[] needsEscape = lazySerDeParams.getNeedsEscape();
                    StringBuilder sb = new StringBuilder();
                    // NOTE(review): the bodies of the next two ifs are not visible in the excerpt.
                    if (needsEscape['\n']) {
                    if (needsEscape['\r']) {
                    // for (int i = 0; i < needsEscape.length; i++) {
                    //  if (needsEscape[i]) {
                    //    sb.append((char) i);
                    //  }
                    // }
                    String needsEscapeStr = sb.toString();
                    if (needsEscapeStr.length() > 0) {
                deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer, separator, lazySerDeParams);
                serializeWrite = new LazySimpleSerializeWrite(fieldCount, separator, lazySerDeParams);
            throw new Error("Unknown serialization type " + serializationType);
    VectorDeserializeRow vectorDeserializeRow = new VectorDeserializeRow(deserializeRead);
    // junk the destination for the 1st pass
    for (ColumnVector cv : batch.cols) {
        Arrays.fill(cv.isNull, true);
        cv.noNulls = false;
    VectorExtractRow vectorExtractRow = new VectorExtractRow();
    Object[][] randomRows = source.randomRows(100000);
    // Index into randomRows of the first row held by the batch currently being filled.
    int firstRandomRowIndex = 0;
    for (int i = 0; i < randomRows.length; i++) {
        Object[] row = randomRows[i];
        Output output = serializeRow(row, source, serializeWrite);
        vectorDeserializeRow.setBytes(output.getData(), 0, output.getLength());
        try {
            vectorDeserializeRow.deserialize(batch, batch.size);
        } catch (Exception e) {
            // Rethrow with the reader's position details attached to aid debugging.
            throw new HiveException("\nDeserializeRead details: " + vectorDeserializeRow.getDetailedReadPositionString(), e);
        // A full batch is examined against the corresponding slice of randomRows.
        if (batch.size == batch.DEFAULT_SIZE) {
            examineBatch(batch, vectorExtractRow, primitiveTypeInfos, randomRows, firstRandomRowIndex);
            firstRandomRowIndex = i + 1;
    // Examine whatever is left in a final, partially filled batch.
    if (batch.size > 0) {
        examineBatch(batch, vectorExtractRow, primitiveTypeInfos, randomRows, firstRandomRowIndex);
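Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Configuration(org.apache.hadoop.conf.Configuration) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) Properties(java.util.Properties) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) SerializeWrite(org.apache.hadoop.hive.serde2.fast.SerializeWrite) LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) DeserializeRead(org.apache.hadoop.hive.serde2.fast.DeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) LazySimpleDeserializeRead(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead) LazySimpleSerializeWrite(org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)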

Example 14 with BinarySortableSerializeWrite

Use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite in project hive by apache.

the class TestBinarySortableFast method testBinarySortableFast.

/**
 * Cross-checks the fast BinarySortable writer/reader against the regular SerDe for the
 * given rows. The visible code runs these phases in order:
 *   1. serialize each row with BinarySortableSerializeWrite and check that consecutive
 *      serialized rows compare in the order required by {@code ascending};
 *   2. deserialize those bytes with BinarySortableDeserializeRead and verify each field,
 *      including a variant that supplies one byte fewer and expects an EOFException;
 *   3. deserialize those bytes with the SerDe and compare objects with the source row;
 *   4. serialize each row with the SerDe and compare its bytes with the SerializeWrite
 *      output, reporting differing positions per field;
 *   5. deserialize the SerDe bytes with BinarySortableDeserializeRead and verify again.
 *
 * NOTE(review): this excerpt has lost its closing braces, block-comment delimiters and
 * several statements -- confirm details against the upstream source.
 *
 * @param rows rows to serialize, one Object[] of Writables per row
 * @param columnSortOrderIsDesc per-column descending flags for the writer and readers
 * @param columnNullMarker per-column null marker bytes for the writer
 * @param columnNotNullMarker per-column not-null marker bytes for the writer
 * @param serde SerDe used when all columns are written
 * @param rowOI object inspector passed to the SerDe's serialize call
 * @param serde_fewer SerDe used when {@code doWriteFewerColumns} is set
 * @param writeRowOI object inspector whose field count is the number of columns written
 *                   when {@code doWriteFewerColumns} is set
 * @param ascending expected ordering of consecutive serialized rows
 * @param primitiveTypeInfos type of each column
 * @param useIncludeColumns when set, a random subset of columns is chosen via {@code r}
 * @param doWriteFewerColumns when set, only the leading columns are written
 * @param r random source
 */
private void testBinarySortableFast(SerdeRandomRowSource source, Object[][] rows, boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker, AbstractSerDe serde, StructObjectInspector rowOI, AbstractSerDe serde_fewer, StructObjectInspector writeRowOI, boolean ascending, PrimitiveTypeInfo[] primitiveTypeInfos, boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable {
    int rowCount = rows.length;
    int columnCount = primitiveTypeInfos.length;
    // Stays null unless useIncludeColumns is set.
    boolean[] columnsToInclude = null;
    if (useIncludeColumns) {
        columnsToInclude = new boolean[columnCount];
        for (int i = 0; i < columnCount; i++) {
            columnsToInclude[i] = r.nextBoolean();
    int writeColumnCount = columnCount;
    if (doWriteFewerColumns) {
        writeColumnCount = writeRowOI.getAllStructFieldRefs().size();
    BinarySortableSerializeWrite binarySortableSerializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
    // Try to serialize
    // One Writable per row.
    BytesWritable[] serializeWriteBytes = new BytesWritable[rowCount];
    int[][] perFieldWriteLengthsArray = new int[rowCount][];
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        Output output = new Output();
        int[] perFieldWriteLengths = new int[columnCount];
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfos[index], writable);
            // Records the cumulative output length after this field (an end offset).
            perFieldWriteLengths[index] = output.getLength();
        perFieldWriteLengthsArray[i] = perFieldWriteLengths;
        BytesWritable bytesWritable = new BytesWritable();
        bytesWritable.set(output.getData(), 0, output.getLength());
        serializeWriteBytes[i] = bytesWritable;
        // Compare with the previous row's bytes to check the expected sort direction.
        if (i > 0) {
            int compareResult = serializeWriteBytes[i - 1].compareTo(serializeWriteBytes[i]);
            if ((compareResult < 0 && !ascending) || (compareResult > 0 && ascending)) {
                System.out.println("Test failed in " + (ascending ? "ascending" : "descending") + " order with " + (i - 1) + " and " + i);
                System.out.println("serialized data [" + (i - 1) + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i - 1]));
                System.out.println("serialized data [" + i + "] = " + TestBinarySortableSerDe.hexString(serializeWriteBytes[i]));
                fail("Sort order of serialized " + (i - 1) + " and " + i + " are reversed!");
    // Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */
        false, columnSortOrderIsDesc);
        BytesWritable bytesWritable = serializeWriteBytes[i];
        binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
        if (writeColumnCount == columnCount) {
       * Clip off one byte and expect to get an EOFException on the write field.
        BinarySortableDeserializeRead binarySortableDeserializeRead2 = new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */
        false, columnSortOrderIsDesc);
        binarySortableDeserializeRead2.set(bytesWritable.getBytes(), 0, // One fewer byte.
        bytesWritable.getLength() - 1);
        for (int index = 0; index < writeColumnCount; index++) {
            Writable writable = (Writable) row[index];
            // Only the last written field is wrapped in the EOFException check.
            if (index == writeColumnCount - 1) {
                boolean threw = false;
                try {
                    VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
                } catch (EOFException e) {
                    //          debugDetailedReadPositionString = binarySortableDeserializeRead2.getDetailedReadPositionString();
                    //          debugStackTrace = e.getStackTrace();
                    threw = true;
            } else {
                if (useIncludeColumns && !columnsToInclude[index]) {
                } else {
                    VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable);
    // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite.
    for (int i = 0; i < rowCount; i++) {
        BytesWritable bytesWritable = serializeWriteBytes[i];
        // Note that regular SerDe doesn't tolerate fewer columns.
        List<Object> deserializedRow;
        if (doWriteFewerColumns) {
            deserializedRow = (List<Object>) serde_fewer.deserialize(bytesWritable);
        } else {
            deserializedRow = (List<Object>) serde.deserialize(bytesWritable);
        Object[] row = rows[i];
        for (int index = 0; index < writeColumnCount; index++) {
            Object expected = row[index];
            Object object = deserializedRow.get(index);
            // Either both sides are null, or both are non-null and equal.
            if (expected == null || object == null) {
                if (expected != null || object != null) {
                    fail("SerDe deserialized NULL column mismatch");
            } else {
                if (!object.equals(expected)) {
                    fail("SerDe deserialized value does not match (expected " + expected.getClass().getName() + " " + expected.toString() + ", actual " + object.getClass().getName() + " " + object.toString() + ")");
    // One Writable per row.
    BytesWritable[] serdeBytes = new BytesWritable[rowCount];
    // Serialize using the SerDe, then below deserialize using DeserializeRead.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        // Since SerDe reuses memory, we will need to make a copy.
        BytesWritable serialized;
        if (doWriteFewerColumns) {
            serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
        } else {
            serialized = (BytesWritable) serde.serialize(row, rowOI);
        // NOTE(review): as shown, bytesWritable is freshly constructed and `serialized` is
        // never copied into it before it is read below; the copy statement appears to be
        // missing from this excerpt -- confirm against the upstream source.
        BytesWritable bytesWritable = new BytesWritable();
        byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        byte[] serializeWriteExpected = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength());
        if (!Arrays.equals(serDeOutput, serializeWriteExpected)) {
            int mismatchPos = -1;
            if (serDeOutput.length != serializeWriteExpected.length) {
                // Scan the common prefix for a differing byte to include in the failure message.
                for (int b = 0; b < Math.min(serDeOutput.length, serializeWriteExpected.length); b++) {
                    if (serDeOutput[b] != serializeWriteExpected[b]) {
                        mismatchPos = b;
                fail("Different byte array lengths: serDeOutput.length " + serDeOutput.length + ", serializeWriteExpected.length " + serializeWriteExpected.length + " mismatchPos " + mismatchPos + " perFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]));
            List<Integer> differentPositions = new ArrayList();
            for (int b = 0; b < serDeOutput.length; b++) {
                if (serDeOutput[b] != serializeWriteExpected[b]) {
            if (differentPositions.size() > 0) {
                List<String> serializeWriteExpectedFields = new ArrayList<String>();
                List<String> serDeFields = new ArrayList<String>();
                int f = 0;
                int lastBegin = 0;
                // Split both byte arrays at the recorded field end offsets for display.
                for (int b = 0; b < serDeOutput.length; b++) {
                    int writeLength = perFieldWriteLengthsArray[i][f];
                    if (b + 1 == writeLength) {
                        serializeWriteExpectedFields.add(displayBytes(serializeWriteExpected, lastBegin, writeLength - lastBegin));
                        serDeFields.add(displayBytes(serDeOutput, lastBegin, writeLength - lastBegin));
                        lastBegin = b + 1;
                fail("SerializeWrite and SerDe serialization does not match at positions " + differentPositions.toString() + "\n(SerializeWrite: " + serializeWriteExpectedFields.toString() + "\nSerDe: " + serDeFields.toString() + "\nperFieldWriteLengths " + Arrays.toString(perFieldWriteLengthsArray[i]) + "\nprimitiveTypeInfos " + Arrays.toString(primitiveTypeInfos) + "\nrow " + Arrays.toString(row));
        serdeBytes[i] = bytesWritable;
    // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
    for (int i = 0; i < rowCount; i++) {
        Object[] row = rows[i];
        BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos, /* useExternalBuffer */
        false, columnSortOrderIsDesc);
        BytesWritable bytesWritable = serdeBytes[i];
        binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
        for (int index = 0; index < columnCount; index++) {
            if (useIncludeColumns && !columnsToInclude[index]) {
            } else if (index >= writeColumnCount) {
                // Should come back a null.
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null);
            } else {
                Writable writable = (Writable) row[index];
                VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
        if (writeColumnCount == columnCount) {
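Also used : BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) ArrayList(java.util.ArrayList) Writable(org.apache.hadoop.io.Writable) BytesWritable(org.apache.hadoop.io.BytesWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) EOFException(java.io.EOFException)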

Example 15 with BinarySortableSerializeWrite

Use of org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite in project hive by apache.

the class VectorMapJoinOuterMultiKeyOperator method process.

// ---------------------------------------------------------------------------
// Process Multi-Key Outer Join on a vectorized row batch.
public void process(Object row, int tag) throws HiveException {
    try {
        VectorizedRowBatch batch = (VectorizedRowBatch) row;
        alias = (byte) tag;
        if (needCommonSetup) {
            // Our one time process method initialization.
         * Initialize Multi-Key members for this specialized class.
            keyVectorSerializeWrite = new VectorSerializeRow(new BinarySortableSerializeWrite(bigTableKeyColumnMap.length));
            keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap);
            currentKeyOutput = new Output();
            saveKeyOutput = new Output();
            needCommonSetup = false;
        if (needHashTableSetup) {
            // Setup our hash table specialization.  It will be the first time the process
            // method is called, or after a Hybrid Grace reload.
         * Get our Multi-Key hash map information for this specialized class.
            hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable;
            needHashTableSetup = false;
        final int inputLogicalSize = batch.size;
        if (inputLogicalSize == 0) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
        // Do the per-batch setup for an outer join.
        // For outer join, remember our input rows before ON expression filtering or before
        // hash table matching so we can generate results for all rows (matching and non matching)
        // later.
        boolean inputSelectedInUse = batch.selectedInUse;
        if (inputSelectedInUse) {
            // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) {
            // throw new HiveException("batch.selected is not in sort order and unique");
            // }
            System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize);
        // Filtering for outer join just removes rows available for hash table matching.
        boolean someRowsFilteredOut = false;
        if (bigTableFilterExpressions.length > 0) {
            // Since the input
            for (VectorExpression ve : bigTableFilterExpressions) {
            someRowsFilteredOut = (batch.size != inputLogicalSize);
            if (LOG.isDebugEnabled()) {
                if (batch.selectedInUse) {
                    if (inputSelectedInUse) {
                        LOG.debug(CLASS_NAME + " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
                    } else {
                        LOG.debug(CLASS_NAME + " inputLogicalSize " + inputLogicalSize + " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size));
        // Perform any key expressions.  Results will go into scratch columns.
        if (bigTableKeyExpressions != null) {
            for (VectorExpression ve : bigTableKeyExpressions) {
       * Multi-Key specific declarations.
        // None.
       * Multi-Key Long check for repeating.
        // If all BigTable input columns to key expressions are isRepeating, then
        // calculate key once; lookup once.
        // Also determine if any nulls are present since for a join that means no match.
        boolean allKeyInputColumnsRepeating;
        // Only valid if allKeyInputColumnsRepeating is true.
        boolean someKeyInputColumnIsNull = false;
        if (bigTableKeyColumnMap.length == 0) {
            allKeyInputColumnsRepeating = false;
        } else {
            allKeyInputColumnsRepeating = true;
            for (int i = 0; i < bigTableKeyColumnMap.length; i++) {
                ColumnVector colVector = batch.cols[bigTableKeyColumnMap[i]];
                if (!colVector.isRepeating) {
                    allKeyInputColumnsRepeating = false;
                if (!colVector.noNulls && colVector.isNull[0]) {
                    someKeyInputColumnIsNull = true;
        if (allKeyInputColumnsRepeating) {
         * Repeating.
            // All key input columns are repeating.  Generate key once.  Lookup once.
            // Since the key is repeated, we must use entry 0 regardless of selectedInUse.
         * Multi-Key specific repeated lookup.
            JoinUtil.JoinResult joinResult;
            if (batch.size == 0) {
                // Whole repeated key batch was filtered out.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else if (someKeyInputColumnIsNull) {
                // Any (repeated) null key column is no match for whole batch.
                joinResult = JoinUtil.JoinResult.NOMATCH;
            } else {
                // All key input columns are repeating.  Generate key once.  Lookup once.
                keyVectorSerializeWrite.serializeWrite(batch, 0);
                byte[] keyBytes = currentKeyOutput.getData();
                int keyLength = currentKeyOutput.getLength();
                joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0]);
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " +;
            finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize);
        } else {
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated");
            int[] selected = batch.selected;
            boolean selectedInUse = batch.selectedInUse;
            int hashMapResultCount = 0;
            int allMatchCount = 0;
            int equalKeySeriesCount = 0;
            int spillCount = 0;
            boolean atLeastOneNonMatch = someRowsFilteredOut;
         * Multi-Key specific variables.
            Output temp;
            // We optimize performance by only looking up the first key in a series of equal keys.
            boolean haveSaveKey = false;
            JoinUtil.JoinResult saveJoinResult = JoinUtil.JoinResult.NOMATCH;
            // Logical loop over the rows in the batch since the batch may have selected in use.
            for (int logical = 0; logical < batch.size; logical++) {
                int batchIndex = (selectedInUse ? selected[logical] : logical);
                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch");
           * Multi-Key outer null detection.
                // Generate binary sortable key for current row in vectorized row batch.
                keyVectorSerializeWrite.serializeWrite(batch, batchIndex);
                if (keyVectorSerializeWrite.getHasAnyNulls()) {
                    // Have that the NULL does not interfere with the current equal key series, if there
                    // is one. We do not set saveJoinResult.
                    // Let a current MATCH equal key series keep going, or
                    // Let a current SPILL equal key series keep going, or
                    // Let a current NOMATCH keep not matching.
                    atLeastOneNonMatch = true;
                // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL");
                } else {
                    if (!haveSaveKey || !saveKeyOutput.arraysEquals(currentKeyOutput)) {
                        if (haveSaveKey) {
                            // Move on with our counts.
                            switch(saveJoinResult) {
                                case MATCH:
                                case SPILL:
                                case NOMATCH:
                        // Regardless of our matching result, we keep that information to make multiple use
                        // of it for a possible series of equal keys.
                        haveSaveKey = true;
               * Multi-Key specific save key.
                        temp = saveKeyOutput;
                        saveKeyOutput = currentKeyOutput;
                        currentKeyOutput = temp;
               * Multi-Key specific lookup key.
                        byte[] keyBytes = saveKeyOutput.getData();
                        int keyLength = saveKeyOutput.getLength();
                        saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]);
                        switch(saveJoinResult) {
                            case MATCH:
                                equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount;
                                equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount;
                                equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow();
                                equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1;
                                allMatchs[allMatchCount++] = batchIndex;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey);
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                            case NOMATCH:
                                atLeastOneNonMatch = true;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey);
                    } else {
                        switch(saveJoinResult) {
                            case MATCH:
                                allMatchs[allMatchCount++] = batchIndex;
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate");
                            case SPILL:
                                spills[spillCount] = batchIndex;
                                spillHashMapResultIndices[spillCount] = hashMapResultCount;
                            case NOMATCH:
                                // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate");
                // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) {
                // throw new HiveException("allMatchs is not in sort order and unique");
                // }
            if (haveSaveKey) {
                // Update our counts for the last key.
                switch(saveJoinResult) {
                    case MATCH:
                    case SPILL:
                    case NOMATCH:
            if (LOG.isDebugEnabled()) {
                LOG.debug(CLASS_NAME + " batch #" + batchCounter + " allMatchs " + intArrayToRangesString(allMatchs, allMatchCount) + " equalKeySeriesHashMapResultIndices " + intArrayToRangesString(equalKeySeriesHashMapResultIndices, equalKeySeriesCount) + " equalKeySeriesAllMatchIndices " + intArrayToRangesString(equalKeySeriesAllMatchIndices, equalKeySeriesCount) + " equalKeySeriesIsSingleValue " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesIsSingleValue, 0, equalKeySeriesCount)) + " equalKeySeriesDuplicateCounts " + Arrays.toString(Arrays.copyOfRange(equalKeySeriesDuplicateCounts, 0, equalKeySeriesCount)) + " atLeastOneNonMatch " + atLeastOneNonMatch + " inputSelectedInUse " + inputSelectedInUse + " inputLogicalSize " + inputLogicalSize + " spills " + intArrayToRangesString(spills, spillCount) + " spillHashMapResultIndices " + intArrayToRangesString(spillHashMapResultIndices, spillCount) + " hashMapResults " + Arrays.toString(Arrays.copyOfRange(hashMapResults, 0, hashMapResultCount)));
            // We will generate results for all matching and non-matching rows.
            finishOuter(batch, allMatchCount, equalKeySeriesCount, atLeastOneNonMatch, inputSelectedInUse, inputLogicalSize, spillCount, hashMapResultCount);
        if (batch.size > 0) {
            // Forward any remaining selected rows.
    } catch (IOException e) {
        throw new HiveException(e);
    } catch (Exception e) {
        throw new HiveException(e);
Also used : JoinUtil(org.apache.hadoop.hive.ql.exec.JoinUtil) VectorSerializeRow(org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite) IOException(java.io.IOException) IOException(java.io.IOException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) Output(org.apache.hadoop.hive.serde2.ByteStream.Output) VectorExpression(org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)


BinarySortableSerializeWrite ( Output (org.apache.hadoop.hive.serde2.ByteStream.Output)15 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)9 LazyBinarySerializeWrite ( IOException ( BinarySortableDeserializeRead ( SerializeWrite ( BytesWritable ( JoinUtil (org.apache.hadoop.hive.ql.exec.JoinUtil)4 VectorSerializeRow (org.apache.hadoop.hive.ql.exec.vector.VectorSerializeRow)4 VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch)4 VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression)4 DeserializeRead ( ArrayList (java.util.ArrayList)3 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)3 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)3 LazySimpleDeserializeRead ( LazySimpleSerializeWrite ( LazyBinaryDeserializeRead ( StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)3