
Example 1 with RowTestObjects

Use of org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects in the Apache Hive project.

From the class RowCollectorTestOperator, the method process:

@Override
public void process(Object row, int tag) throws HiveException {
    rowCount++;
    Object[] rowObjectArray = (Object[]) row;
    Object[] resultObjectArray = new Object[rowObjectArray.length];
    // Deep-copy each field through its inspector: upstream operators may
    // reuse the objects they hand downstream, so references must not be kept.
    for (int c = 0; c < rowObjectArray.length; c++) {
        resultObjectArray[c] = ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectArray[c]);
    }
    nextTestRow(new RowTestObjects(resultObjectArray));
}
Also used: RowTestObjects(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects)
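
The process() above copies every field through its ObjectInspector before wrapping the array, which only pays off if RowTestObjects compares rows by value. A minimal sketch of what such a wrapper plausibly provides, assuming value-based equals/hashCode over the field array (the actual Hive class may differ):

import java.util.Arrays;

// Hypothetical sketch of a value-equal row wrapper; the real
// org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects may differ.
public final class RowSketch {
    private final Object[] row;

    public RowSketch(Object[] row) {
        this.row = row;  // caller passes already-copied field objects
    }

    public Object[] getRow() {
        return row;
    }

    @Override
    public boolean equals(Object o) {
        return o instanceof RowSketch && Arrays.equals(row, ((RowSketch) o).row);
    }

    @Override
    public int hashCode() {
        return Arrays.hashCode(row);
    }
}

Value equality is what lets the later examples use these rows as HashMap keys and multiset members.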

Example 2 with RowTestObjects

Use of org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects in the Apache Hive project.

From the class MapJoinTestConfig, the method loadTableContainerData:

private static void loadTableContainerData(MapJoinTestDescription testDesc, MapJoinTestData testData, MapJoinTableContainer mapJoinTableContainer) throws IOException, SerDeException, HiveException {
    LazyBinarySerializeWrite valueSerializeWrite = null;
    Output valueOutput = null;
    if (testData.smallTableValues != null) {
        valueSerializeWrite = new LazyBinarySerializeWrite(testDesc.smallTableValueTypeInfos.length);
        valueOutput = new Output();
    }
    BytesWritable valueBytesWritable = new BytesWritable();
    BytesWritable keyBytesWritable = new BytesWritable();
    BinarySortableSerializeWrite keySerializeWrite = new BinarySortableSerializeWrite(testDesc.bigTableKeyTypeInfos.length);
    Output keyOutput = new Output();
    int round = 0;
    boolean atLeastOneValueAdded = false;
    // Round-robin over the small-table keys: round r adds each key's r-th
    // value (when it has one); stop once a full round adds nothing.
    while (true) {
        for (Entry<RowTestObjects, Integer> testRowEntry : testData.smallTableKeyHashMap.entrySet()) {
            final int smallTableKeyIndex = testRowEntry.getValue();
            final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex);
            boolean addEntry = round + 1 <= valueCount;
            if (addEntry) {
                atLeastOneValueAdded = true;
                RowTestObjects valueRow = null;
                if (testData.smallTableValues != null) {
                    ArrayList<RowTestObjects> valueList = testData.smallTableValues.get(smallTableKeyIndex);
                    valueRow = valueList.get(round);
                }
                Object[] smallTableKey = testRowEntry.getKey().getRow();
                keyOutput.reset();
                keySerializeWrite.set(keyOutput);
                for (int index = 0; index < testDesc.bigTableKeyTypeInfos.length; index++) {
                    Writable keyWritable = (Writable) smallTableKey[index];
                    VerifyFastRow.serializeWrite(keySerializeWrite, (PrimitiveTypeInfo) testDesc.bigTableKeyTypeInfos[index], keyWritable);
                }
                keyBytesWritable.set(keyOutput.getData(), 0, keyOutput.getLength());
                if (valueRow == null) {
                    // Empty value.
                    mapJoinTableContainer.putRow(keyBytesWritable, valueBytesWritable);
                } else {
                    Object[] smallTableValue = valueRow.getRow();
                    valueOutput.reset();
                    valueSerializeWrite.set(valueOutput);
                    for (int index = 0; index < testDesc.smallTableValueTypeInfos.length; index++) {
                        Writable valueWritable = (Writable) smallTableValue[index];
                        VerifyFastRow.serializeWrite(valueSerializeWrite, (PrimitiveTypeInfo) testDesc.smallTableValueTypeInfos[index], valueWritable);
                    }
                    valueBytesWritable.set(valueOutput.getData(), 0, valueOutput.getLength());
                    mapJoinTableContainer.putRow(keyBytesWritable, valueBytesWritable);
                }
            }
        }
        if (testData.smallTableValues == null || !atLeastOneValueAdded) {
            break;
        }
        round++;
        atLeastOneValueAdded = false;
    }
    mapJoinTableContainer.seal();
}
Also used: LazyBinarySerializeWrite(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite), Writable(org.apache.hadoop.io.Writable), BytesWritable(org.apache.hadoop.io.BytesWritable), BinarySortableSerializeWrite(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite), RowTestObjects(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects), Output(org.apache.hadoop.hive.serde2.ByteStream.Output)
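
The loader above reuses a single Output buffer and BytesWritable per side, cycling through reset(), set(), one write call per column, and a final copy into the writable. A minimal standalone sketch of that cycle, assuming a single long key column (the field count and value are illustrative):

import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.io.BytesWritable;

public class SerializeCycleSketch {
    public static void main(String[] args) throws Exception {
        BinarySortableSerializeWrite keyWrite = new BinarySortableSerializeWrite(1);
        Output keyOutput = new Output();
        BytesWritable keyBytes = new BytesWritable();

        keyOutput.reset();        // rewind the reused buffer
        keyWrite.set(keyOutput);  // point the writer at it
        keyWrite.writeLong(42L);  // one write call per column
        // BytesWritable.set copies the bytes, so the buffer can be reused.
        keyBytes.set(keyOutput.getData(), 0, keyOutput.getLength());
    }
}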

Example 3 with RowTestObjects

Use of org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects in the Apache Hive project.

From the class TestMapJoinOperator, the method addToOutput:

private void addToOutput(MapJoinTestDescription testDesc, RowTestObjectsMultiSet expectedTestRowMultiSet, Object[] outputObjects) {
    for (int c = 0; c < outputObjects.length; c++) {
        PrimitiveObjectInspector primitiveObjInsp = ((PrimitiveObjectInspector) testDesc.outputObjectInspectors[c]);
        Object outputObject = outputObjects[c];
        outputObjects[c] = primitiveObjInsp.copyObject(outputObject);
    }
    expectedTestRowMultiSet.add(new RowTestObjects(outputObjects));
}
Also used: PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector), RowTestObjects(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects)
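
addToOutput copies each field through its ObjectInspector before adding the row, again because inspectors may hand back reused objects. The multiset itself amounts to a map from value-equal rows to occurrence counts; a hypothetical sketch of that idea (the real RowTestObjectsMultiSet API may differ):

import java.util.HashMap;
import java.util.Map;

// Hypothetical row multiset: expected and actual join output can be
// compared without regard to row order, while still respecting duplicates.
public final class MultiSetSketch<T> {
    private final Map<T, Integer> counts = new HashMap<>();

    public void add(T row) {
        counts.merge(row, 1, Integer::sum);  // bump the occurrence count
    }

    public boolean sameAs(MultiSetSketch<T> other) {
        return counts.equals(other.counts);  // same rows, same counts
    }
}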

Example 4 with RowTestObjects

Use of org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects in the Apache Hive project.

From the class TestMapJoinOperator, the method createExpectedTestRowMultiSet:

/*
 * Simulate the join by driving the test big-table data against our test
 * small-table HashMap, and build the expected output as a multiset of
 * TestRow (i.e. a TestRow plus its occurrence count).
 */
private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescription testDesc, MapJoinTestData testData) throws HiveException {
    RowTestObjectsMultiSet expectedTestRowMultiSet = new RowTestObjectsMultiSet();
    VectorExtractRow vectorExtractRow = new VectorExtractRow();
    vectorExtractRow.init(testDesc.bigTableKeyTypeInfos);
    final int bigTableColumnCount = testDesc.bigTableTypeInfos.length;
    Object[] bigTableRowObjects = new Object[bigTableColumnCount];
    final int bigTableKeyColumnCount = testDesc.bigTableKeyTypeInfos.length;
    Object[] bigTableKeyObjects = new Object[bigTableKeyColumnCount];
    VectorBatchGenerateStream bigTableBatchStream = testData.getBigTableBatchStream();
    VectorizedRowBatch batch = testData.getBigTableBatch();
    bigTableBatchStream.reset();
    while (bigTableBatchStream.isNext()) {
        batch.reset();
        bigTableBatchStream.fillNext(batch);
        final int size = testData.bigTableBatch.size;
        for (int r = 0; r < size; r++) {
            vectorExtractRow.extractRow(testData.bigTableBatch, r, bigTableRowObjects);
            // Form key object array
            for (int k = 0; k < bigTableKeyColumnCount; k++) {
                int keyColumnNum = testDesc.bigTableKeyColumnNums[k];
                bigTableKeyObjects[k] = bigTableRowObjects[keyColumnNum];
                bigTableKeyObjects[k] = ((PrimitiveObjectInspector) testDesc.bigTableObjectInspectors[keyColumnNum]).copyObject(bigTableKeyObjects[k]);
            }
            RowTestObjects testKey = new RowTestObjects(bigTableKeyObjects);
            if (testData.smallTableKeyHashMap.containsKey(testKey)) {
                int smallTableKeyIndex = testData.smallTableKeyHashMap.get(testKey);
                switch(testDesc.vectorMapJoinVariation) {
                    case INNER:
                    case OUTER:
                        {
                            // One row per value.
                            ArrayList<RowTestObjects> valueList = testData.smallTableValues.get(smallTableKeyIndex);
                            final int valueCount = valueList.size();
                            for (int v = 0; v < valueCount; v++) {
                                Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                                addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                                Object[] valueRow = valueList.get(v).getRow();
                                final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length;
                                final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length;
                                for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) {
                                    outputObjects[bigTableRetainColumnNumsLength + o] = valueRow[testDesc.smallTableRetainValueColumnNums[o]];
                                }
                                addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
                            }
                        }
                        break;
                    case INNER_BIG_ONLY:
                        {
                            // Value count rows.
                            final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex);
                            for (int v = 0; v < valueCount; v++) {
                                Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                                addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                                addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
                            }
                        }
                        break;
                    case LEFT_SEMI:
                        {
                            // One row (existence).
                            Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                            addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                            addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
                        }
                        break;
                    default:
                        throw new RuntimeException("Unknown operator variation " + testDesc.vectorMapJoinVariation);
                }
            } else {
                if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER) {
                    // We need to add a non-match row with nulls for small table values.
                    Object[] outputObjects = new Object[testDesc.outputColumnNames.length];
                    addBigTableRetained(testDesc, bigTableRowObjects, outputObjects);
                    final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length;
                    final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length;
                    for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) {
                        outputObjects[bigTableRetainColumnNumsLength + o] = null;
                    }
                    addToOutput(testDesc, expectedTestRowMultiSet, outputObjects);
                }
            }
        }
    }
    return expectedTestRowMultiSet;
}
Also used: VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), VectorBatchGenerateStream(org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream), ArrayList(java.util.ArrayList), RowTestObjectsMultiSet(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet), VectorExtractRow(org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow), RowTestObjects(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects)
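
The switch above fixes how many output rows a matching big-table row produces. A small sketch restating those cardinality rules in isolation, using a simplified stand-in for VectorMapJoinVariation (illustrative only):

public class JoinCardinalitySketch {
    // Simplified stand-in for VectorMapJoinVariation.
    enum Variation { INNER, OUTER, INNER_BIG_ONLY, LEFT_SEMI }

    // Output rows for one big-table row whose key matches a small-table
    // key that has valueCount values, per the switch above.
    static int rowsForMatch(Variation v, int valueCount) {
        switch (v) {
            case INNER:
            case OUTER:
            case INNER_BIG_ONLY:
                return valueCount;  // one output row per small-table value
            case LEFT_SEMI:
                return 1;           // a single existence row
            default:
                throw new IllegalArgumentException("Unknown variation " + v);
        }
    }
}

A non-matching big-table row contributes one row (small-table columns nulled) under OUTER and no rows otherwise, exactly as the else branch encodes.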

Example 5 with RowTestObjects

Use of org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects in the Apache Hive project.

From the class RowVectorCollectorTestOperator, the method process:

@Override
public void process(Object row, int tag) throws HiveException {
    VectorizedRowBatch batch = (VectorizedRowBatch) row;
    rowCount += batch.size;
    boolean selectedInUse = batch.selectedInUse;
    int[] selected = batch.selected;
    for (int logical = 0; logical < batch.size; logical++) {
        int batchIndex = (selectedInUse ? selected[logical] : logical);
        Object[] rowObjects = new Object[outputObjectInspectors.length];
        vectorExtractRow.extractRow(batch, batchIndex, rowObjects);
        for (int c = 0; c < rowObjects.length; c++) {
            rowObjects[c] = ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjects[c]);
        }
        nextTestRow(new RowTestObjects(rowObjects));
    }
}
Also used: VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch), RowTestObjects(org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects)
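
The logical-to-physical index mapping is the standard VectorizedRowBatch idiom: when selectedInUse is true, only the first batch.size entries of selected name live physical rows. A minimal runnable sketch (column layout and values are illustrative):

import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class SelectedIndexSketch {
    public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(1);
        LongColumnVector col = new LongColumnVector();
        batch.cols[0] = col;
        for (int i = 0; i < 6; i++) {
            col.vector[i] = i * 10L;  // physical rows 0..5
        }
        // Mark only physical rows 1, 3, and 4 as live.
        batch.selectedInUse = true;
        batch.selected[0] = 1;
        batch.selected[1] = 3;
        batch.selected[2] = 4;
        batch.size = 3;
        for (int logical = 0; logical < batch.size; logical++) {
            int batchIndex = batch.selectedInUse ? batch.selected[logical] : logical;
            System.out.println(col.vector[batchIndex]);  // prints 10, 30, 40
        }
    }
}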

Aggregations

RowTestObjects (org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects): 5
VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 2
ArrayList (java.util.ArrayList): 1
RowTestObjectsMultiSet (org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet): 1
VectorExtractRow (org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow): 1
VectorBatchGenerateStream (org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream): 1
Output (org.apache.hadoop.hive.serde2.ByteStream.Output): 1
BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite): 1
LazyBinarySerializeWrite (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite): 1
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 1
BytesWritable (org.apache.hadoop.io.BytesWritable): 1
Writable (org.apache.hadoop.io.Writable): 1