Search in sources :

Example 1 with MapJoinTestData

Use of org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData in the hive project by apache.

The method setupMapJoin of the class AbstractMapJoin.

/**
 * Prepares the state needed to run a map join benchmark: builds the test description and test
 * data, installs an object registry, creates the operator under test, and materializes the big
 * table input ahead of time.
 *
 * <p>{@code bigTableColumnNames}, {@code smallTableValueColumnNames},
 * {@code bigTableRetainColumnNums} and {@code smallTableRetainValueColumnNums} are part of the
 * signature but are not read by this method.
 *
 * @param hiveConf configuration handed to the test description
 * @param seed seed handed to the test data generator
 * @param rowCount row count handed to the test data generator
 * @param vectorMapJoinVariation join variation; stored on this instance and passed to the
 *     test description
 * @param mapJoinImplementation implementation to benchmark; stored on this instance and used
 *     to create the operator and to decide between row and batch input
 * @param bigTableColumnNames not read by this method
 * @param bigTableTypeInfos big table column types, passed to the test description
 * @param bigTableKeyColumnNums big table key column numbers, passed to the test description
 * @param smallTableValueColumnNames not read by this method
 * @param smallTableValueTypeInfos small table value types, passed to the test description
 * @param bigTableRetainColumnNums not read by this method
 * @param smallTableRetainKeyColumnNums small table retained key column numbers, passed to the
 *     test description
 * @param smallTableRetainValueColumnNums not read by this method
 * @param smallTableGenerationParameters small table generation settings, passed to the test
 *     description
 * @throws Exception if any of the setup steps fails
 */
protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation, String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, int[] bigTableKeyColumnNums, String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, int[] bigTableRetainColumnNums, int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, SmallTableGenerationParameters smallTableGenerationParameters) throws Exception {
    this.vectorMapJoinVariation = vectorMapJoinVariation;
    this.mapJoinImplementation = mapJoinImplementation;

    // The plan variation is always DYNAMIC_PARTITION_HASH_JOIN for this setup.
    testDesc = new MapJoinTestDescription(
            hiveConf,
            vectorMapJoinVariation,
            bigTableTypeInfos,
            bigTableKeyColumnNums,
            smallTableValueTypeInfos,
            smallTableRetainKeyColumnNums,
            smallTableGenerationParameters,
            MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);

    // Generate the test data; the same data serves every implementation variation.
    testData = new MapJoinTestData(rowCount, testDesc, seed);

    // Install the object registry before the operator is created.
    ObjectCache.setupObjectRegistry(new ObjectRegistryImpl());

    operator = setupBenchmarkImplementation(mapJoinImplementation, testDesc, testData);
    isVectorOutput = isVectorOutput(mapJoinImplementation);

    /*
     * Data generation cost must not be measured, so the whole big table is
     * produced in memory here, before any benchmark run.
     */
    if (isVectorOutput) {
        VectorRandomBatchSource bigTableSource = testData.getBigTableBatchSource();
        bigTableSource.resetBatchIteration();

        ArrayList<VectorizedRowBatch> filledBatches = new ArrayList<>();
        // Each iteration fills a fresh batch; the batch for which fillNextBatch
        // returns false is discarded and ends the loop.
        for (VectorizedRowBatch candidate = testData.createBigTableBatch(testDesc);
                bigTableSource.fillNextBatch(candidate);
                candidate = testData.createBigTableBatch(testDesc)) {
            filledBatches.add(candidate);
        }
        bigTableBatches = filledBatches.toArray(new VectorizedRowBatch[0]);
    } else {
        bigTableRows = testData.getBigTableBatchSource().getRandomRows();
    }
}
Also used : VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) MapJoinTestDescription(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription) VectorRandomBatchSource(org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource) ObjectRegistryImpl(org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl) ArrayList(java.util.ArrayList) MapJoinTestData(org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData)

Aggregations

ArrayList (java.util.ArrayList): 1, VectorRandomBatchSource (org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource): 1, VectorizedRowBatch (org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch): 1, MapJoinTestData (org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData): 1, MapJoinTestDescription (org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription): 1, ObjectRegistryImpl (org.apache.tez.runtime.common.objectregistry.ObjectRegistryImpl): 1