Use of org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData in project hive by apache.
From the class AbstractMapJoin, method setupMapJoin.
/**
 * Populates the fields needed for a map join run: the test description, the generated test
 * data, the operator under test, and the big table input held in memory.
 *
 * <p>The big table is materialized here, either as rows or as vectorized batches depending on
 * {@code isVectorOutput(mapJoinImplementation)}, so that data generation cost is not part of
 * what gets measured later.
 *
 * @param hiveConf configuration passed to the {@code MapJoinTestDescription}
 * @param seed seed passed to the {@code MapJoinTestData} constructor
 * @param rowCount row count passed to the {@code MapJoinTestData} constructor
 * @param vectorMapJoinVariation join variation; stored on this instance and passed to the
 *     test description
 * @param mapJoinImplementation implementation to set up; stored on this instance
 * @param bigTableColumnNames not referenced by this method body
 * @param bigTableTypeInfos big table column types, passed to the test description
 * @param bigTableKeyColumnNums big table key column numbers, passed to the test description
 * @param smallTableValueColumnNames not referenced by this method body
 * @param smallTableValueTypeInfos small table value types, passed to the test description
 * @param bigTableRetainColumnNums not referenced by this method body
 * @param smallTableRetainKeyColumnNums passed to the test description
 * @param smallTableRetainValueColumnNums not referenced by this method body
 * @param smallTableGenerationParameters passed to the test description
 * @throws Exception propagated from the setup calls made here
 */
protected void setupMapJoin(
    HiveConf hiveConf,
    long seed,
    int rowCount,
    VectorMapJoinVariation vectorMapJoinVariation,
    MapJoinTestImplementation mapJoinImplementation,
    String[] bigTableColumnNames,
    TypeInfo[] bigTableTypeInfos,
    int[] bigTableKeyColumnNums,
    String[] smallTableValueColumnNames,
    TypeInfo[] smallTableValueTypeInfos,
    int[] bigTableRetainColumnNums,
    int[] smallTableRetainKeyColumnNums,
    int[] smallTableRetainValueColumnNums,
    SmallTableGenerationParameters smallTableGenerationParameters) throws Exception {
  this.vectorMapJoinVariation = vectorMapJoinVariation;
  this.mapJoinImplementation = mapJoinImplementation;

  // The plan variation is fixed to DYNAMIC_PARTITION_HASH_JOIN for this setup.
  testDesc = new MapJoinTestDescription(
      hiveConf,
      vectorMapJoinVariation,
      bigTableTypeInfos,
      bigTableKeyColumnNums,
      smallTableValueTypeInfos,
      smallTableRetainKeyColumnNums,
      smallTableGenerationParameters,
      MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);

  // The generated data is usable by ANY implementation variation.
  testData = new MapJoinTestData(rowCount, testDesc, seed);

  // A fresh registry is installed before the operator is built.
  // NOTE(review): presumably operator setup reads from the object cache -- confirm.
  ObjectCache.setupObjectRegistry(new ObjectRegistryImpl());

  operator = setupBenchmarkImplementation(mapJoinImplementation, testDesc, testData);
  isVectorOutput = isVectorOutput(mapJoinImplementation);

  /*
   * Data generation execution cost is not measured -- the big table is generated into
   * memory up front.
   */
  if (isVectorOutput) {
    VectorRandomBatchSource source = testData.getBigTableBatchSource();
    source.resetBatchIteration();

    ArrayList<VectorizedRowBatch> filledBatches = new ArrayList<>();
    // Each batch is created before the fill attempt, so the batch allocated for the final,
    // unsuccessful fill is simply discarded.
    VectorizedRowBatch candidate = testData.createBigTableBatch(testDesc);
    while (source.fillNextBatch(candidate)) {
      filledBatches.add(candidate);
      candidate = testData.createBigTableBatch(testDesc);
    }
    bigTableBatches = filledBatches.toArray(new VectorizedRowBatch[0]);
  } else {
    bigTableRows = testData.getBigTableBatchSource().getRandomRows();
  }
}
Aggregations