Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
From the class TestVectorGroupByOperator, method testAggregateStringIterable:
public void testAggregateStringIterable(String aggregateName, Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("A");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
  Pair<GroupByDesc, VectorGroupByDesc> pair = buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.stringTypeInfo);
  GroupByDesc desc = pair.fst;
  VectorGroupByDesc vectorDesc = pair.snd;
  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
  VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);
  FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);
  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);
  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(1, outBatchList.size());
  Object result = outBatchList.get(0);
  Validator validator = getValidator(aggregateName);
  validator.validate("_total", expected, result);
}
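For context, a caller drives this helper with an iterable of fake row batches; a minimal usage sketch, where makeStringBatches is a hypothetical stand-in for the FakeVectorRowBatch-style generators the real test class uses:

// Hypothetical sketch; makeStringBatches is illustrative, not a real Hive API.
Iterable<VectorizedRowBatch> data = makeStringBatches("apple", "banana", "cherry");
// "min" over column A should surface the lexicographically smallest string.
testAggregateStringIterable("min", data, "apple");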
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
From the class TestVectorSelectOperator, method testSelectOperator:
@Test
public void testSelectOperator() throws HiveException {
  List<String> columns = new ArrayList<String>();
  columns.add("a");
  columns.add("b");
  columns.add("c");
  VectorizationContext vc = new VectorizationContext("name", columns);
  SelectDesc selDesc = new SelectDesc(false);
  List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
  ExprNodeColumnDesc colDesc1 = new ExprNodeColumnDesc(Long.class, "a", "table", false);
  ExprNodeColumnDesc colDesc2 = new ExprNodeColumnDesc(Long.class, "b", "table", false);
  ExprNodeColumnDesc colDesc3 = new ExprNodeColumnDesc(Long.class, "c", "table", false);
  ExprNodeGenericFuncDesc plusDesc = new ExprNodeGenericFuncDesc();
  GenericUDF gudf = new GenericUDFOPPlus();
  plusDesc.setGenericUDF(gudf);
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(colDesc1);
  children.add(colDesc2);
  plusDesc.setChildren(children);
  plusDesc.setTypeInfo(TypeInfoFactory.longTypeInfo);
  colList.add(plusDesc);
  colList.add(colDesc3);
  selDesc.setColList(colList);
  List<String> outputColNames = new ArrayList<String>();
  outputColNames.add("_col0");
  outputColNames.add("_col1");
  selDesc.setOutputColumnNames(outputColNames);
  // CONSIDER unwinding ValidatorVectorSelectOperator as a subclass of VectorSelectOperator.
  VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
  List<ExprNodeDesc> selectColList = selDesc.getColList();
  VectorExpression[] vectorSelectExprs = new VectorExpression[selectColList.size()];
  for (int i = 0; i < selectColList.size(); i++) {
    ExprNodeDesc expr = selectColList.get(i);
    VectorExpression ve = vc.getVectorExpression(expr);
    vectorSelectExprs[i] = ve;
  }
  vectorSelectDesc.setSelectExpressions(vectorSelectExprs);
  vectorSelectDesc.setProjectedOutputColumns(new int[] { 3, 2 });
  ValidatorVectorSelectOperator vso = new ValidatorVectorSelectOperator(new CompilationOpContext(), selDesc, vc, vectorSelectDesc);
  VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(VectorizedRowBatch.DEFAULT_SIZE, 4, 17);
  vso.process(vrg, 0);
}
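Note the projection mapping: with projectedOutputColumns set to { 3, 2 }, output column 0 reads from scratch column 3 (the a + b result) and output column 1 from input column 2 (c). A hedged sketch of the kind of check the validator can then perform against the batch (illustrative only; the real ValidatorVectorSelectOperator defined alongside this test does its own verification):

// Illustrative check: after process(), scratch column 3 holds a + b row-wise.
LongColumnVector a = (LongColumnVector) vrg.cols[0];
LongColumnVector b = (LongColumnVector) vrg.cols[1];
LongColumnVector sum = (LongColumnVector) vrg.cols[3];
for (int i = 0; i < vrg.size; i++) {
  assertEquals(a.vector[i] + b.vector[i], sum.vector[i]);
}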
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
From the class DDLTask, method mergeFiles:
/**
 * First, make sure the source table/partition is not archived, indexed, or
 * stored in a non-RCFile/ORC format; if any of these conditions holds,
 * throw an exception.
 *
 * The merge itself is performed by building a MergeFileWork from the
 * mergeFilesDesc and wrapping it in a MergeFileTask (or a TezTask when Tez
 * is the execution engine).
 *
 * @param db
 * @param mergeFilesDesc
 * @return the exit status of the merge task (0 on success)
 * @throws HiveException
 */
private int mergeFiles(Hive db, AlterTablePartMergeFilesDesc mergeFilesDesc, DriverContext driverContext) throws HiveException {
  ListBucketingCtx lbCtx = mergeFilesDesc.getLbCtx();
  boolean lbatc = lbCtx == null ? false : lbCtx.isSkewedStoredAsDir();
  int lbd = lbCtx == null ? 0 : lbCtx.calculateListBucketingLevel();
  // merge work only needs input and output.
  MergeFileWork mergeWork = new MergeFileWork(mergeFilesDesc.getInputDir(), mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass().getName(), mergeFilesDesc.getTableDesc());
  LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
  ArrayList<String> inputDirstr = new ArrayList<String>(1);
  inputDirstr.add(mergeFilesDesc.getInputDir().toString());
  pathToAliases.put(mergeFilesDesc.getInputDir().get(0), inputDirstr);
  mergeWork.setPathToAliases(pathToAliases);
  mergeWork.setListBucketingCtx(mergeFilesDesc.getLbCtx());
  mergeWork.resolveConcatenateMerge(db.getConf());
  mergeWork.setMapperCannotSpanPartns(true);
  mergeWork.setSourceTableInputFormat(mergeFilesDesc.getInputFormatClass().getName());
  final FileMergeDesc fmd;
  if (mergeFilesDesc.getInputFormatClass().equals(RCFileInputFormat.class)) {
    fmd = new RCFileMergeDesc();
  } else {
    // safe to assume else is ORC as semantic analyzer will check for RC/ORC
    fmd = new OrcFileMergeDesc();
  }
  fmd.setDpCtx(null);
  fmd.setHasDynamicPartitions(false);
  fmd.setListBucketingAlterTableConcatenate(lbatc);
  fmd.setListBucketingDepth(lbd);
  fmd.setOutputPath(mergeFilesDesc.getOutputDir());
  CompilationOpContext opContext = driverContext.getCtx().getOpContext();
  Operator<? extends OperatorDesc> mergeOp = OperatorFactory.get(opContext, fmd);
  LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
  aliasToWork.put(mergeFilesDesc.getInputDir().toString(), mergeOp);
  mergeWork.setAliasToWork(aliasToWork);
  DriverContext driverCxt = new DriverContext();
  Task<?> task;
  if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
    TezWork tezWork = new TezWork(queryState.getQueryId(), conf);
    mergeWork.setName("File Merge");
    tezWork.add(mergeWork);
    task = new TezTask();
    ((TezTask) task).setWork(tezWork);
  } else {
    task = new MergeFileTask();
    ((MergeFileTask) task).setWork(mergeWork);
  }
  // initialize the task and execute
  task.initialize(queryState, getQueryPlan(), driverCxt, opContext);
  subtask = task;
  int ret = task.execute(driverCxt);
  if (subtask.getException() != null) {
    setException(subtask.getException());
  }
  return ret;
}
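Unlike the test snippets above, this production path does not construct a fresh CompilationOpContext; it reuses the one held by the driver's context, so the merge operator's id continues the compiling query's operator numbering. A minimal sketch of the contrast:

// Shared context (production): operator ids continue the query's sequence.
CompilationOpContext shared = driverContext.getCtx().getOpContext();
// Fresh context (as the unit tests do): operator numbering starts over.
CompilationOpContext fresh = new CompilationOpContext();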
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
From the class SerializationUtilities, method cloneOperatorTree:
/**
 * Clones an operator tree by serializing and deserializing the plan.
 * Expensive; do not use unless necessary.
 * @param roots The roots of the operator tree.
 * @return The cloned tree.
 */
public static List<Operator<?>> cloneOperatorTree(List<Operator<?>> roots) {
  ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
  CompilationOpContext ctx = roots.isEmpty() ? null : roots.get(0).getCompilationOpContext();
  serializePlan(roots, baos, true);
  @SuppressWarnings("unchecked")
  List<Operator<?>> result = deserializePlan(new ByteArrayInputStream(baos.toByteArray()), roots.getClass(), true);
  // Restore the context.
  LinkedList<Operator<?>> newOps = new LinkedList<>(result);
  while (!newOps.isEmpty()) {
    Operator<?> newOp = newOps.poll();
    newOp.setCompilationOpContext(ctx);
    List<Operator<?>> children = newOp.getChildOperators();
    if (children != null) {
      newOps.addAll(children);
    }
  }
  return result;
}
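A short usage sketch (how roots is obtained is assumed; the takeaway is that the context is not part of the serialized plan and is reattached to every clone afterwards):

List<Operator<?>> roots = getRootOperators(); // hypothetical accessor for an existing operator DAG
List<Operator<?>> clones = SerializationUtilities.cloneOperatorTree(roots);
// Clones and originals now report the very same CompilationOpContext instance.
assert clones.get(0).getCompilationOpContext() == roots.get(0).getCompilationOpContext();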
Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.
From the class BaseScalarUdfTest, method testUdf:
/**
 * This method drives the test. It takes the data from getBaseTable() and
 * feeds it through a SELECT operator with a COLLECT operator after it. Each
 * row produced by the collect operator is compared to getExpectedResult(),
 * and if every row matches, the method completes without an assertion failure.
 * @throws HiveException
 */
public final void testUdf() throws HiveException {
  InspectableObject[] data = getBaseTable();
  List<ExprNodeDesc> expressionList = getExpressionList();
  SelectDesc selectCtx = new SelectDesc(expressionList, OperatorTestUtils.createOutputColumnNames(expressionList));
  Operator<SelectDesc> op = OperatorFactory.get(new CompilationOpContext(), SelectDesc.class);
  op.setConf(selectCtx);
  CollectDesc cd = new CollectDesc(Integer.valueOf(10));
  CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, op);
  op.initialize(new JobConf(OperatorTestUtils.class), new ObjectInspector[] { data[0].oi });
  OperatorTestUtils.assertResults(op, cdop, data, getExpectedResult());
}
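Concrete UDF tests extend BaseScalarUdfTest and supply the three inputs this driver consumes; a schematic sketch (class name and method bodies hypothetical, return types assumed to mirror the calls in testUdf() above):

// Hypothetical subclass shape; real tests populate the three overrides.
public class TestMyScalarUdf extends BaseScalarUdfTest {

  @Override
  public InspectableObject[] getBaseTable() {
    // Input rows paired with their ObjectInspector.
    return new InspectableObject[0];
  }

  @Override
  public List<ExprNodeDesc> getExpressionList() {
    // Expressions applying the UDF under test to the base table's columns.
    return new ArrayList<ExprNodeDesc>();
  }

  @Override
  public InspectableObject[] getExpectedResult() {
    // Rows the collect operator is expected to emit, in order.
    return new InspectableObject[0];
  }
}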