Example 36 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From the class TestVectorGroupByOperator, method testAggregateStringIterable.

public void testAggregateStringIterable(String aggregateName, Iterable<VectorizedRowBatch> data, Object expected) throws HiveException {
    List<String> mapColumnNames = new ArrayList<String>();
    mapColumnNames.add("A");
    VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
    // Build a PARTIAL1 group-by descriptor for the aggregate under test over
    // string column "A", then vectorize the resulting operator.
    Pair<GroupByDesc, VectorGroupByDesc> pair = buildGroupByDescType(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "A", TypeInfoFactory.stringTypeInfo);
    GroupByDesc desc = pair.fst;
    VectorGroupByDesc vectorDesc = pair.snd;
    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorDesc);
    // Attach a capturing child operator so the emitted rows can be inspected.
    FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);
    for (VectorizedRowBatch unit : data) {
        vgo.process(unit, 0);
    }
    vgo.close(false);
    // A global aggregate over the whole input should emit exactly one row.
    List<Object> outBatchList = out.getCapturedRows();
    assertNotNull(outBatchList);
    assertEquals(1, outBatchList.size());
    Object result = outBatchList.get(0);
    Validator validator = getValidator(aggregateName);
    validator.validate("_total", expected, result);
}
Also used : ArrayList (java.util.ArrayList), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc), FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator), GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc)
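
For orientation, a hedged sketch of how a caller might drive this harness. The FakeVectorRowBatchFromObjectIterables helper, its (batchSize, types, values) constructor, and the expected "max" result are assumptions modeled on the fake-batch utilities in the same test package, not taken from the example above.

// Sketch only: run the "max" aggregate over three strings split into two-row
// batches; the fake-batch helper and its signature are assumed, not confirmed.
Iterable<Object> values = Arrays.asList((Object) "alpha", "beta", "gamma");
FakeVectorRowBatchFromObjectIterables data =
        new FakeVectorRowBatchFromObjectIterables(2, new String[] { "string" }, values);
testAggregateStringIterable("max", data, "gamma");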

Example 37 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From the class TestVectorSelectOperator, method testSelectOperator.

@Test
public void testSelectOperator() throws HiveException {
    List<String> columns = new ArrayList<String>();
    columns.add("a");
    columns.add("b");
    columns.add("c");
    VectorizationContext vc = new VectorizationContext("name", columns);
    SelectDesc selDesc = new SelectDesc(false);
    List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
    ExprNodeColumnDesc colDesc1 = new ExprNodeColumnDesc(Long.class, "a", "table", false);
    ExprNodeColumnDesc colDesc2 = new ExprNodeColumnDesc(Long.class, "b", "table", false);
    ExprNodeColumnDesc colDesc3 = new ExprNodeColumnDesc(Long.class, "c", "table", false);
    // Build the (a + b) expression tree around a GenericUDFOPPlus.
    ExprNodeGenericFuncDesc plusDesc = new ExprNodeGenericFuncDesc();
    GenericUDF gudf = new GenericUDFOPPlus();
    plusDesc.setGenericUDF(gudf);
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(colDesc1);
    children.add(colDesc2);
    plusDesc.setChildren(children);
    plusDesc.setTypeInfo(TypeInfoFactory.longTypeInfo);
    colList.add(plusDesc);
    colList.add(colDesc3);
    selDesc.setColList(colList);
    List<String> outputColNames = new ArrayList<String>();
    outputColNames.add("_col0");
    outputColNames.add("_col1");
    selDesc.setOutputColumnNames(outputColNames);
    // CONSIDER unwinding ValidatorVectorSelectOperator as a subclass of VectorSelectOperator.
    VectorSelectDesc vectorSelectDesc = new VectorSelectDesc();
    List<ExprNodeDesc> selectColList = selDesc.getColList();
    // Compile each select expression into its vectorized counterpart.
    VectorExpression[] vectorSelectExprs = new VectorExpression[selectColList.size()];
    for (int i = 0; i < selectColList.size(); i++) {
        ExprNodeDesc expr = selectColList.get(i);
        VectorExpression ve = vc.getVectorExpression(expr);
        vectorSelectExprs[i] = ve;
    }
    vectorSelectDesc.setSelectExpressions(vectorSelectExprs);
    // Project scratch column 3 (the computed a + b) and input column 2 (c).
    vectorSelectDesc.setProjectedOutputColumns(new int[] { 3, 2 });
    ValidatorVectorSelectOperator vso = new ValidatorVectorSelectOperator(new CompilationOpContext(), selDesc, vc, vectorSelectDesc);
    VectorizedRowBatch vrg = VectorizedRowGroupGenUtil.getVectorizedRowBatch(VectorizedRowBatch.DEFAULT_SIZE, 4, 17);
    vso.process(vrg, 0);
}
Also used : ArrayList (java.util.ArrayList), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), GenericUDF (org.apache.hadoop.hive.ql.udf.generic.GenericUDF), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), GenericUDFOPPlus (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPPlus), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), VectorSelectDesc (org.apache.hadoop.hive.ql.plan.VectorSelectDesc), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), Test (org.junit.Test)
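
The projected columns { 3, 2 } line up because the batch carries inputs a, b, c in columns 0-2 and the compiled (a + b) expression writes to scratch column 3. A hedged sketch, reusing the locals above, of checking that expression directly; the column layout is inferred from the test, not stated by it.

// Sketch only: evaluate the compiled (a + b) expression against a generated
// batch and verify scratch column 3 by hand, bypassing the select operator.
VectorizedRowBatch batch = VectorizedRowGroupGenUtil.getVectorizedRowBatch(VectorizedRowBatch.DEFAULT_SIZE, 4, 17);
vectorSelectExprs[0].evaluate(batch);
LongColumnVector a = (LongColumnVector) batch.cols[0];
LongColumnVector b = (LongColumnVector) batch.cols[1];
LongColumnVector sum = (LongColumnVector) batch.cols[3];
for (int i = 0; i < batch.size; i++) {
    assertEquals(a.vector[i] + b.vector[i], sum.vector[i]);
}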

Example 38 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From the class DDLTask, method mergeFiles.

/**
 * First, make sure the source table/partition is not archived, has no
 * indexes, and is stored as RCFile or ORC. If any of these checks fails,
 * throw an exception.
 *
 * The merge is performed by building a MergeFileWork from the mergeFilesDesc
 * and running it as a MergeFileTask (or as a TezTask wrapping the work when
 * the execution engine is Tez).
 *
 * @param db the Hive database handle
 * @param mergeFilesDesc the descriptor carrying input/output directories and table info
 * @param driverContext the driver context of the enclosing query
 * @return the exit code of the merge task (0 on success)
 * @throws HiveException
 */
private int mergeFiles(Hive db, AlterTablePartMergeFilesDesc mergeFilesDesc, DriverContext driverContext) throws HiveException {
    ListBucketingCtx lbCtx = mergeFilesDesc.getLbCtx();
    boolean lbatc = lbCtx == null ? false : lbCtx.isSkewedStoredAsDir();
    int lbd = lbCtx == null ? 0 : lbCtx.calculateListBucketingLevel();
    // merge work only needs input and output.
    MergeFileWork mergeWork = new MergeFileWork(mergeFilesDesc.getInputDir(), mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass().getName(), mergeFilesDesc.getTableDesc());
    LinkedHashMap<Path, ArrayList<String>> pathToAliases = new LinkedHashMap<>();
    ArrayList<String> inputDirstr = new ArrayList<String>(1);
    inputDirstr.add(mergeFilesDesc.getInputDir().toString());
    pathToAliases.put(mergeFilesDesc.getInputDir().get(0), inputDirstr);
    mergeWork.setPathToAliases(pathToAliases);
    mergeWork.setListBucketingCtx(mergeFilesDesc.getLbCtx());
    mergeWork.resolveConcatenateMerge(db.getConf());
    mergeWork.setMapperCannotSpanPartns(true);
    mergeWork.setSourceTableInputFormat(mergeFilesDesc.getInputFormatClass().getName());
    final FileMergeDesc fmd;
    if (mergeFilesDesc.getInputFormatClass().equals(RCFileInputFormat.class)) {
        fmd = new RCFileMergeDesc();
    } else {
        // Safe to assume ORC here: the semantic analyzer only admits RCFile or ORC.
        fmd = new OrcFileMergeDesc();
    }
    fmd.setDpCtx(null);
    fmd.setHasDynamicPartitions(false);
    fmd.setListBucketingAlterTableConcatenate(lbatc);
    fmd.setListBucketingDepth(lbd);
    fmd.setOutputPath(mergeFilesDesc.getOutputDir());
    CompilationOpContext opContext = driverContext.getCtx().getOpContext();
    Operator<? extends OperatorDesc> mergeOp = OperatorFactory.get(opContext, fmd);
    LinkedHashMap<String, Operator<? extends OperatorDesc>> aliasToWork = new LinkedHashMap<String, Operator<? extends OperatorDesc>>();
    aliasToWork.put(mergeFilesDesc.getInputDir().toString(), mergeOp);
    mergeWork.setAliasToWork(aliasToWork);
    DriverContext driverCxt = new DriverContext();
    Task<?> task;
    // On Tez, wrap the merge work in a TezWork inside a TezTask; otherwise run
    // it as a standalone MergeFileTask.
    if (conf.getVar(ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
        TezWork tezWork = new TezWork(queryState.getQueryId(), conf);
        mergeWork.setName("File Merge");
        tezWork.add(mergeWork);
        task = new TezTask();
        ((TezTask) task).setWork(tezWork);
    } else {
        task = new MergeFileTask();
        ((MergeFileTask) task).setWork(mergeWork);
    }
    // initialize the task and execute
    task.initialize(queryState, getQueryPlan(), driverCxt, opContext);
    subtask = task;
    int ret = task.execute(driverCxt);
    if (subtask.getException() != null) {
        setException(subtask.getException());
    }
    return ret;
}
Also used : Path (org.apache.hadoop.fs.Path), DriverContext (org.apache.hadoop.hive.ql.DriverContext), MergeFileWork (org.apache.hadoop.hive.ql.io.merge.MergeFileWork), RCFileMergeDesc (org.apache.hadoop.hive.ql.plan.RCFileMergeDesc), OrcFileMergeDesc (org.apache.hadoop.hive.ql.plan.OrcFileMergeDesc), FileMergeDesc (org.apache.hadoop.hive.ql.plan.FileMergeDesc), ArrayList (java.util.ArrayList), TezTask (org.apache.hadoop.hive.ql.exec.tez.TezTask), SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint), CheckConstraint (org.apache.hadoop.hive.ql.metadata.CheckConstraint), NotNullConstraint (org.apache.hadoop.hive.ql.metadata.NotNullConstraint), SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint), SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint), DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint), UniqueConstraint (org.apache.hadoop.hive.ql.metadata.UniqueConstraint), SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint), LinkedHashMap (java.util.LinkedHashMap), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), ListBucketingCtx (org.apache.hadoop.hive.ql.plan.ListBucketingCtx), OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc), TezWork (org.apache.hadoop.hive.ql.plan.TezWork), MergeFileTask (org.apache.hadoop.hive.ql.io.merge.MergeFileTask)
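
The only engine-specific piece is the dispatch near the end. A minimal sketch isolating that check; the helper name runsOnTez is ours, while HiveConf, ConfVars, and getVar are the real classes and method used above.

// Sketch only: the same engine test the method uses to choose between a
// TezTask wrapping the MergeFileWork and a standalone MergeFileTask.
static boolean runsOnTez(HiveConf conf) {
    return conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
}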

Example 39 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From the class SerializationUtilities, method cloneOperatorTree.

/**
 * Clones an operator tree by round-tripping it through plan serialization.
 * Do not use unless necessary.
 * @param roots the root operators of the tree to clone.
 * @return the cloned roots.
 */
public static List<Operator<?>> cloneOperatorTree(List<Operator<?>> roots) {
    ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
    // Capture the shared compilation context up front; it is transient and will
    // not survive serialization.
    CompilationOpContext ctx = roots.isEmpty() ? null : roots.get(0).getCompilationOpContext();
    serializePlan(roots, baos, true);
    @SuppressWarnings("unchecked") List<Operator<?>> result = deserializePlan(new ByteArrayInputStream(baos.toByteArray()), roots.getClass(), true);
    // Restore the context on every cloned operator, walking the tree breadth-first.
    LinkedList<Operator<?>> newOps = new LinkedList<>(result);
    while (!newOps.isEmpty()) {
        Operator<?> newOp = newOps.poll();
        newOp.setCompilationOpContext(ctx);
        List<Operator<?>> children = newOp.getChildOperators();
        if (children != null) {
            newOps.addAll(children);
        }
    }
    return result;
}
Also used : VectorFileSinkOperator (org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator), ByteArrayInputStream (java.io.ByteArrayInputStream), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), ByteArrayOutputStream (java.io.ByteArrayOutputStream), LinkedList (java.util.LinkedList)
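
A hedged usage sketch: the point of the fix-up loop is that a clone ends up sharing the original compilation context, so a caller can rely on identity. Here root stands in for any already-built operator and is our placeholder, not part of the source.

// Sketch only: clone a single-root tree and confirm the context survived.
List<Operator<?>> roots = Collections.singletonList((Operator<?>) root);
List<Operator<?>> clones = SerializationUtilities.cloneOperatorTree(roots);
assertSame(root.getCompilationOpContext(), clones.get(0).getCompilationOpContext());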

Example 40 with CompilationOpContext

Use of org.apache.hadoop.hive.ql.CompilationOpContext in project hive by apache.

From the class BaseScalarUdfTest, method testUdf.

/**
 * This method drives the test. It takes the data from getBaseTable() and
 * feeds it through a SELECT operator followed by a COLLECT operator. Each
 * row produced by the collect operator is compared to getExpectedResult(),
 * and if every row matches, the method completes without assertion failures.
 * @throws HiveException
 */
public final void testUdf() throws HiveException {
    InspectableObject[] data = getBaseTable();
    List<ExprNodeDesc> expressionList = getExpressionList();
    SelectDesc selectCtx = new SelectDesc(expressionList, OperatorTestUtils.createOutputColumnNames(expressionList));
    Operator<SelectDesc> op = OperatorFactory.get(new CompilationOpContext(), SelectDesc.class);
    op.setConf(selectCtx);
    // Buffer the produced rows (up to 10) in a COLLECT child for comparison.
    CollectDesc cd = new CollectDesc(Integer.valueOf(10));
    CollectOperator cdop = (CollectOperator) OperatorFactory.getAndMakeChild(cd, op);
    op.initialize(new JobConf(OperatorTestUtils.class), new ObjectInspector[] { data[0].oi });
    OperatorTestUtils.assertResults(op, cdop, data, getExpectedResult());
}
Also used : InspectableObject (org.apache.hadoop.hive.serde2.objectinspector.InspectableObject), CollectDesc (org.apache.hadoop.hive.ql.plan.CollectDesc), CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext), CollectOperator (org.apache.hadoop.hive.ql.exec.CollectOperator), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SelectDesc (org.apache.hadoop.hive.ql.plan.SelectDesc), JobConf (org.apache.hadoop.mapred.JobConf)
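
A hedged sketch of the getExpressionList() hook a concrete subclass might supply. The concat UDF, the string columns "a" and "b", and the ExprNodeGenericFuncDesc.newInstance factory are our assumptions for illustration, not taken from the class above.

// Sketch only: build a concat(a, b) expression list for the SELECT operator.
public List<ExprNodeDesc> getExpressionList() throws UDFArgumentException {
    ExprNodeDesc colA = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "a", "table", false);
    ExprNodeDesc colB = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "b", "table", false);
    List<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
    exprs.add(ExprNodeGenericFuncDesc.newInstance(new GenericUDFConcat(), Arrays.asList(colA, colB)));
    return exprs;
}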

Aggregations

CompilationOpContext (org.apache.hadoop.hive.ql.CompilationOpContext): 44
ArrayList (java.util.ArrayList): 27
GroupByDesc (org.apache.hadoop.hive.ql.plan.GroupByDesc): 12
VectorGroupByDesc (org.apache.hadoop.hive.ql.plan.VectorGroupByDesc): 12
JobConf (org.apache.hadoop.mapred.JobConf): 12
OperatorDesc (org.apache.hadoop.hive.ql.plan.OperatorDesc): 11
FakeCaptureVectorToRowOutputOperator (org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureVectorToRowOutputOperator): 10
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 9
Operator (org.apache.hadoop.hive.ql.exec.Operator): 8
HashMap (java.util.HashMap): 7
LinkedHashMap (java.util.LinkedHashMap): 7
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 7
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 6
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 5
HashSet (java.util.HashSet): 5
Configuration (org.apache.hadoop.conf.Configuration): 5
Path (org.apache.hadoop.fs.Path): 5
VectorizationContext (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext): 5
ByteArrayInputStream (java.io.ByteArrayInputStream): 4
Set (java.util.Set): 4