
Example 16 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class TestVectorGroupByOperator method testKeyTypeAggregate.

private void testKeyTypeAggregate(String aggregateName, FakeVectorRowBatchFromObjectIterables data, Map<Object, Object> expected) throws HiveException {
    List<String> mapColumnNames = new ArrayList<String>();
    mapColumnNames.add("Key");
    mapColumnNames.add("Value");
    VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
    Set<Object> keys = new HashSet<Object>();
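    // Build a PARTIAL1-mode aggregation descriptor over the "Value" column.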
    AggregationDesc agg = buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "Value", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[1]));
    ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
    aggs.add(agg);
    ArrayList<String> outputColumnNames = new ArrayList<String>();
    outputColumnNames.add("_col0");
    outputColumnNames.add("_col1");
    GroupByDesc desc = new GroupByDesc();
    desc.setVectorDesc(new VectorGroupByDesc());
    desc.setOutputColumnNames(outputColumnNames);
    desc.setAggregators(aggs);
    ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH);
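    // The single grouping key is the "Key" column, typed from the test data.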
    ExprNodeDesc keyExp = buildColumnDesc(ctx, "Key", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[0]));
    ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
    keysDesc.add(keyExp);
    desc.setKeys(keysDesc);
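    // Instantiate the row-mode operator, vectorize it, and attach a capture operator to collect its output.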
    CompilationOpContext cCtx = new CompilationOpContext();
    Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
    VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);
    FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
    vgo.initialize(hconf, null);
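    // The inspector decodes each output key writable and validates the aggregate value against expectations.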
    out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {

        private int rowIndex;

        private String aggregateName;

        private Map<Object, Object> expected;

        private Set<Object> keys;

        @Override
        public void inspectRow(Object row, int tag) throws HiveException {
            assertTrue(row instanceof Object[]);
            Object[] fields = (Object[]) row;
            assertEquals(2, fields.length);
            Object key = fields[0];
            Object keyValue = null;
            if (null == key) {
                keyValue = null;
            } else if (key instanceof ByteWritable) {
                ByteWritable bwKey = (ByteWritable) key;
                keyValue = bwKey.get();
            } else if (key instanceof ShortWritable) {
                ShortWritable swKey = (ShortWritable) key;
                keyValue = swKey.get();
            } else if (key instanceof IntWritable) {
                IntWritable iwKey = (IntWritable) key;
                keyValue = iwKey.get();
            } else if (key instanceof LongWritable) {
                LongWritable lwKey = (LongWritable) key;
                keyValue = lwKey.get();
            } else if (key instanceof TimestampWritable) {
                TimestampWritable twKey = (TimestampWritable) key;
                keyValue = twKey.getTimestamp();
            } else if (key instanceof DoubleWritable) {
                DoubleWritable dwKey = (DoubleWritable) key;
                keyValue = dwKey.get();
            } else if (key instanceof FloatWritable) {
                FloatWritable fwKey = (FloatWritable) key;
                keyValue = fwKey.get();
            } else if (key instanceof BooleanWritable) {
                BooleanWritable bwKey = (BooleanWritable) key;
                keyValue = bwKey.get();
            } else if (key instanceof HiveDecimalWritable) {
                HiveDecimalWritable hdwKey = (HiveDecimalWritable) key;
                keyValue = hdwKey.getHiveDecimal();
            } else {
                Assert.fail(String.format("Not implemented key output type %s: %s", key.getClass().getName(), key));
            }
            String keyValueAsString = String.format("%s", keyValue);
            assertTrue(expected.containsKey(keyValue));
            Object expectedValue = expected.get(keyValue);
            Object value = fields[1];
            Validator validator = getValidator(aggregateName);
            validator.validate(keyValueAsString, expectedValue, new Object[] { value });
            keys.add(keyValue);
        }

        private FakeCaptureOutputOperator.OutputInspector init(String aggregateName, Map<Object, Object> expected, Set<Object> keys) {
            this.aggregateName = aggregateName;
            this.expected = expected;
            this.keys = keys;
            return this;
        }
    }.init(aggregateName, expected, keys));
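    // Stream every batch through the operator; close(false) flushes the hash aggregates downstream.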
    for (VectorizedRowBatch unit : data) {
        vgo.process(unit, 0);
    }
    vgo.close(false);
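    // Every expected key should appear exactly once in the captured output.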
    List<Object> outBatchList = out.getCapturedRows();
    assertNotNull(outBatchList);
    assertEquals(expected.size(), outBatchList.size());
    assertEquals(expected.size(), keys.size());
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) ArrayList(java.util.ArrayList) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongWritable(org.apache.hadoop.io.LongWritable) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) GroupByDesc(org.apache.hadoop.hive.ql.plan.GroupByDesc) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) HashSet(java.util.HashSet) FakeCaptureOutputOperator(org.apache.hadoop.hive.ql.exec.vector.util.FakeCaptureOutputOperator) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) CompilationOpContext(org.apache.hadoop.hive.ql.CompilationOpContext) VectorGroupByDesc(org.apache.hadoop.hive.ql.plan.VectorGroupByDesc) AggregationDesc(org.apache.hadoop.hive.ql.plan.AggregationDesc) Map(java.util.Map) HashMap(java.util.HashMap)

Example 17 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class TestVectorUDFAdaptor method testMultiArgumentUDF.

@Test
public void testMultiArgumentUDF() {
    // create a syntax tree for a function call "testudf(col0, col1, col2)"
    ExprNodeGenericFuncDesc funcDesc;
    TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo;
    TypeInfo typeInfoLong = TypeInfoFactory.longTypeInfo;
    TypeInfo typeInfoDbl = TypeInfoFactory.doubleTypeInfo;
    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("testudf", false, ConcatTextLongDoubleUDF.class.getName());
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", false));
    children.add(new ExprNodeColumnDesc(typeInfoLong, "col1", "tablename", false));
    children.add(new ExprNodeColumnDesc(typeInfoDbl, "col2", "tablename", false));
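    // Bind each of the three UDF arguments to its corresponding batch column by index.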
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[3];
    for (int i = 0; i < 3; i++) {
        argDescs[i] = new VectorUDFArgDesc();
        argDescs[i].setVariable(i);
    }
    funcDesc = new ExprNodeGenericFuncDesc(typeInfoStr, genericUDFBridge, genericUDFBridge.getUdfName(), children);
    // create the adaptor for this function call to work in vector mode
    VectorUDFAdaptor vudf = null;
    try {
        vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs);
    } catch (HiveException e) {
        // We should never get here.
        assertTrue(false);
        throw new RuntimeException(e);
    }
    // with no nulls
    VectorizedRowBatch b = getBatchStrDblLongWithStrOut();
    vudf.evaluate(b);
    // StandardCharsets.UTF_8 (java.nio.charset.StandardCharsets) avoids the checked
    // UnsupportedEncodingException that the original empty catch block silently swallowed.
    byte[] result = "red:1:1.0".getBytes(StandardCharsets.UTF_8);
    byte[] result2 = "blue:0:0.0".getBytes(StandardCharsets.UTF_8);
    BytesColumnVector out = (BytesColumnVector) b.cols[3];
    int cmp = StringExpr.compare(result, 0, result.length, out.vector[1], out.start[1], out.length[1]);
    assertEquals(0, cmp);
    assertTrue(out.noNulls);
    // with nulls
    b = getBatchStrDblLongWithStrOut();
    b.cols[1].noNulls = false;
    vudf.evaluate(b);
    out = (BytesColumnVector) b.cols[3];
    assertFalse(out.noNulls);
    assertTrue(out.isNull[1]);
    // with all input columns repeating
    b = getBatchStrDblLongWithStrOut();
    b.cols[0].isRepeating = true;
    b.cols[1].isRepeating = true;
    b.cols[2].isRepeating = true;
    vudf.evaluate(b);
    out = (BytesColumnVector) b.cols[3];
    assertTrue(out.isRepeating);
    cmp = StringExpr.compare(result2, 0, result2.length, out.vector[0], out.start[0], out.length[0]);
    assertEquals(0, cmp);
    assertTrue(out.noNulls);
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) ConcatTextLongDoubleUDF(org.apache.hadoop.hive.ql.exec.vector.udf.legacy.ConcatTextLongDoubleUDF) Test(org.junit.Test)

Example 18 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class TestVectorUDFAdaptor method testLongUDF.

@Test
public void testLongUDF() {
    // create a syntax tree for a simple function call "longudf(col0)"
    ExprNodeGenericFuncDesc funcDesc;
    TypeInfo typeInfo = TypeInfoFactory.longTypeInfo;
    GenericUDFBridge genericUDFBridge = new GenericUDFBridge("longudf", false, LongUDF.class.getName());
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
    ExprNodeColumnDesc colDesc = new ExprNodeColumnDesc(typeInfo, "col0", "tablename", false);
    children.add(colDesc);
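    // The single UDF argument is bound to batch column 0.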
    VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1];
    argDescs[0] = new VectorUDFArgDesc();
    argDescs[0].setVariable(0);
    funcDesc = new ExprNodeGenericFuncDesc(typeInfo, genericUDFBridge, genericUDFBridge.getUdfName(), children);
    // create the adaptor for this function call to work in vector mode
    VectorUDFAdaptor vudf = null;
    try {
        vudf = new VectorUDFAdaptor(funcDesc, 1, "Long", argDescs);
    } catch (HiveException e) {
        // We should never get here.
        assertTrue(false);
    }
    VectorizedRowBatch b = getBatchLongInLongOut();
    vudf.evaluate(b);
    // verify output
    LongColumnVector out = (LongColumnVector) b.cols[1];
    assertEquals(1000, out.vector[0]);
    assertEquals(1001, out.vector[1]);
    assertEquals(1002, out.vector[2]);
    assertTrue(out.noNulls);
    assertFalse(out.isRepeating);
    // with nulls
    b = getBatchLongInLongOut();
    out = (LongColumnVector) b.cols[1];
    b.cols[0].noNulls = false;
    vudf.evaluate(b);
    assertFalse(out.noNulls);
    assertEquals(1000, out.vector[0]);
    assertEquals(1001, out.vector[1]);
    assertTrue(out.isNull[2]);
    assertFalse(out.isRepeating);
    // with repeating
    b = getBatchLongInLongOut();
    out = (LongColumnVector) b.cols[1];
    b.cols[0].isRepeating = true;
    vudf.evaluate(b);
    // The implementation may or may not set the output's isRepeating flag;
    // that is implementation-defined, so accept either behavior below.
    assertTrue(b.cols[1].isRepeating && out.vector[0] == 1000 || !b.cols[1].isRepeating && out.vector[2] == 1000);
    assertEquals(3, b.size);
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) GenericUDFBridge(org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) ExprNodeColumnDesc(org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc) LongUDF(org.apache.hadoop.hive.ql.exec.vector.udf.legacy.LongUDF) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) Test(org.junit.Test)

Example 19 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class TestUtilities method testSerializeTimestamp.

@Test
public void testSerializeTimestamp() {
    Timestamp ts = new Timestamp(1374554702000L);
    ts.setNanos(123456);
    ExprNodeConstantDesc constant = new ExprNodeConstantDesc(ts);
    List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(1);
    children.add(constant);
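    // Wrap the constant in a generic UDF call so a small expression tree gets serialized.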
    ExprNodeGenericFuncDesc desc = new ExprNodeGenericFuncDesc(TypeInfoFactory.timestampTypeInfo, new GenericUDFFromUtcTimestamp(), children);
    assertEquals(desc.getExprString(), SerializationUtilities.deserializeExpression(SerializationUtilities.serializeExpression(desc)).getExprString());
}
Also used : ExprNodeConstantDesc(org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc) ArrayList(java.util.ArrayList) ExprNodeGenericFuncDesc(org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) GenericUDFFromUtcTimestamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFFromUtcTimestamp) Timestamp(java.sql.Timestamp) GenericUDFFromUtcTimestamp(org.apache.hadoop.hive.ql.udf.generic.GenericUDFFromUtcTimestamp) Test(org.junit.Test)

Example 20 with ExprNodeDesc

use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.

the class TestExecDriver method populateMapRedPlan1.

@SuppressWarnings("unchecked")
private void populateMapRedPlan1(Table src) throws SemanticException {
    ArrayList<String> outputColumns = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
        outputColumns.add("_col" + i);
    }
    // map-side work
    Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx, PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("key")), Utilities.makeList(getStringColumn("value")), outputColumns, true, -1, 1, -1, AcidUtils.Operation.NOT_ACID));
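    // The map side emits (key, value) pairs through a reduce sink; the reduce work below runs with one reducer.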
    addMapWork(mr, src, "a", op1);
    ReduceWork rWork = new ReduceWork();
    rWork.setNumReduceTasks(Integer.valueOf(1));
    rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
    rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
    mr.setReduceWork(rWork);
    // reduce side work
    Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx, new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan1.out"), Utilities.defaultTd, false));
    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
    List<String> colNames = new ArrayList<String>();
    colNames.add(HiveConf.getColumnInternalName(2));
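    // The reducer projects VALUE._col1 through a SELECT into the file sink.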
    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, colNames), op3);
    rWork.setReducer(op2);
}
Also used : Path(org.apache.hadoop.fs.Path) FileSinkDesc(org.apache.hadoop.hive.ql.plan.FileSinkDesc) ArrayList(java.util.ArrayList) ReduceWork(org.apache.hadoop.hive.ql.plan.ReduceWork) ExprNodeDesc(org.apache.hadoop.hive.ql.plan.ExprNodeDesc) SelectDesc(org.apache.hadoop.hive.ql.plan.SelectDesc) ReduceSinkDesc(org.apache.hadoop.hive.ql.plan.ReduceSinkDesc)

Aggregations

ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 321 uses
ArrayList (java.util.ArrayList): 179 uses
ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc): 146 uses
ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc): 110 uses
ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc): 101 uses
Test (org.junit.Test): 74 uses
ColumnInfo (org.apache.hadoop.hive.ql.exec.ColumnInfo): 69 uses
HashMap (java.util.HashMap): 67 uses
RowSchema (org.apache.hadoop.hive.ql.exec.RowSchema): 57 uses
ReduceSinkOperator (org.apache.hadoop.hive.ql.exec.ReduceSinkOperator): 47 uses
LinkedHashMap (java.util.LinkedHashMap): 43 uses
SelectOperator (org.apache.hadoop.hive.ql.exec.SelectOperator): 42 uses
List (java.util.List): 40 uses
Operator (org.apache.hadoop.hive.ql.exec.Operator): 39 uses
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 35 uses
GroupByOperator (org.apache.hadoop.hive.ql.exec.GroupByOperator): 34 uses
JoinOperator (org.apache.hadoop.hive.ql.exec.JoinOperator): 34 uses
TableScanOperator (org.apache.hadoop.hive.ql.exec.TableScanOperator): 34 uses
SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException): 33 uses
VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression): 32 uses