Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
From the class TestVectorGroupByOperator, method testKeyTypeAggregate:
private void testKeyTypeAggregate(String aggregateName, FakeVectorRowBatchFromObjectIterables data,
    Map<Object, Object> expected) throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("Key");
  mapColumnNames.add("Value");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
  Set<Object> keys = new HashSet<Object>();

  AggregationDesc agg = buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1,
      "Value", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[1]));
  ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
  aggs.add(agg);

  ArrayList<String> outputColumnNames = new ArrayList<String>();
  outputColumnNames.add("_col0");
  outputColumnNames.add("_col1");

  GroupByDesc desc = new GroupByDesc();
  desc.setVectorDesc(new VectorGroupByDesc());
  desc.setOutputColumnNames(outputColumnNames);
  desc.setAggregators(aggs);
  ((VectorGroupByDesc) desc.getVectorDesc()).setProcessingMode(ProcessingMode.HASH);

  ExprNodeDesc keyExp = buildColumnDesc(ctx, "Key", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[0]));
  ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
  keysDesc.add(keyExp);
  desc.setKeys(keysDesc);

  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);

  VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx);

  FakeCaptureOutputOperator out = FakeCaptureOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);

  out.setOutputInspector(new FakeCaptureOutputOperator.OutputInspector() {

    private int rowIndex;
    private String aggregateName;
    private Map<Object, Object> expected;
    private Set<Object> keys;

    @Override
    public void inspectRow(Object row, int tag) throws HiveException {
      assertTrue(row instanceof Object[]);
      Object[] fields = (Object[]) row;
      assertEquals(2, fields.length);
      Object key = fields[0];
      Object keyValue = null;
      if (null == key) {
        keyValue = null;
      } else if (key instanceof ByteWritable) {
        ByteWritable bwKey = (ByteWritable) key;
        keyValue = bwKey.get();
      } else if (key instanceof ShortWritable) {
        ShortWritable swKey = (ShortWritable) key;
        keyValue = swKey.get();
      } else if (key instanceof IntWritable) {
        IntWritable iwKey = (IntWritable) key;
        keyValue = iwKey.get();
      } else if (key instanceof LongWritable) {
        LongWritable lwKey = (LongWritable) key;
        keyValue = lwKey.get();
      } else if (key instanceof TimestampWritable) {
        TimestampWritable twKey = (TimestampWritable) key;
        keyValue = twKey.getTimestamp();
      } else if (key instanceof DoubleWritable) {
        DoubleWritable dwKey = (DoubleWritable) key;
        keyValue = dwKey.get();
      } else if (key instanceof FloatWritable) {
        FloatWritable fwKey = (FloatWritable) key;
        keyValue = fwKey.get();
      } else if (key instanceof BooleanWritable) {
        BooleanWritable bwKey = (BooleanWritable) key;
        keyValue = bwKey.get();
      } else if (key instanceof HiveDecimalWritable) {
        HiveDecimalWritable hdwKey = (HiveDecimalWritable) key;
        keyValue = hdwKey.getHiveDecimal();
      } else {
        Assert.fail(String.format("Not implemented key output type %s: %s",
            key.getClass().getName(), key));
      }

      String keyValueAsString = String.format("%s", keyValue);
      assertTrue(expected.containsKey(keyValue));
      Object expectedValue = expected.get(keyValue);
      Object value = fields[1];
      Validator validator = getValidator(aggregateName);
      validator.validate(keyValueAsString, expectedValue, new Object[] { value });
      keys.add(keyValue);
    }

    private FakeCaptureOutputOperator.OutputInspector init(String aggregateName,
        Map<Object, Object> expected, Set<Object> keys) {
      this.aggregateName = aggregateName;
      this.expected = expected;
      this.keys = keys;
      return this;
    }
  }.init(aggregateName, expected, keys));

  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);

  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(expected.size(), outBatchList.size());
  assertEquals(expected.size(), keys.size());
}
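The buildColumnDesc and buildAggregationDesc helpers called above are defined elsewhere in TestVectorGroupByOperator and are not part of this excerpt. Below is a minimal sketch of how they might be written, inferred only from the call sites; the "table" alias and the use of AggregationDesc's plain setters are assumptions, not the exact Hive code.

// Hypothetical helpers, sketched from how they are called above.
private ExprNodeDesc buildColumnDesc(VectorizationContext ctx, String column, TypeInfo typeInfo) {
  // Wrap the named column in an ExprNodeColumnDesc; the "table" alias is an assumption.
  return new ExprNodeColumnDesc(typeInfo, column, "table", false);
}

private AggregationDesc buildAggregationDesc(VectorizationContext ctx, String aggregate,
    GenericUDAFEvaluator.Mode mode, String column, TypeInfo typeInfo) {
  // The aggregate takes a single ExprNodeDesc parameter referring to the value column.
  ArrayList<ExprNodeDesc> params = new ArrayList<ExprNodeDesc>();
  params.add(buildColumnDesc(ctx, column, typeInfo));
  AggregationDesc agg = new AggregationDesc();
  agg.setGenericUDAFName(aggregate);
  agg.setMode(mode);
  agg.setParameters(params);
  return agg;
}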
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
From the class TestVectorUDFAdaptor, method testMultiArgumentUDF:
@Test
public void testMultiArgumentUDF() {

  // create a syntax tree for a function call "testudf(col0, col1, col2)"
  ExprNodeGenericFuncDesc funcDesc;
  TypeInfo typeInfoStr = TypeInfoFactory.stringTypeInfo;
  TypeInfo typeInfoLong = TypeInfoFactory.longTypeInfo;
  TypeInfo typeInfoDbl = TypeInfoFactory.doubleTypeInfo;
  GenericUDFBridge genericUDFBridge = new GenericUDFBridge("testudf", false,
      ConcatTextLongDoubleUDF.class.getName());
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  children.add(new ExprNodeColumnDesc(typeInfoStr, "col0", "tablename", false));
  children.add(new ExprNodeColumnDesc(typeInfoLong, "col1", "tablename", false));
  children.add(new ExprNodeColumnDesc(typeInfoDbl, "col2", "tablename", false));

  VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[3];
  for (int i = 0; i < 3; i++) {
    argDescs[i] = new VectorUDFArgDesc();
    argDescs[i].setVariable(i);
  }
  funcDesc = new ExprNodeGenericFuncDesc(typeInfoStr, genericUDFBridge,
      genericUDFBridge.getUdfName(), children);

  // create the adaptor for this function call to work in vector mode
  VectorUDFAdaptor vudf = null;
  try {
    vudf = new VectorUDFAdaptor(funcDesc, 3, "String", argDescs);
  } catch (HiveException e) {
    // We should never get here.
    assertTrue(false);
    throw new RuntimeException(e);
  }

  // with no nulls
  VectorizedRowBatch b = getBatchStrDblLongWithStrOut();
  vudf.evaluate(b);
  byte[] result = null;
  byte[] result2 = null;
  try {
    result = "red:1:1.0".getBytes("UTF-8");
    result2 = "blue:0:0.0".getBytes("UTF-8");
  } catch (Exception e) {
    ;
  }
  BytesColumnVector out = (BytesColumnVector) b.cols[3];
  int cmp = StringExpr.compare(result, 0, result.length, out.vector[1], out.start[1], out.length[1]);
  assertEquals(0, cmp);
  assertTrue(out.noNulls);

  // with nulls
  b = getBatchStrDblLongWithStrOut();
  b.cols[1].noNulls = false;
  vudf.evaluate(b);
  out = (BytesColumnVector) b.cols[3];
  assertFalse(out.noNulls);
  assertTrue(out.isNull[1]);

  // with all input columns repeating
  b = getBatchStrDblLongWithStrOut();
  b.cols[0].isRepeating = true;
  b.cols[1].isRepeating = true;
  b.cols[2].isRepeating = true;
  vudf.evaluate(b);
  out = (BytesColumnVector) b.cols[3];
  assertTrue(out.isRepeating);
  cmp = StringExpr.compare(result2, 0, result2.length, out.vector[0], out.start[0], out.length[0]);
  assertEquals(0, cmp);
  assertTrue(out.noNulls);
}
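ConcatTextLongDoubleUDF is a test helper defined in the Hive source tree but not shown in this excerpt. A minimal sketch consistent with the expected outputs "red:1:1.0" and "blue:0:0.0", assuming a plain UDF (org.apache.hadoop.hive.ql.exec.UDF) bridged through GenericUDFBridge; treat it as an illustration, not the actual class.

// Hypothetical stand-in for the ConcatTextLongDoubleUDF referenced above.
public class ConcatTextLongDoubleUDF extends UDF {
  public String evaluate(String s, Long l, Double d) {
    if (s == null || l == null || d == null) {
      return null;
    }
    // Produces values of the form "red:1:1.0".
    return s + ":" + l + ":" + d;
  }
}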
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
From the class TestVectorUDFAdaptor, method testLongUDF:
@Test
public void testLongUDF() {

  // create a syntax tree for a simple function call "longudf(col0)"
  ExprNodeGenericFuncDesc funcDesc;
  TypeInfo typeInfo = TypeInfoFactory.longTypeInfo;
  GenericUDFBridge genericUDFBridge = new GenericUDFBridge("longudf", false, LongUDF.class.getName());
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
  ExprNodeColumnDesc colDesc = new ExprNodeColumnDesc(typeInfo, "col0", "tablename", false);
  children.add(colDesc);
  VectorUDFArgDesc[] argDescs = new VectorUDFArgDesc[1];
  argDescs[0] = new VectorUDFArgDesc();
  argDescs[0].setVariable(0);
  funcDesc = new ExprNodeGenericFuncDesc(typeInfo, genericUDFBridge,
      genericUDFBridge.getUdfName(), children);

  // create the adaptor for this function call to work in vector mode
  VectorUDFAdaptor vudf = null;
  try {
    vudf = new VectorUDFAdaptor(funcDesc, 1, "Long", argDescs);
  } catch (HiveException e) {
    // We should never get here.
    assertTrue(false);
  }

  VectorizedRowBatch b = getBatchLongInLongOut();
  vudf.evaluate(b);

  // verify output
  LongColumnVector out = (LongColumnVector) b.cols[1];
  assertEquals(1000, out.vector[0]);
  assertEquals(1001, out.vector[1]);
  assertEquals(1002, out.vector[2]);
  assertTrue(out.noNulls);
  assertFalse(out.isRepeating);

  // with nulls
  b = getBatchLongInLongOut();
  out = (LongColumnVector) b.cols[1];
  b.cols[0].noNulls = false;
  vudf.evaluate(b);
  assertFalse(out.noNulls);
  assertEquals(1000, out.vector[0]);
  assertEquals(1001, out.vector[1]);
  assertTrue(out.isNull[2]);
  assertFalse(out.isRepeating);

  // with repeating
  b = getBatchLongInLongOut();
  out = (LongColumnVector) b.cols[1];
  b.cols[0].isRepeating = true;
  vudf.evaluate(b);
  // The implementation may or may not set isRepeating on the output vector;
  // that is implementation-defined, so accept either form as long as the values are right.
  assertTrue(b.cols[1].isRepeating && out.vector[0] == 1000
      || !b.cols[1].isRepeating && out.vector[2] == 1000);
  assertEquals(3, b.size);
}
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
From the class TestUtilities, method testSerializeTimestamp:
@Test
public void testSerializeTimestamp() {
  Timestamp ts = new Timestamp(1374554702000L);
  ts.setNanos(123456);
  ExprNodeConstantDesc constant = new ExprNodeConstantDesc(ts);
  List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>(1);
  children.add(constant);
  ExprNodeGenericFuncDesc desc = new ExprNodeGenericFuncDesc(TypeInfoFactory.timestampTypeInfo,
      new GenericUDFFromUtcTimestamp(), children);
  assertEquals(desc.getExprString(),
      SerializationUtilities.deserializeExpression(
          SerializationUtilities.serializeExpression(desc)).getExprString());
}
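The same serializeExpression/deserializeExpression round trip applies to any ExprNodeGenericFuncDesc tree, not only timestamp constants. A short sketch using GenericUDFOPEqual with the column and constant descriptors shown elsewhere on this page; the column names and table alias are illustrative assumptions.

// Build "key = 'x'" as an ExprNodeDesc tree and round-trip it through SerializationUtilities.
List<ExprNodeDesc> children = new ArrayList<ExprNodeDesc>();
children.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "key", "src", false));
children.add(new ExprNodeConstantDesc("x"));
ExprNodeGenericFuncDesc eq = new ExprNodeGenericFuncDesc(
    TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqual(), children);
String serialized = SerializationUtilities.serializeExpression(eq);
ExprNodeGenericFuncDesc roundTripped = SerializationUtilities.deserializeExpression(serialized);
assertEquals(eq.getExprString(), roundTripped.getExprString());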
Use of org.apache.hadoop.hive.ql.plan.ExprNodeDesc in project hive by apache.
From the class TestExecDriver, method populateMapRedPlan1:
@SuppressWarnings("unchecked")
private void populateMapRedPlan1(Table src) throws SemanticException {
  ArrayList<String> outputColumns = new ArrayList<String>();
  for (int i = 0; i < 2; i++) {
    outputColumns.add("_col" + i);
  }

  // map-side work
  Operator<ReduceSinkDesc> op1 = OperatorFactory.get(ctx,
      PlanUtils.getReduceSinkDesc(Utilities.makeList(getStringColumn("key")),
          Utilities.makeList(getStringColumn("value")), outputColumns, true, -1, 1, -1,
          AcidUtils.Operation.NOT_ACID));
  addMapWork(mr, src, "a", op1);
  ReduceWork rWork = new ReduceWork();
  rWork.setNumReduceTasks(Integer.valueOf(1));
  rWork.setKeyDesc(op1.getConf().getKeySerializeInfo());
  rWork.getTagToValueDesc().add(op1.getConf().getValueSerializeInfo());
  mr.setReduceWork(rWork);

  // reduce side work
  Operator<FileSinkDesc> op3 = OperatorFactory.get(ctx,
      new FileSinkDesc(new Path(tmpdir + File.separator + "mapredplan1.out"),
          Utilities.defaultTd, false));
  List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
  cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString() + "." + outputColumns.get(1)));
  List<String> colNames = new ArrayList<String>();
  colNames.add(HiveConf.getColumnInternalName(2));
  Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, colNames), op3);
  rWork.setReducer(op2);
}
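getStringColumn is a small TestExecDriver helper that builds the ExprNodeDesc for a string-typed column; it is not shown in this excerpt. A sketch of its likely shape, with the empty table alias as an assumption.

// Hypothetical helper matching the calls above: a string-typed column reference.
private static ExprNodeColumnDesc getStringColumn(String columnName) {
  return new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, columnName, "", false);
}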