Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache: class TestVectorGroupByOperator, method testMultiKey.
private void testMultiKey(String aggregateName, FakeVectorRowBatchFromObjectIterables data, HashMap<Object, Object> expected) throws HiveException {
  Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
  Set<Object> keys = new HashSet<Object>();
  // The types array tells us the number of columns in the data
  final String[] columnTypes = data.getTypes();
  // Columns 0..N-1 are keys. Column N is the aggregate value input
  int i = 0;
  for (; i < columnTypes.length - 1; ++i) {
    String columnName = String.format("_col%d", i);
    mapColumnNames.put(columnName, i);
    outputColumnNames.add(columnName);
  }
  mapColumnNames.put("value", i);
  outputColumnNames.add("value");
  VectorizationContext ctx = new VectorizationContext("name", outputColumnNames);
  ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>(1);
  aggs.add(buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "value", TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
  for (i = 0; i < columnTypes.length - 1; ++i) {
    String columnName = String.format("_col%d", i);
    keysDesc.add(buildColumnDesc(ctx, columnName, TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
  }
  GroupByDesc desc = new GroupByDesc();
  VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
  desc.setOutputColumnNames(outputColumnNames);
  desc.setAggregators(aggs);
  desc.setKeys(keysDesc);
  vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH);
  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
  VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc);
  FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);
  out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {

    private int rowIndex;
    private String aggregateName;
    private Map<Object, Object> expected;
    private Set<Object> keys;

    @Override
    public void inspectRow(Object row, int tag) throws HiveException {
      assertTrue(row instanceof Object[]);
      Object[] fields = (Object[]) row;
      assertEquals(columnTypes.length, fields.length);
      ArrayList<Object> keyValue = new ArrayList<Object>(columnTypes.length - 1);
      for (int i = 0; i < columnTypes.length - 1; ++i) {
        Object key = fields[i];
        if (null == key) {
          keyValue.add(null);
        } else if (key instanceof Text) {
          Text txKey = (Text) key;
          keyValue.add(txKey.toString());
        } else if (key instanceof ByteWritable) {
          ByteWritable bwKey = (ByteWritable) key;
          keyValue.add(bwKey.get());
        } else if (key instanceof ShortWritable) {
          ShortWritable swKey = (ShortWritable) key;
          keyValue.add(swKey.get());
        } else if (key instanceof IntWritable) {
          IntWritable iwKey = (IntWritable) key;
          keyValue.add(iwKey.get());
        } else if (key instanceof LongWritable) {
          LongWritable lwKey = (LongWritable) key;
          keyValue.add(lwKey.get());
        } else if (key instanceof TimestampWritableV2) {
          TimestampWritableV2 twKey = (TimestampWritableV2) key;
          keyValue.add(twKey.getTimestamp());
        } else if (key instanceof DoubleWritable) {
          DoubleWritable dwKey = (DoubleWritable) key;
          keyValue.add(dwKey.get());
        } else if (key instanceof FloatWritable) {
          FloatWritable fwKey = (FloatWritable) key;
          keyValue.add(fwKey.get());
        } else if (key instanceof BooleanWritable) {
          BooleanWritable bwKey = (BooleanWritable) key;
          keyValue.add(bwKey.get());
        } else {
          Assert.fail(String.format("Not implemented key output type %s: %s", key.getClass().getName(), key));
        }
      }
      String keyAsString = Arrays.deepToString(keyValue.toArray());
      assertTrue(expected.containsKey(keyValue));
      Object expectedValue = expected.get(keyValue);
      Object value = fields[columnTypes.length - 1];
      Validator validator = getValidator(aggregateName);
      validator.validate(keyAsString, expectedValue, new Object[] { value });
      keys.add(keyValue);
    }

    private FakeCaptureVectorToRowOutputOperator.OutputInspector init(String aggregateName, Map<Object, Object> expected, Set<Object> keys) {
      this.aggregateName = aggregateName;
      this.expected = expected;
      this.keys = keys;
      return this;
    }
  }.init(aggregateName, expected, keys));
  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);
  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(expected.size(), outBatchList.size());
  assertEquals(expected.size(), keys.size());
}
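The instanceof chain in inspectRow above repeats the same Writable-to-Java-value conversion for every key column. A minimal sketch of how it could be factored into a helper, using only the Writable classes and accessor methods already exercised by the test (the helper name is illustrative and not part of the Hive test):

// Hypothetical helper: convert one vectorized group-by key from its Writable
// wrapper to the plain Java value the expected map is keyed on.
private static Object unboxKeyWritable(Object key) {
  if (key == null) {
    return null;
  } else if (key instanceof Text) {
    return key.toString();
  } else if (key instanceof ByteWritable) {
    return ((ByteWritable) key).get();
  } else if (key instanceof ShortWritable) {
    return ((ShortWritable) key).get();
  } else if (key instanceof IntWritable) {
    return ((IntWritable) key).get();
  } else if (key instanceof LongWritable) {
    return ((LongWritable) key).get();
  } else if (key instanceof TimestampWritableV2) {
    return ((TimestampWritableV2) key).getTimestamp();
  } else if (key instanceof DoubleWritable) {
    return ((DoubleWritable) key).get();
  } else if (key instanceof FloatWritable) {
    return ((FloatWritable) key).get();
  } else if (key instanceof BooleanWritable) {
    return ((BooleanWritable) key).get();
  }
  throw new IllegalArgumentException("Not implemented key output type " + key.getClass().getName());
}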
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache: class TestVectorGroupByOperator, method testKeyTypeAggregate.
private void testKeyTypeAggregate(String aggregateName, FakeVectorRowBatchFromObjectIterables data, Map<Object, Object> expected) throws HiveException {
  List<String> mapColumnNames = new ArrayList<String>();
  mapColumnNames.add("Key");
  mapColumnNames.add("Value");
  VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
  Set<Object> keys = new HashSet<Object>();
  AggregationDesc agg = buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "Value", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[1]));
  ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
  aggs.add(agg);
  ArrayList<String> outputColumnNames = new ArrayList<String>();
  outputColumnNames.add("_col0");
  outputColumnNames.add("_col1");
  GroupByDesc desc = new GroupByDesc();
  VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
  desc.setOutputColumnNames(outputColumnNames);
  desc.setAggregators(aggs);
  vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH);
  ExprNodeDesc keyExp = buildColumnDesc(ctx, "Key", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[0]));
  ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
  keysDesc.add(keyExp);
  desc.setKeys(keysDesc);
  CompilationOpContext cCtx = new CompilationOpContext();
  Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
  VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc);
  if (vgo == null) {
    assertTrue(false);
  }
  FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
  vgo.initialize(hconf, null);
  out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {

    private int rowIndex;
    private String aggregateName;
    private Map<Object, Object> expected;
    private Set<Object> keys;

    @Override
    public void inspectRow(Object row, int tag) throws HiveException {
      assertTrue(row instanceof Object[]);
      Object[] fields = (Object[]) row;
      assertEquals(2, fields.length);
      Object key = fields[0];
      Object keyValue = null;
      if (null == key) {
        keyValue = null;
      } else if (key instanceof ByteWritable) {
        ByteWritable bwKey = (ByteWritable) key;
        keyValue = bwKey.get();
      } else if (key instanceof ShortWritable) {
        ShortWritable swKey = (ShortWritable) key;
        keyValue = swKey.get();
      } else if (key instanceof IntWritable) {
        IntWritable iwKey = (IntWritable) key;
        keyValue = iwKey.get();
      } else if (key instanceof LongWritable) {
        LongWritable lwKey = (LongWritable) key;
        keyValue = lwKey.get();
      } else if (key instanceof TimestampWritableV2) {
        TimestampWritableV2 twKey = (TimestampWritableV2) key;
        keyValue = twKey.getTimestamp().toSqlTimestamp();
      } else if (key instanceof DoubleWritable) {
        DoubleWritable dwKey = (DoubleWritable) key;
        keyValue = dwKey.get();
      } else if (key instanceof FloatWritable) {
        FloatWritable fwKey = (FloatWritable) key;
        keyValue = fwKey.get();
      } else if (key instanceof BooleanWritable) {
        BooleanWritable bwKey = (BooleanWritable) key;
        keyValue = bwKey.get();
      } else if (key instanceof HiveDecimalWritable) {
        HiveDecimalWritable hdwKey = (HiveDecimalWritable) key;
        keyValue = hdwKey.getHiveDecimal();
      } else {
        Assert.fail(String.format("Not implemented key output type %s: %s", key.getClass().getName(), key));
      }
      String keyValueAsString = String.format("%s", keyValue);
      assertTrue(expected.containsKey(keyValue));
      Object expectedValue = expected.get(keyValue);
      Object value = fields[1];
      Validator validator = getValidator(aggregateName);
      validator.validate(keyValueAsString, expectedValue, new Object[] { value });
      keys.add(keyValue);
    }

    private FakeCaptureVectorToRowOutputOperator.OutputInspector init(String aggregateName, Map<Object, Object> expected, Set<Object> keys) {
      this.aggregateName = aggregateName;
      this.expected = expected;
      this.keys = keys;
      return this;
    }
  }.init(aggregateName, expected, keys));
  for (VectorizedRowBatch unit : data) {
    vgo.process(unit, 0);
  }
  vgo.close(false);
  List<Object> outBatchList = out.getCapturedRows();
  assertNotNull(outBatchList);
  assertEquals(expected.size(), outBatchList.size());
  assertEquals(expected.size(), keys.size());
}
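Note that inspectRow converts each Writable key to a plain Java value before looking it up in expected: the expected map is keyed on boxed Java values, and a Writable wrapper does not compare equal to them. A minimal sketch of that point for ShortWritable, assuming the usual value-based equals of the writable (values are illustrative):

ShortWritable a = new ShortWritable((short) 6);
ShortWritable b = new ShortWritable((short) 6);
// Two writables wrapping the same value compare equal to each other...
assertTrue(a.equals(b));
// ...but not to the boxed Short the expected map would be keyed on,
// which is why the key is unboxed with get() before the lookup.
assertFalse(a.equals(Short.valueOf((short) 6)));
assertEquals((short) 6, a.get());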
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache: class TestRCFile, method testSimpleReadAndWrite.
@Test
public void testSimpleReadAndWrite() throws IOException, SerDeException {
  cleanup();
  byte[][] record_1 = { "123".getBytes(StandardCharsets.UTF_8), "456".getBytes(StandardCharsets.UTF_8), "789".getBytes(StandardCharsets.UTF_8), "1000".getBytes(StandardCharsets.UTF_8), "5.3".getBytes(StandardCharsets.UTF_8), "hive and hadoop".getBytes(StandardCharsets.UTF_8), new byte[0], "NULL".getBytes(StandardCharsets.UTF_8) };
  byte[][] record_2 = { "100".getBytes(StandardCharsets.UTF_8), "200".getBytes(StandardCharsets.UTF_8), "123".getBytes(StandardCharsets.UTF_8), "1000".getBytes(StandardCharsets.UTF_8), "5.3".getBytes(StandardCharsets.UTF_8), "hive and hadoop".getBytes(StandardCharsets.UTF_8), new byte[0], "NULL".getBytes(StandardCharsets.UTF_8) };
  RCFileOutputFormat.setColumnNumber(conf, expectedFieldsData.length);
  RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, RCFile.createMetadata(new Text("apple"), new Text("block"), new Text("cat"), new Text("dog")), new DefaultCodec());
  BytesRefArrayWritable bytes = new BytesRefArrayWritable(record_1.length);
  for (int i = 0; i < record_1.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(record_1[i], 0, record_1[i].length);
    bytes.set(i, cu);
  }
  writer.append(bytes);
  bytes.clear();
  for (int i = 0; i < record_2.length; i++) {
    BytesRefWritable cu = new BytesRefWritable(record_2[i], 0, record_2[i].length);
    bytes.set(i, cu);
  }
  writer.append(bytes);
  writer.close();
  Object[] expectedRecord_1 = { new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
  Object[] expectedRecord_2 = { new ByteWritable((byte) 100), new ShortWritable((short) 200), new IntWritable(123), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"), null, null };
  RCFile.Reader reader = new RCFile.Reader(fs, file, conf);
  assertEquals(new Text("block"), reader.getMetadata().get(new Text("apple")));
  assertEquals(new Text("block"), reader.getMetadataValueOf(new Text("apple")));
  assertEquals(new Text("dog"), reader.getMetadataValueOf(new Text("cat")));
  LongWritable rowID = new LongWritable();
  for (int i = 0; i < 2; i++) {
    reader.next(rowID);
    BytesRefArrayWritable cols = new BytesRefArrayWritable();
    reader.getCurrentRow(cols);
    cols.resetValid(8);
    Object row = serDe.deserialize(cols);
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals("Field size should be 8", 8, fieldRefs.size());
    for (int j = 0; j < fieldRefs.size(); j++) {
      Object fieldData = oi.getStructFieldData(row, fieldRefs.get(j));
      Object standardWritableData = ObjectInspectorUtils.copyToStandardObject(fieldData, fieldRefs.get(j).getFieldObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
      if (i == 0) {
        assertEquals("Field " + i, standardWritableData, expectedRecord_1[j]);
      } else {
        assertEquals("Field " + i, standardWritableData, expectedRecord_2[j]);
      }
    }
  }
  reader.close();
}
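In the expected records above, the smallint column ("456" and "200") deserializes to a ShortWritable. A minimal round-trip sketch of that writable on its own, assuming only the standard Writable contract (write/readFields); the stream setup is illustrative:

ShortWritable original = new ShortWritable((short) 456);
java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
original.write(new java.io.DataOutputStream(buffer));
ShortWritable copy = new ShortWritable();
copy.readFields(new java.io.DataInputStream(new java.io.ByteArrayInputStream(buffer.toByteArray())));
// The deserialized copy carries the same primitive short value.
assertEquals(original.get(), copy.get());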
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache: class TestGenericUDFOPDivide, method testByteDivideShort.
@Test
public void testByteDivideShort() throws HiveException {
  GenericUDFOPDivide udf = new GenericUDFOPDivide();
  ByteWritable left = new ByteWritable((byte) 4);
  ShortWritable right = new ShortWritable((short) 6);
  ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.writableByteObjectInspector, PrimitiveObjectInspectorFactory.writableShortObjectInspector };
  DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right) };
  PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
  Assert.assertEquals(oi.getTypeInfo(), TypeInfoFactory.getDecimalTypeInfo(9, 6));
  HiveDecimalWritable res = (HiveDecimalWritable) udf.evaluate(args);
  Assert.assertEquals(HiveDecimal.create("0.666667"), res.getHiveDecimal());
}
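The expected quotient follows from the declared result type: decimal(9,6) keeps six fractional digits, and 4/6 rounded at scale 6 is 0.666667. A quick check of that arithmetic with java.math.BigDecimal (illustrative only, not the Hive evaluation path):

// 4 / 6 = 0.666666..., rounded half-up at scale 6 -> 0.666667
java.math.BigDecimal q = java.math.BigDecimal.valueOf(4)
    .divide(java.math.BigDecimal.valueOf(6), 6, java.math.RoundingMode.HALF_UP);
assertEquals("0.666667", q.toPlainString());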
Use of org.apache.hadoop.hive.serde2.io.ShortWritable in project hive by apache: class TestGenericUDFOPPlus, method testBytePlusShort.
@Test
public void testBytePlusShort() throws HiveException {
  GenericUDFOPPlus udf = new GenericUDFOPPlus();
  // Byte
  ByteWritable left = new ByteWritable((byte) 4);
  ShortWritable right = new ShortWritable((short) 6);
  ObjectInspector[] inputOIs = { PrimitiveObjectInspectorFactory.writableByteObjectInspector, PrimitiveObjectInspectorFactory.writableShortObjectInspector };
  DeferredObject[] args = { new DeferredJavaObject(left), new DeferredJavaObject(right) };
  PrimitiveObjectInspector oi = (PrimitiveObjectInspector) udf.initialize(inputOIs);
  Assert.assertEquals(oi.getTypeInfo(), TypeInfoFactory.shortTypeInfo);
  ShortWritable res = (ShortWritable) udf.evaluate(args);
  Assert.assertEquals(10, res.get());
}
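ShortWritable is mutable, and Hive's arithmetic UDFs commonly reuse a single result writable per row rather than allocating a new one for each evaluate call. A minimal sketch of that set/get reuse pattern, assuming the writable's set(short) mutator (values are illustrative):

ShortWritable result = new ShortWritable();
short[] rows = { 10, 20, 30 };
for (short v : rows) {
  result.set(v);                 // overwrite in place instead of allocating per row
  assertEquals(v, result.get());
}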