use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.
the class TestVectorizedORCReader method checkVectorizedReader.
private void checkVectorizedReader() throws Exception {
Reader vreader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
RecordReaderImpl vrr = (RecordReaderImpl) vreader.rows();
RecordReaderImpl rr = (RecordReaderImpl) reader.rows();
VectorizedRowBatch batch = reader.getSchema().createRowBatchV2();
OrcStruct row = null;
long lastRowNumber = -1;
// Check Vectorized ORC reader against ORC row reader
while (vrr.nextBatch(batch)) {
Assert.assertEquals(lastRowNumber + 1, vrr.getRowNumber());
for (int i = 0; i < batch.size; i++) {
Assert.assertEquals(rr.getRowNumber(), vrr.getRowNumber() + i);
lastRowNumber = rr.getRowNumber();
row = (OrcStruct) rr.next(row);
for (int j = 0; j < batch.cols.length; j++) {
Object a = (row.getFieldValue(j));
ColumnVector cv = batch.cols[j];
// if the value is repeating, use row 0
int rowId = cv.isRepeating ? 0 : i;
// make sure the null flag agrees
if (a == null) {
Assert.assertEquals(true, !cv.noNulls && cv.isNull[rowId]);
} else if (a instanceof BooleanWritable) {
// Boolean values are stores a 1's and 0's, so convert and compare
Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
long b = ((LongColumnVector) cv).vector[rowId];
Assert.assertEquals(temp.toString(), Long.toString(b));
} else if (a instanceof TimestampWritableV2) {
// Timestamps are stored as long, so convert and compare
TimestampWritableV2 t = ((TimestampWritableV2) a);
TimestampColumnVector tcv = ((TimestampColumnVector) cv);
java.sql.Timestamp ts = tcv.asScratchTimestamp(rowId);
Assert.assertEquals(t.getTimestamp(), Timestamp.ofEpochMilli(ts.getTime(), ts.getNanos()));
} else if (a instanceof DateWritableV2) {
// Dates are stored as long, so convert and compare
DateWritableV2 adt = (DateWritableV2) a;
long b = ((LongColumnVector) cv).vector[rowId];
Assert.assertEquals(adt.get().toEpochMilli(), DateWritableV2.daysToMillis((int) b));
} else if (a instanceof HiveDecimalWritable) {
// Decimals are stored as BigInteger, so convert and compare
HiveDecimalWritable dec = (HiveDecimalWritable) a;
HiveDecimalWritable b = ((DecimalColumnVector) cv).vector[i];
Assert.assertEquals(dec, b);
} else if (a instanceof DoubleWritable) {
double b = ((DoubleColumnVector) cv).vector[rowId];
assertEquals(a.toString(), Double.toString(b));
} else if (a instanceof Text) {
BytesColumnVector bcv = (BytesColumnVector) cv;
Text b = new Text();
b.set(bcv.vector[rowId], bcv.start[rowId], bcv.length[rowId]);
assertEquals(a, b);
} else if (a instanceof IntWritable || a instanceof LongWritable || a instanceof ByteWritable || a instanceof ShortWritable) {
assertEquals(a.toString(), Long.toString(((LongColumnVector) cv).vector[rowId]));
} else {
assertEquals("huh", a.getClass().getName());
}
}
}
// Check repeating
Assert.assertEquals(false, batch.cols[0].isRepeating);
Assert.assertEquals(false, batch.cols[1].isRepeating);
Assert.assertEquals(false, batch.cols[2].isRepeating);
Assert.assertEquals(true, batch.cols[3].isRepeating);
Assert.assertEquals(false, batch.cols[4].isRepeating);
Assert.assertEquals(false, batch.cols[5].isRepeating);
Assert.assertEquals(false, batch.cols[6].isRepeating);
Assert.assertEquals(false, batch.cols[7].isRepeating);
Assert.assertEquals(false, batch.cols[8].isRepeating);
Assert.assertEquals(false, batch.cols[9].isRepeating);
// Check non null
Assert.assertEquals(false, batch.cols[0].noNulls);
Assert.assertEquals(false, batch.cols[1].noNulls);
Assert.assertEquals(true, batch.cols[2].noNulls);
Assert.assertEquals(true, batch.cols[3].noNulls);
Assert.assertEquals(false, batch.cols[4].noNulls);
Assert.assertEquals(false, batch.cols[5].noNulls);
Assert.assertEquals(false, batch.cols[6].noNulls);
Assert.assertEquals(false, batch.cols[7].noNulls);
Assert.assertEquals(false, batch.cols[8].noNulls);
Assert.assertEquals(false, batch.cols[9].noNulls);
}
Assert.assertEquals(false, rr.nextBatch(batch));
}
use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.
the class TestParquetSerDe method testParquetHiveSerDe.
@Test
public void testParquetHiveSerDe() throws Throwable {
try {
// Create the SerDe
System.out.println("test: testParquetHiveSerDe");
final ParquetHiveSerDe serDe = new ParquetHiveSerDe();
final Configuration conf = new Configuration();
final Properties tbl = createProperties();
serDe.initialize(conf, tbl, null);
// Data
final Writable[] arr = new Writable[9];
// primitive types
arr[0] = new ByteWritable((byte) 123);
arr[1] = new ShortWritable((short) 456);
arr[2] = new IntWritable(789);
arr[3] = new LongWritable(1000l);
arr[4] = new DoubleWritable((double) 5.3);
arr[5] = new BytesWritable("hive and hadoop and parquet. Big family.".getBytes("UTF-8"));
arr[6] = new BytesWritable("parquetSerde binary".getBytes("UTF-8"));
final Writable[] map = new Writable[3];
for (int i = 0; i < 3; ++i) {
final Writable[] pair = new Writable[2];
pair[0] = new BytesWritable(("key_" + i).getBytes("UTF-8"));
pair[1] = new IntWritable(i);
map[i] = new ArrayWritable(Writable.class, pair);
}
arr[7] = new ArrayWritable(Writable.class, map);
final Writable[] array = new Writable[5];
for (int i = 0; i < 5; ++i) {
array[i] = new BytesWritable(("elem_" + i).getBytes("UTF-8"));
}
arr[8] = new ArrayWritable(Writable.class, array);
final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr);
// Test
deserializeAndSerializeLazySimple(serDe, arrWritable);
System.out.println("test: testParquetHiveSerDe - OK");
} catch (final Throwable e) {
e.printStackTrace();
throw e;
}
}
use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.
the class GenericUDFTrunc method evaluateNumber.
private Object evaluateNumber(DeferredObject[] arguments) throws HiveException, UDFArgumentTypeException {
if (arguments[0] == null) {
return null;
}
Object input = arguments[0].get();
if (input == null) {
return null;
}
if (arguments.length == 2 && arguments[1] != null && arguments[1].get() != null && !inputSacleConst) {
Object scaleObj = null;
switch(inputScaleOI.getPrimitiveCategory()) {
case BYTE:
scaleObj = byteConverter.convert(arguments[1].get());
scale = ((ByteWritable) scaleObj).get();
break;
case SHORT:
scaleObj = shortConverter.convert(arguments[1].get());
scale = ((ShortWritable) scaleObj).get();
break;
case INT:
scaleObj = intConverter.convert(arguments[1].get());
scale = ((IntWritable) scaleObj).get();
break;
case LONG:
scaleObj = longConverter.convert(arguments[1].get());
long l = ((LongWritable) scaleObj).get();
if (l < Integer.MIN_VALUE || l > Integer.MAX_VALUE) {
throw new UDFArgumentException(getFuncName().toUpperCase() + " scale argument out of allowed range");
}
scale = (int) l;
default:
break;
}
}
switch(inputType1) {
case VOID:
return null;
case DECIMAL:
HiveDecimalWritable decimalWritable = (HiveDecimalWritable) inputOI.getPrimitiveWritableObject(input);
HiveDecimal dec = trunc(decimalWritable.getHiveDecimal(), scale);
if (dec == null) {
return null;
}
return new HiveDecimalWritable(dec);
case BYTE:
ByteWritable byteWritable = (ByteWritable) inputOI.getPrimitiveWritableObject(input);
if (scale >= 0) {
return byteWritable;
} else {
return new ByteWritable((byte) trunc(byteWritable.get(), scale));
}
case SHORT:
ShortWritable shortWritable = (ShortWritable) inputOI.getPrimitiveWritableObject(input);
if (scale >= 0) {
return shortWritable;
} else {
return new ShortWritable((short) trunc(shortWritable.get(), scale));
}
case INT:
IntWritable intWritable = (IntWritable) inputOI.getPrimitiveWritableObject(input);
if (scale >= 0) {
return intWritable;
} else {
return new IntWritable((int) trunc(intWritable.get(), scale));
}
case LONG:
LongWritable longWritable = (LongWritable) inputOI.getPrimitiveWritableObject(input);
if (scale >= 0) {
return longWritable;
} else {
return new LongWritable(trunc(longWritable.get(), scale));
}
case FLOAT:
float f = ((FloatWritable) inputOI.getPrimitiveWritableObject(input)).get();
return new FloatWritable((float) trunc(f, scale));
case DOUBLE:
return trunc(((DoubleWritable) inputOI.getPrimitiveWritableObject(input)), scale);
default:
throw new UDFArgumentTypeException(0, "Only numeric or string group data types are allowed for TRUNC function. Got " + inputType1.name());
}
}
use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.
the class TestVectorGroupByOperator method testKeyTypeAggregate.
private void testKeyTypeAggregate(String aggregateName, FakeVectorRowBatchFromObjectIterables data, Map<Object, Object> expected) throws HiveException {
List<String> mapColumnNames = new ArrayList<String>();
mapColumnNames.add("Key");
mapColumnNames.add("Value");
VectorizationContext ctx = new VectorizationContext("name", mapColumnNames);
Set<Object> keys = new HashSet<Object>();
AggregationDesc agg = buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "Value", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[1]));
ArrayList<AggregationDesc> aggs = new ArrayList<AggregationDesc>();
aggs.add(agg);
ArrayList<String> outputColumnNames = new ArrayList<String>();
outputColumnNames.add("_col0");
outputColumnNames.add("_col1");
GroupByDesc desc = new GroupByDesc();
VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
desc.setOutputColumnNames(outputColumnNames);
desc.setAggregators(aggs);
vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH);
ExprNodeDesc keyExp = buildColumnDesc(ctx, "Key", TypeInfoFactory.getPrimitiveTypeInfo(data.getTypes()[0]));
ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
keysDesc.add(keyExp);
desc.setKeys(keysDesc);
CompilationOpContext cCtx = new CompilationOpContext();
Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc);
if (vgo == null) {
assertTrue(false);
}
FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
vgo.initialize(hconf, null);
out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {
private int rowIndex;
private String aggregateName;
private Map<Object, Object> expected;
private Set<Object> keys;
@Override
public void inspectRow(Object row, int tag) throws HiveException {
assertTrue(row instanceof Object[]);
Object[] fields = (Object[]) row;
assertEquals(2, fields.length);
Object key = fields[0];
Object keyValue = null;
if (null == key) {
keyValue = null;
} else if (key instanceof ByteWritable) {
ByteWritable bwKey = (ByteWritable) key;
keyValue = bwKey.get();
} else if (key instanceof ShortWritable) {
ShortWritable swKey = (ShortWritable) key;
keyValue = swKey.get();
} else if (key instanceof IntWritable) {
IntWritable iwKey = (IntWritable) key;
keyValue = iwKey.get();
} else if (key instanceof LongWritable) {
LongWritable lwKey = (LongWritable) key;
keyValue = lwKey.get();
} else if (key instanceof TimestampWritableV2) {
TimestampWritableV2 twKey = (TimestampWritableV2) key;
keyValue = twKey.getTimestamp().toSqlTimestamp();
} else if (key instanceof DoubleWritable) {
DoubleWritable dwKey = (DoubleWritable) key;
keyValue = dwKey.get();
} else if (key instanceof FloatWritable) {
FloatWritable fwKey = (FloatWritable) key;
keyValue = fwKey.get();
} else if (key instanceof BooleanWritable) {
BooleanWritable bwKey = (BooleanWritable) key;
keyValue = bwKey.get();
} else if (key instanceof HiveDecimalWritable) {
HiveDecimalWritable hdwKey = (HiveDecimalWritable) key;
keyValue = hdwKey.getHiveDecimal();
} else {
Assert.fail(String.format("Not implemented key output type %s: %s", key.getClass().getName(), key));
}
String keyValueAsString = String.format("%s", keyValue);
assertTrue(expected.containsKey(keyValue));
Object expectedValue = expected.get(keyValue);
Object value = fields[1];
Validator validator = getValidator(aggregateName);
validator.validate(keyValueAsString, expectedValue, new Object[] { value });
keys.add(keyValue);
}
private FakeCaptureVectorToRowOutputOperator.OutputInspector init(String aggregateName, Map<Object, Object> expected, Set<Object> keys) {
this.aggregateName = aggregateName;
this.expected = expected;
this.keys = keys;
return this;
}
}.init(aggregateName, expected, keys));
for (VectorizedRowBatch unit : data) {
vgo.process(unit, 0);
}
vgo.close(false);
List<Object> outBatchList = out.getCapturedRows();
assertNotNull(outBatchList);
assertEquals(expected.size(), outBatchList.size());
assertEquals(expected.size(), keys.size());
}
use of org.apache.hadoop.hive.serde2.io.ByteWritable in project hive by apache.
the class TestVectorGroupByOperator method testMultiKey.
private void testMultiKey(String aggregateName, FakeVectorRowBatchFromObjectIterables data, HashMap<Object, Object> expected) throws HiveException {
Map<String, Integer> mapColumnNames = new HashMap<String, Integer>();
ArrayList<String> outputColumnNames = new ArrayList<String>();
ArrayList<ExprNodeDesc> keysDesc = new ArrayList<ExprNodeDesc>();
Set<Object> keys = new HashSet<Object>();
// The types array tells us the number of columns in the data
final String[] columnTypes = data.getTypes();
// Columns 0..N-1 are keys. Column N is the aggregate value input
int i = 0;
for (; i < columnTypes.length - 1; ++i) {
String columnName = String.format("_col%d", i);
mapColumnNames.put(columnName, i);
outputColumnNames.add(columnName);
}
mapColumnNames.put("value", i);
outputColumnNames.add("value");
VectorizationContext ctx = new VectorizationContext("name", outputColumnNames);
ArrayList<AggregationDesc> aggs = new ArrayList(1);
aggs.add(buildAggregationDesc(ctx, aggregateName, GenericUDAFEvaluator.Mode.PARTIAL1, "value", TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
for (i = 0; i < columnTypes.length - 1; ++i) {
String columnName = String.format("_col%d", i);
keysDesc.add(buildColumnDesc(ctx, columnName, TypeInfoFactory.getPrimitiveTypeInfo(columnTypes[i])));
}
GroupByDesc desc = new GroupByDesc();
VectorGroupByDesc vectorGroupByDesc = new VectorGroupByDesc();
desc.setOutputColumnNames(outputColumnNames);
desc.setAggregators(aggs);
desc.setKeys(keysDesc);
vectorGroupByDesc.setProcessingMode(ProcessingMode.HASH);
CompilationOpContext cCtx = new CompilationOpContext();
Operator<? extends OperatorDesc> groupByOp = OperatorFactory.get(cCtx, desc);
VectorGroupByOperator vgo = (VectorGroupByOperator) Vectorizer.vectorizeGroupByOperator(groupByOp, ctx, vectorGroupByDesc);
FakeCaptureVectorToRowOutputOperator out = FakeCaptureVectorToRowOutputOperator.addCaptureOutputChild(cCtx, vgo);
vgo.initialize(hconf, null);
out.setOutputInspector(new FakeCaptureVectorToRowOutputOperator.OutputInspector() {
private int rowIndex;
private String aggregateName;
private Map<Object, Object> expected;
private Set<Object> keys;
@Override
public void inspectRow(Object row, int tag) throws HiveException {
assertTrue(row instanceof Object[]);
Object[] fields = (Object[]) row;
assertEquals(columnTypes.length, fields.length);
ArrayList<Object> keyValue = new ArrayList<Object>(columnTypes.length - 1);
for (int i = 0; i < columnTypes.length - 1; ++i) {
Object key = fields[i];
if (null == key) {
keyValue.add(null);
} else if (key instanceof Text) {
Text txKey = (Text) key;
keyValue.add(txKey.toString());
} else if (key instanceof ByteWritable) {
ByteWritable bwKey = (ByteWritable) key;
keyValue.add(bwKey.get());
} else if (key instanceof ShortWritable) {
ShortWritable swKey = (ShortWritable) key;
keyValue.add(swKey.get());
} else if (key instanceof IntWritable) {
IntWritable iwKey = (IntWritable) key;
keyValue.add(iwKey.get());
} else if (key instanceof LongWritable) {
LongWritable lwKey = (LongWritable) key;
keyValue.add(lwKey.get());
} else if (key instanceof TimestampWritableV2) {
TimestampWritableV2 twKey = (TimestampWritableV2) key;
keyValue.add(twKey.getTimestamp());
} else if (key instanceof DoubleWritable) {
DoubleWritable dwKey = (DoubleWritable) key;
keyValue.add(dwKey.get());
} else if (key instanceof FloatWritable) {
FloatWritable fwKey = (FloatWritable) key;
keyValue.add(fwKey.get());
} else if (key instanceof BooleanWritable) {
BooleanWritable bwKey = (BooleanWritable) key;
keyValue.add(bwKey.get());
} else {
Assert.fail(String.format("Not implemented key output type %s: %s", key.getClass().getName(), key));
}
}
String keyAsString = Arrays.deepToString(keyValue.toArray());
assertTrue(expected.containsKey(keyValue));
Object expectedValue = expected.get(keyValue);
Object value = fields[columnTypes.length - 1];
Validator validator = getValidator(aggregateName);
validator.validate(keyAsString, expectedValue, new Object[] { value });
keys.add(keyValue);
}
private FakeCaptureVectorToRowOutputOperator.OutputInspector init(String aggregateName, Map<Object, Object> expected, Set<Object> keys) {
this.aggregateName = aggregateName;
this.expected = expected;
this.keys = keys;
return this;
}
}.init(aggregateName, expected, keys));
for (VectorizedRowBatch unit : data) {
vgo.process(unit, 0);
}
vgo.close(false);
List<Object> outBatchList = out.getCapturedRows();
assertNotNull(outBatchList);
assertEquals(expected.size(), outBatchList.size());
assertEquals(expected.size(), keys.size());
}
Aggregations