use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class TestLazyBinarySerDe method testLongerSchemaDeserialization.
/**
 * Test longer-schema deserialization, where a smaller struct is serialized and
 * then deserialized with a bigger struct. Here the serialized struct has
 * 9 fields and we deserialize it into a struct of 10 fields.
 */
void testLongerSchemaDeserialization(Random r) throws Throwable {
  StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
  String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
  String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
  AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
  serde1.getObjectInspector();
  StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
  String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
  String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
  AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
  ObjectInspector serdeOI2 = serde2.getObjectInspector();
  int num = 100;
  for (int itest = 0; itest < num; itest++) {
    MyTestClass t = new MyTestClass();
    ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
    t.randomFill(r, extraTypeInfo);
    BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
    Object output = serde2.deserialize(bw);
    if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
      System.out.println("structs = " + SerDeUtils.getJSONString(t, rowOI1));
      System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
      System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
      assertEquals(t, output);
    }
  }
}
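These test methods call a getSerDe(fieldNames, fieldTypes) helper that is not shown on this page. A minimal sketch of what such a helper could look like, assuming it wires the field names and types into the standard columns/columns.types table properties and initializes a LazyBinarySerDe (org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, with serdeConstants from org.apache.hadoop.hive.serde); this is an approximation, not the exact Hive code:

// Sketch only: builds and initializes a LazyBinarySerDe for the given schema.
private AbstractSerDe getSerDe(String fieldNames, String fieldTypes) throws Throwable {
  Properties schema = new Properties();
  schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);       // "columns"
  schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes);  // "columns.types"
  AbstractSerDe serde = new LazyBinarySerDe();
  SerDeUtils.initializeSerDe(serde, new Configuration(), schema, null);
  return serde;
}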
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class TestLazyBinarySerDe method testLazyBinaryMap.
void testLazyBinaryMap(Random r) throws Throwable {
  StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
  String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
  String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
  AbstractSerDe serde = getSerDe(fieldNames, fieldTypes);
  ObjectInspector serdeOI = serde.getObjectInspector();
  StructObjectInspector soi1 = (StructObjectInspector) serdeOI;
  List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
  LazyBinaryMapObjectInspector lazympoi = (LazyBinaryMapObjectInspector) fields1.get(MyTestClassBigger.mapPos).getFieldObjectInspector();
  ObjectInspector lazympkeyoi = lazympoi.getMapKeyObjectInspector();
  ObjectInspector lazympvalueoi = lazympoi.getMapValueObjectInspector();
  StructObjectInspector soi2 = rowOI;
  List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
  MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(MyTestClassBigger.mapPos).getFieldObjectInspector();
  ObjectInspector inputmpkeyoi = inputmpoi.getMapKeyObjectInspector();
  ObjectInspector inputmpvalueoi = inputmpoi.getMapValueObjectInspector();
  int num = 100;
  for (int testi = 0; testi < num; testi++) {
    Map<String, List<MyTestInnerStruct>> mp = new LinkedHashMap<String, List<MyTestInnerStruct>>();
    int randFields = r.nextInt(10);
    for (int i = 0; i < randFields; i++) {
      String key = MyTestPrimitiveClass.getRandString(r);
      int randField = r.nextInt(10);
      List<MyTestInnerStruct> value = randField > 4 ? null : getRandStructArray(r);
      mp.put(key, value);
    }
    MyTestClassBigger t = new MyTestClassBigger();
    t.myMap = mp;
    BytesWritable bw = (BytesWritable) serde.serialize(t, rowOI);
    Object output = serde.deserialize(bw);
    Object lazyobj = soi1.getStructFieldData(output, fields1.get(MyTestClassBigger.mapPos));
    Map<?, ?> outputmp = lazympoi.getMap(lazyobj);
    if (outputmp.size() != mp.size()) {
      throw new RuntimeException("Map size changed from " + mp.size() + " to " + outputmp.size() + " after serialization!");
    }
    for (Map.Entry<?, ?> entryinput : mp.entrySet()) {
      boolean bEqual = false;
      for (Map.Entry<?, ?> entryoutput : outputmp.entrySet()) {
        // find the same key
        if (0 == ObjectInspectorUtils.compare(entryoutput.getKey(), lazympkeyoi, entryinput.getKey(), inputmpkeyoi)) {
          if (0 != ObjectInspectorUtils.compare(entryoutput.getValue(), lazympvalueoi, entryinput.getValue(), inputmpvalueoi)) {
            assertEquals(entryoutput.getValue(), entryinput.getValue());
          } else {
            bEqual = true;
          }
          break;
        }
      }
      if (!bEqual) {
        throw new RuntimeException("Could not find matched key in deserialized map : " + entryinput.getKey());
      }
    }
  }
}
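The nested loops above match keys and compare values by hand because the deserialized map is exposed through lazy-binary object inspectors while the input map uses reflection-based inspectors. As an aside (not part of the test), ObjectInspectorUtils also has a compare overload that takes a MapEqualComparer, so a whole-map check could be expressed in one call; a hedged sketch, reusing the variables from the loop above and assuming that overload is available in this Hive version:

// Sketch only: compare the whole deserialized map against the input map in one call.
// FullMapEqualComparer matches entries across the two maps regardless of iteration order.
int cmp = ObjectInspectorUtils.compare(
    lazyobj, lazympoi,   // deserialized map and its lazy-binary inspector
    mp, inputmpoi,       // original Java map and its reflection-based inspector
    new FullMapEqualComparer());
assertEquals(0, cmp);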
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class TestInputOutputFormat method testDefaultTypes.
@Test
public void testDefaultTypes() throws Exception {
  Properties properties = new Properties();
  properties.setProperty("columns", "str,str2");
  properties.setProperty("columns.types", "string:string");
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(StringRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  AbstractSerDe serde = new OrcSerde();
  HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
  org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class, true, properties, Reporter.NULL);
  writer.write(serde.serialize(new StringRow("owen"), inspector));
  writer.write(serde.serialize(new StringRow("beth"), inspector));
  writer.write(serde.serialize(new StringRow("laurel"), inspector));
  writer.write(serde.serialize(new StringRow("hazen"), inspector));
  writer.write(serde.serialize(new StringRow("colin"), inspector));
  writer.write(serde.serialize(new StringRow("miles"), inspector));
  writer.close(true);
  serde = new OrcSerde();
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  inspector = (StructObjectInspector) serde.getObjectInspector();
  assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);
  // read the whole file
  conf.set("columns", StringRow.getColumnNamesProperty());
  conf.set("columns.types", StringRow.getColumnTypesProperty());
  org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Writable value = (Writable) reader.createValue();
  List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  StringObjectInspector strInspector = (StringObjectInspector) fields.get(0).getFieldObjectInspector();
  assertEquals(true, reader.next(key, value));
  assertEquals("owen", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(true, reader.next(key, value));
  assertEquals("beth", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(true, reader.next(key, value));
  assertEquals("laurel", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(true, reader.next(key, value));
  assertEquals("hazen", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(true, reader.next(key, value));
  assertEquals("colin", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(true, reader.next(key, value));
  assertEquals("miles", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(false, reader.next(key, value));
  reader.close();
}
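The rows written above come from a small StringRow helper class that the test reflects over with getReflectionObjectInspector. Its definition is not shown on this page; a plausible sketch, consistent with the "str,str2" / "string:string" schema and the static property helpers used in the test (any additional plumbing in the real class is omitted, and the assumption that both fields carry the same value is mine):

// Sketch only: a two-column row type matching the "str,str2" / "string:string" schema.
static class StringRow {
  String str;
  String str2;

  StringRow(String s) {
    // Assumption: both columns are filled with the constructor argument.
    this.str = s;
    this.str2 = s;
  }

  static String getColumnNamesProperty() {
    return "str,str2";
  }

  static String getColumnTypesProperty() {
    return "string:string";
  }
}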
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class TestInputOutputFormat method testInOutFormat.
@Test
public void testInOutFormat() throws Exception {
  Properties properties = new Properties();
  properties.setProperty("columns", "x,y");
  properties.setProperty("columns.types", "int:int");
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  AbstractSerDe serde = new OrcSerde();
  HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
  org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer = outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true, properties, Reporter.NULL);
  writer.write(serde.serialize(new MyRow(1, 2), inspector));
  writer.write(serde.serialize(new MyRow(2, 2), inspector));
  writer.write(serde.serialize(new MyRow(3, 2), inspector));
  writer.close(true);
  serde = new OrcSerde();
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
  inspector = (StructObjectInspector) serde.getObjectInspector();
  assertEquals("struct<x:int,y:int>", inspector.getTypeName());
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);
  // test the validateInput method
  ArrayList<FileStatus> fileList = new ArrayList<FileStatus>();
  assertEquals(false, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
  fileList.add(fs.getFileStatus(testFilePath));
  assertEquals(true, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
  fileList.add(fs.getFileStatus(workDir));
  assertEquals(false, ((InputFormatChecker) in).validateInput(fs, new HiveConf(), fileList));
  // read the whole file
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
  conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
  org.apache.hadoop.mapred.RecordReader reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Writable value = (Writable) reader.createValue();
  int rowNum = 0;
  List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  IntObjectInspector intInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector();
  while (reader.next(key, value)) {
    assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(0))));
    assertEquals(2, intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(1))));
  }
  assertEquals(3, rowNum);
  assertEquals(1.0, reader.getProgress(), 0.00001);
  reader.close();
  // read just the first column
  ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(0));
  reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
  key = reader.createKey();
  value = (Writable) reader.createValue();
  rowNum = 0;
  fields = inspector.getAllStructFieldRefs();
  while (reader.next(key, value)) {
    assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(null, inspector.getStructFieldData(value, fields.get(1)));
  }
  assertEquals(3, rowNum);
  reader.close();
  // test the mapping of empty string to all columns
  ColumnProjectionUtils.setReadAllColumns(conf);
  reader = in.getRecordReader(splits[0], conf, Reporter.NULL);
  key = reader.createKey();
  value = (Writable) reader.createValue();
  rowNum = 0;
  fields = inspector.getAllStructFieldRefs();
  while (reader.next(key, value)) {
    assertEquals(++rowNum, intInspector.get(inspector.getStructFieldData(value, fields.get(0))));
    assertEquals(2, intInspector.get(inspector.getStructFieldData(serde.deserialize(value), fields.get(1))));
  }
  assertEquals(3, rowNum);
  reader.close();
}
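Similarly, MyRow is a small two-int-column helper reflected over by the test; a plausible sketch matching the "x,y" / "int:int" schema and the property helpers referenced above (an approximation, not the verbatim Hive class):

// Sketch only: a two-column row type matching the "x,y" / "int:int" schema.
static class MyRow {
  int x;
  int y;

  MyRow(int x, int y) {
    this.x = x;
    this.y = y;
  }

  static String getColumnNamesProperty() {
    return "x,y";
  }

  static String getColumnTypesProperty() {
    return "int:int";
  }
}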
use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
the class TestInputOutputFormat method testSplitElimination.
@Test
public void testSplitElimination() throws Exception {
  Properties properties = new Properties();
  properties.setProperty("columns", "z,r");
  properties.setProperty("columns.types", "int:struct<x:int,y:int>");
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(NestedRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  AbstractSerDe serde = new OrcSerde();
  OutputFormat<?, ?> outFormat = new OrcOutputFormat();
  conf.setInt("mapred.max.split.size", 50);
  RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
  writer.close(Reporter.NULL);
  serde = new OrcSerde();
  SearchArgument sarg = SearchArgumentFactory.newBuilder().startAnd().lessThan("z", PredicateLeaf.Type.LONG, new Long(0)).end().build();
  conf.set("sarg.pushdown", toKryo(sarg));
  conf.set("hive.io.file.readcolumn.names", "z,r");
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  inspector = (StructObjectInspector) serde.getObjectInspector();
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(0, splits.length);
}
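For contrast, a hedged sketch of the opposite case (not part of the original test): if the pushed-down predicate is cleared from the configuration, no row-group statistics can rule the file out, so getSplits should return at least one split for the same data. This assumes the conf and in variables from the method above are still in scope:

// Sketch only: without a pushed-down SearchArgument, the file is not eliminated.
conf.unset("sarg.pushdown");
InputSplit[] allSplits = in.getSplits(conf, 1);
assertTrue(allSplits.length > 0);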