Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
The class TestLazyBinaryFast, method testLazyBinaryFastCase.
public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r, SerdeRandomRowSource.SupportedTypes supportedTypes, int depth) throws Throwable {
    SerdeRandomRowSource source = new SerdeRandomRowSource();
    source.init(r, supportedTypes, depth);
    int rowCount = 100;
    Object[][] rows = source.randomRows(rowCount);
    if (doNonRandomFill) {
        MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
    }
    StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
    TypeInfo[] typeInfos = source.typeInfos();
    int columnCount = typeInfos.length;
    // About half the time, also prepare a SerDe over a prefix of the columns.
    int writeColumnCount = columnCount;
    StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
    boolean doWriteFewerColumns = r.nextBoolean();
    if (doWriteFewerColumns) {
        writeColumnCount = 1 + r.nextInt(columnCount);
        if (writeColumnCount == columnCount) {
            doWriteFewerColumns = false;
        } else {
            writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
        }
    }
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
    TestLazyBinarySerDe testLazyBinarySerDe = new TestLazyBinarySerDe();
    AbstractSerDe serde = testLazyBinarySerDe.getSerDe(fieldNames, fieldTypes);
    AbstractSerDe serde_fewer = null;
    if (doWriteFewerColumns) {
        String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
        String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
        serde_fewer = testLazyBinarySerDe.getSerDe(partialFieldNames, partialFieldTypes);
    }
    testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, typeInfos,
        /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r);
    testLazyBinaryFast(source, rows, serde, rowStructObjectInspector, serde_fewer, writeRowStructObjectInspector, typeInfos,
        /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r);
    /*
     * Can the LazyBinary format really tolerate writing fewer columns?
     */
    // if (doWriteFewerColumns) {
    //     testLazyBinaryFast(
    //         source, rows,
    //         serde, rowStructObjectInspector,
    //         serde_fewer, writeRowStructObjectInspector,
    //         primitiveTypeInfos,
    //         /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r);
    //     testLazyBinaryFast(
    //         source, rows,
    //         serde, rowStructObjectInspector,
    //         serde_fewer, writeRowStructObjectInspector,
    //         primitiveTypeInfos,
    //         /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r);
    // }
}
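The getSerDe(fieldNames, fieldTypes) helper used above builds a LazyBinarySerDe from comma-separated column names and types. A minimal sketch of that pattern, assuming the pre-Hive-4 initialize(Configuration, Properties) signature (the exact helper in TestLazyBinarySerDe may differ; Properties, Configuration, serdeConstants, and LazyBinarySerDe come from the usual java.util, hadoop.conf, and hive serde/serde2.lazybinary packages):

// Sketch of a getSerDe-style helper, not the verbatim test code.
Properties schema = new Properties();
schema.setProperty(serdeConstants.LIST_COLUMNS, fieldNames);      // "columns"
schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, fieldTypes); // "columns.types"
AbstractSerDe serDe = new LazyBinarySerDe();
serDe.initialize(new Configuration(), schema);  // assumed two-argument signature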
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
The class TestLazyBinarySerDe, method testLongerSchemaDeserialization.
/**
 * Test longer-schema deserialization, where a smaller struct is serialized
 * and then deserialized with a bigger struct. Here the serialized struct has
 * 9 fields and we deserialize it into a struct of 10 fields; the extra
 * trailing field should come back as null.
 */
void testLongerSchemaDeserialization(Random r) throws Throwable {
    StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
    AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    serde1.getObjectInspector();
    StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
    AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();
    int num = 100;
    for (int itest = 0; itest < num; itest++) {
        MyTestClass t = new MyTestClass();
        ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
        t.randomFill(r, extraTypeInfo);
        // Serialize with the smaller schema, deserialize with the bigger one.
        BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
        Object output = serde2.deserialize(bw);
        if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
            System.out.println("structs = " + SerDeUtils.getJSONString(t, rowOI1));
            System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
            System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
            assertEquals(t, output);
        }
    }
}
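To make the behavior concrete, a hypothetical sketch (schemas and the row value are made up; getSerDe is the test helper used above): a row serialized under a two-column schema can be deserialized under a three-column schema, and the missing trailing column surfaces as null.

// Hypothetical illustration; not part of the original test.
AbstractSerDe writerSerDe = getSerDe("a,b", "int,string");
AbstractSerDe readerSerDe = getSerDe("a,b,c", "int,string,double");
// BytesWritable bytes = (BytesWritable) writerSerDe.serialize(row, rowOI);
// Object wider = readerSerDe.deserialize(bytes);  // field "c" deserializes as null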
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
The class TestLazyBinarySerDe, method testLazyBinaryMap.
void testLazyBinaryMap(Random r) throws Throwable {
    StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
    String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
    AbstractSerDe serde = getSerDe(fieldNames, fieldTypes);
    ObjectInspector serdeOI = serde.getObjectInspector();
    StructObjectInspector soi1 = (StructObjectInspector) serdeOI;
    List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
    LazyBinaryMapObjectInspector lazympoi = (LazyBinaryMapObjectInspector) fields1.get(MyTestClassBigger.mapPos).getFieldObjectInspector();
    ObjectInspector lazympkeyoi = lazympoi.getMapKeyObjectInspector();
    ObjectInspector lazympvalueoi = lazympoi.getMapValueObjectInspector();
    StructObjectInspector soi2 = rowOI;
    List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
    MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(MyTestClassBigger.mapPos).getFieldObjectInspector();
    ObjectInspector inputmpkeyoi = inputmpoi.getMapKeyObjectInspector();
    ObjectInspector inputmpvalueoi = inputmpoi.getMapValueObjectInspector();
    int num = 100;
    for (int testi = 0; testi < num; testi++) {
        // Build a random map; roughly half the values are null.
        Map<String, List<MyTestInnerStruct>> mp = new LinkedHashMap<String, List<MyTestInnerStruct>>();
        int randFields = r.nextInt(10);
        for (int i = 0; i < randFields; i++) {
            String key = MyTestPrimitiveClass.getRandString(r);
            int randField = r.nextInt(10);
            List<MyTestInnerStruct> value = randField > 4 ? null : getRandStructArray(r);
            mp.put(key, value);
        }
        MyTestClassBigger t = new MyTestClassBigger();
        t.myMap = mp;
        BytesWritable bw = (BytesWritable) serde.serialize(t, rowOI);
        Object output = serde.deserialize(bw);
        Object lazyobj = soi1.getStructFieldData(output, fields1.get(MyTestClassBigger.mapPos));
        Map<?, ?> outputmp = lazympoi.getMap(lazyobj);
        if (outputmp.size() != mp.size()) {
            throw new RuntimeException("Map size changed from " + mp.size() + " to " + outputmp.size() + " after serialization!");
        }
        for (Map.Entry<?, ?> entryinput : mp.entrySet()) {
            boolean bEqual = false;
            for (Map.Entry<?, ?> entryoutput : outputmp.entrySet()) {
                // Find the entry with the same key, then compare the values.
                if (0 == ObjectInspectorUtils.compare(entryoutput.getKey(), lazympkeyoi, entryinput.getKey(), inputmpkeyoi)) {
                    if (0 != ObjectInspectorUtils.compare(entryoutput.getValue(), lazympvalueoi, entryinput.getValue(), inputmpvalueoi)) {
                        assertEquals(entryoutput.getValue(), entryinput.getValue());
                    } else {
                        bEqual = true;
                    }
                    break;
                }
            }
            if (!bEqual) {
                throw new RuntimeException("Could not find matching key in deserialized map: " + entryinput.getKey());
            }
        }
    }
}
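The nested loops above lean on ObjectInspectorUtils.compare, which compares two values through their respective ObjectInspectors; this lets a lazily deserialized value be checked against the original Java object even though their in-memory representations differ. The idiom, with illustrative variable names:

// lazyValue came from deserialization; javaValue is the original input.
if (0 == ObjectInspectorUtils.compare(lazyValue, lazyValueOI, javaValue, javaValueOI)) {
    // semantically equal despite different physical representations
}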
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
The class TestLazyBinarySerDe, method testShorterSchemaDeserialization.
/**
 * Test shorter-schema deserialization, where a bigger struct is serialized
 * and then deserialized with a smaller struct. Here the serialized struct has
 * 10 fields and we deserialize it into a struct of 9 fields; the extra
 * serialized field is simply ignored.
 */
private void testShorterSchemaDeserialization(Random r) throws Throwable {
    StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA);
    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
    AbstractSerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
    serde1.getObjectInspector();
    StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
    AbstractSerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
    ObjectInspector serdeOI2 = serde2.getObjectInspector();
    int num = 100;
    for (int itest = 0; itest < num; itest++) {
        MyTestClassBigger t = new MyTestClassBigger();
        ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
        t.randomFill(r, extraTypeInfo);
        BytesWritable bw = (BytesWritable) serde1.serialize(t, rowOI1);
        Object output = serde2.deserialize(bw);
        if (0 != compareDiffSizedStructs(t, rowOI1, output, serdeOI2)) {
            System.out.println("structs = " + SerDeUtils.getJSONString(t, rowOI1));
            System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
            System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
            assertEquals(t, output);
        }
    }
}
Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.
The class AbstractRecordWriter, method init.
@Override
public void init(StreamingConnection conn, long minWriteId, long maxWriteId, int statementId) throws StreamingException {
    if (conn == null) {
        throw new StreamingException("Streaming connection cannot be null during record writer initialization");
    }
    this.conn = conn;
    this.curBatchMinWriteId = minWriteId;
    this.curBatchMaxWriteId = maxWriteId;
    this.statementId = statementId;
    this.conf = conn.getHiveConf();
    this.defaultPartitionName = conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME);
    this.table = conn.getTable();
    String location = table.getSd().getLocation();
    try {
        URI uri = new URI(location);
        this.fs = FileSystem.newInstance(uri, conf);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Created new filesystem instance: {}", System.identityHashCode(this.fs));
        }
    } catch (URISyntaxException e) {
        throw new StreamingException("Unable to create URI from location: " + location, e);
    } catch (IOException e) {
        throw new StreamingException("Unable to get filesystem for location: " + location, e);
    }
    this.inputColumns = table.getSd().getCols().stream().map(FieldSchema::getName).collect(Collectors.toList());
    this.inputTypes = table.getSd().getCols().stream().map(FieldSchema::getType).collect(Collectors.toList());
    if (conn.isPartitionedTable() && conn.isDynamicPartitioning()) {
        // Dynamic-partition columns are appended to the input schema.
        this.partitionColumns = table.getPartitionKeys().stream().map(FieldSchema::getName).collect(Collectors.toList());
        this.inputColumns.addAll(partitionColumns);
        this.inputTypes.addAll(table.getPartitionKeys().stream().map(FieldSchema::getType).collect(Collectors.toList()));
    }
    this.fullyQualifiedTableName = Warehouse.getQualifiedName(table.getDbName(), table.getTableName());
    String outFormatName = this.table.getSd().getOutputFormat();
    try {
        this.acidOutputFormat = (AcidOutputFormat<?, ?>) ReflectionUtils.newInstance(JavaUtils.loadClass(outFormatName), conf);
    } catch (Exception e) {
        String shadePrefix = conf.getVar(HiveConf.ConfVars.HIVE_CLASSLOADER_SHADE_PREFIX);
        if (shadePrefix != null && !shadePrefix.trim().isEmpty()) {
            try {
                LOG.info("Shade prefix: {} specified. Using it as a fallback to load {}.", shadePrefix, outFormatName);
                this.acidOutputFormat = (AcidOutputFormat<?, ?>) ReflectionUtils.newInstance(JavaUtils.loadClass(shadePrefix, outFormatName), conf);
            } catch (ClassNotFoundException e1) {
                throw new StreamingException(e.getMessage(), e);
            }
        } else {
            throw new StreamingException(e.getMessage(), e);
        }
    }
    setupMemoryMonitoring();
    try {
        final AbstractSerDe serDe = createSerde();
        this.inputRowObjectInspector = (StructObjectInspector) serDe.getObjectInspector();
        if (conn.isPartitionedTable() && conn.isDynamicPartitioning()) {
            preparePartitioningFields();
            // The trailing dynamic-partition columns are excluded from the output row.
            int dpStartCol = inputRowObjectInspector.getAllStructFieldRefs().size() - table.getPartitionKeys().size();
            this.outputRowObjectInspector = new SubStructObjectInspector(inputRowObjectInspector, 0, dpStartCol);
        } else {
            this.outputRowObjectInspector = inputRowObjectInspector;
        }
        prepareBucketingFields();
    } catch (SerDeException e) {
        throw new StreamingException("Unable to create SerDe", e);
    }
}
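This init method is invoked by the streaming framework rather than called directly. A hypothetical caller-side sketch of how such a record writer is typically wired up through the Hive streaming API (database, table, and record contents are placeholders):

// Placeholder values throughout; error handling omitted.
StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
    .withFieldDelimiter(',')
    .build();
HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
    .withDatabase("default")
    .withTable("alerts")
    .withAgentInfo("example-agent")
    .withRecordWriter(writer)   // the connection calls writer.init(...) per write-id batch
    .withHiveConf(new HiveConf())
    .connect();
connection.beginTransaction();
connection.write("1,hello".getBytes());
connection.commitTransaction();
connection.close();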