use of org.apache.orc.mapred.OrcUnion in project incubator-gobblin by apache.
the class OrcTestUtils method fillOrcStructWithFixedValue.
/**
 * Writes a fixed set of primitive values into an already-allocated ORC value, walking the value and its
 * schema in lock-step.
 *
 * <p>The schema has to be passed explicitly: the category of every node is read from the
 * {@link TypeDescription}, and the value {@code w} is then cast to the writable that matches it.
 *
 * <p>Behavior per category:
 * <ul>
 *   <li>BOOLEAN receives {@code booleanValue}; BYTE, SHORT, INT, LONG, FLOAT and DOUBLE all receive
 *       {@code intValue} converted to the target type, so keep it small enough to survive narrowing.</li>
 *   <li>STRING, CHAR and VARCHAR receive {@code stringValue}.</li>
 *   <li>LIST and MAP only visit entries that already exist; an empty container is left untouched. Consider
 *       allocating the container with {@code OrcUtils.createValueRecursively(TypeDescription)} rather than
 *       {@link OrcStruct#createValue(TypeDescription)} so there is something to fill.</li>
 *   <li>STRUCT recurses into every field by position.</li>
 *   <li>UNION replaces the current content with a value created for the member at index {@code unionTag}
 *       and then fills that member. The same {@code unionTag} is handed down to nested unions.</li>
 * </ul>
 *
 * @param w the value to fill in place; expected to have the same shape as {@code schema}
 * @param schema the type description that drives the traversal
 * @param unionTag index of the union member to materialise for every union encountered
 * @param intValue value used for all numeric leaves
 * @param stringValue value used for all textual leaves
 * @param booleanValue value used for all boolean leaves
 * @throws UnsupportedOperationException for BINARY, DECIMAL, DATE, TIMESTAMP and TIMESTAMP_INSTANT
 * @throws IllegalArgumentException for any category not handled above
 */
public static void fillOrcStructWithFixedValue(WritableComparable w, TypeDescription schema, int unionTag, int intValue, String stringValue, boolean booleanValue) {
  switch (schema.getCategory()) {
    case BOOLEAN: {
      BooleanWritable target = (BooleanWritable) w;
      target.set(booleanValue);
      return;
    }
    case BYTE: {
      ByteWritable target = (ByteWritable) w;
      target.set((byte) intValue);
      return;
    }
    case SHORT: {
      ShortWritable target = (ShortWritable) w;
      target.set((short) intValue);
      return;
    }
    case INT: {
      IntWritable target = (IntWritable) w;
      target.set(intValue);
      return;
    }
    case LONG: {
      LongWritable target = (LongWritable) w;
      target.set(intValue);
      return;
    }
    case FLOAT: {
      FloatWritable target = (FloatWritable) w;
      target.set((float) intValue);
      return;
    }
    case DOUBLE: {
      DoubleWritable target = (DoubleWritable) w;
      target.set((double) intValue);
      return;
    }
    case STRING:
    case CHAR:
    case VARCHAR: {
      Text target = (Text) w;
      target.set(stringValue);
      return;
    }
    case BINARY:
      throw new UnsupportedOperationException("Binary type is not supported in random orc data filler");
    case DECIMAL:
      throw new UnsupportedOperationException("Decimal type is not supported in random orc data filler");
    case DATE:
    case TIMESTAMP:
    case TIMESTAMP_INSTANT:
      throw new UnsupportedOperationException("Timestamp and its derived types is not supported in random orc data filler");
    case LIST: {
      OrcList elements = (OrcList) w;
      TypeDescription elementSchema = schema.getChildren().get(0);
      // Nothing happens for an empty list: the loop body is the only place elements get filled.
      for (Object element : elements) {
        fillOrcStructWithFixedValue((WritableComparable) element, elementSchema, unionTag, intValue, stringValue, booleanValue);
      }
      return;
    }
    case MAP: {
      OrcMap mapValue = (OrcMap) w;
      TypeDescription keySchema = schema.getChildren().get(0);
      TypeDescription valueSchema = schema.getChildren().get(1);
      // Keys and values are filled in place, entry by entry.
      for (Object rawEntry : mapValue.entrySet()) {
        Map.Entry<?, ?> pair = (Map.Entry<?, ?>) rawEntry;
        fillOrcStructWithFixedValue((WritableComparable) pair.getKey(), keySchema, unionTag, intValue, stringValue, booleanValue);
        fillOrcStructWithFixedValue((WritableComparable) pair.getValue(), valueSchema, unionTag, intValue, stringValue, booleanValue);
      }
      return;
    }
    case STRUCT: {
      OrcStruct structValue = (OrcStruct) w;
      int fieldCount = schema.getChildren().size();
      // Field values are looked up by position, matching the order of the schema's children.
      for (int position = 0; position < fieldCount; position++) {
        fillOrcStructWithFixedValue(structValue.getFieldValue(position), schema.getChildren().get(position), unionTag, intValue, stringValue, booleanValue);
      }
      return;
    }
    case UNION: {
      OrcUnion unionValue = (OrcUnion) w;
      TypeDescription memberSchema = schema.getChildren().get(unionTag);
      // Allocate the selected member first, then fill the object the union now holds.
      unionValue.set(unionTag, OrcUtils.createValueRecursively(memberSchema));
      fillOrcStructWithFixedValue((WritableComparable) unionValue.getObject(), memberSchema, unionTag, intValue, stringValue, booleanValue);
      return;
    }
    default:
      throw new IllegalArgumentException("Unknown type " + schema.toString());
  }
}
use of org.apache.orc.mapred.OrcUnion in project incubator-gobblin by apache.
the class OrcUtilsTest method testNestedWithinUnionWithDiffTag.
/**
 * Fills two records of the same struct-with-union schema using different union tags, checks the filled
 * values, and then up-converts each of them in turn into one shared container.
 */
@Test
public void testNestedWithinUnionWithDiffTag() {
  TypeDescription schema = TypeDescription.fromString("struct<a:uniontype<struct<a:int,b:string>,int>>");

  // Source record filled with tag 0: the union holds the nested struct member.
  OrcStruct filledWithStructMember = (OrcStruct) OrcUtils.createValueRecursively(schema);
  OrcTestUtils.fillOrcStructWithFixedValue(filledWithStructMember, schema, 0, intValue1, stringValue1, boolValue);
  OrcUnion unionHoldingStruct = (OrcUnion) filledWithStructMember.getFieldValue("a");
  OrcStruct nestedStruct = (OrcStruct) unionHoldingStruct.getObject();
  Assert.assertEquals(nestedStruct.getFieldValue("a"), new IntWritable(intValue1));

  // Source record filled with tag 1: the union holds the plain int member.
  OrcStruct filledWithIntMember = (OrcStruct) OrcUtils.createValueRecursively(schema);
  OrcTestUtils.fillOrcStructWithFixedValue(filledWithIntMember, schema, 1, intValue1, stringValue1, boolValue);
  OrcUnion unionHoldingInt = (OrcUnion) filledWithIntMember.getFieldValue("a");
  Assert.assertEquals(unionHoldingInt.getObject(), new IntWritable(intValue1));

  // Up-convert both sources into the same target, one after the other; the target must equal the
  // most recently converted source each time.
  OrcStruct target = (OrcStruct) OrcUtils.createValueRecursively(schema);
  OrcUtils.upConvertOrcStruct(filledWithStructMember, target, schema);
  Assert.assertEquals(filledWithStructMember, target);
  OrcUtils.upConvertOrcStruct(filledWithIntMember, target, schema);
  Assert.assertEquals(filledWithIntMember, target);
}
use of org.apache.orc.mapred.OrcUnion in project incubator-gobblin by apache.
the class GenericRecordToOrcValueWriterTest method getUnionFieldFromStruct.
/**
 * Returns the first entry of the {@code fields} array declared on {@link OrcStruct}, cast to
 * {@link OrcUnion}. The array is read through reflection to get around its access modifier.
 *
 * @param struct the value to inspect; it is cast to {@link OrcStruct}
 * @return the element at index 0 of the struct's {@code fields} array
 * @throws RuntimeException wrapping any exception raised while casting or reflecting
 */
private OrcUnion getUnionFieldFromStruct(Writable struct) {
  try {
    OrcStruct target = (OrcStruct) struct;
    Field fieldsHandle = OrcStruct.class.getDeclaredField("fields");
    fieldsHandle.setAccessible(true);
    Object[] fieldValues = (Object[]) fieldsHandle.get(target);
    return (OrcUnion) fieldValues[0];
  } catch (Exception e) {
    throw new RuntimeException("Cannot access with reflection", e);
  }
}
use of org.apache.orc.mapred.OrcUnion in project incubator-gobblin by apache.
the class OrcKeyComparatorTest method testComplexRecordUnion.
// Exercises key comparison when the union field carries complex values (a nested record or a list).
// Schema under test:
//   struct<a:int,
//          b:uniontype<int,
//                      array<string>,
//                      struct<x:int,y:int>
//                     >
//         >
@Test
public void testComplexRecordUnion() throws Exception {
  // Assemble the schema bottom-up.
  TypeDescription listSchema = TypeDescription.createList(TypeDescription.createString());
  TypeDescription nestedRecordSchema = TypeDescription.createStruct()
      .addField("x", TypeDescription.createInt())
      .addField("y", TypeDescription.createInt());
  TypeDescription unionSchema = TypeDescription.createUnion()
      .addUnionChild(TypeDescription.createInt())
      .addUnionChild(listSchema)
      .addUnionChild(nestedRecordSchema);
  TypeDescription schema = TypeDescription.createStruct()
      .addField("a", TypeDescription.createInt())
      .addField("b", unionSchema);

  // Hand the schema to the comparator through the shuffle-key configuration entry.
  Configuration conf = new Configuration();
  String schemaKey = OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute();
  conf.set(schemaKey, schema.toString());
  Assert.assertEquals(conf.get(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute()), schema.toString());
  OrcKeyComparator comparator = new OrcKeyComparator();
  comparator.setConf(conf);

  // Baseline: union wraps a nested record built from (1, 2).
  OrcStruct baseRecord = (OrcStruct) OrcStruct.createValue(schema);
  baseRecord.setFieldValue("a", new IntWritable(1));
  OrcUnion baseUnion = createOrcUnion(unionSchema, createSimpleOrcStruct(nestedRecordSchema, 1, 2));
  baseRecord.setFieldValue("b", baseUnion);
  OrcKey baseKey = new OrcKey();
  baseKey.key = baseRecord;

  // Same content as the baseline, held in distinct objects.
  OrcStruct twinRecord = (OrcStruct) OrcStruct.createValue(schema);
  twinRecord.setFieldValue("a", new IntWritable(1));
  OrcUnion twinUnion = createOrcUnion(unionSchema, createSimpleOrcStruct(nestedRecordSchema, 1, 2));
  twinRecord.setFieldValue("b", twinUnion);
  OrcKey twinKey = new OrcKey();
  twinKey.key = twinRecord;

  // Nested record built from (2, 2): expected to sort after the baseline.
  OrcStruct largerNestedRecord = (OrcStruct) OrcStruct.createValue(schema);
  largerNestedRecord.setFieldValue("a", new IntWritable(1));
  OrcUnion largerNestedUnion = createOrcUnion(unionSchema, createSimpleOrcStruct(nestedRecordSchema, 2, 2));
  largerNestedRecord.setFieldValue("b", largerNestedUnion);
  OrcKey largerNestedKey = new OrcKey();
  largerNestedKey.key = largerNestedRecord;

  // Three records whose union wraps a list; the first is expected to sort before the other two,
  // which are expected to compare equal.
  OrcStruct listRecordA = (OrcStruct) OrcStruct.createValue(schema);
  listRecordA.setFieldValue("a", new IntWritable(1));
  OrcUnion listUnionA = createOrcUnion(unionSchema, createOrcList(5, listSchema, 2));
  listRecordA.setFieldValue("b", listUnionA);
  OrcKey listKeyA = new OrcKey();
  listKeyA.key = listRecordA;

  OrcStruct listRecordB = (OrcStruct) OrcStruct.createValue(schema);
  listRecordB.setFieldValue("a", new IntWritable(1));
  OrcUnion listUnionB = createOrcUnion(unionSchema, createOrcList(6, listSchema, 2));
  listRecordB.setFieldValue("b", listUnionB);
  OrcKey listKeyB = new OrcKey();
  listKeyB.key = listRecordB;

  OrcStruct listRecordC = (OrcStruct) OrcStruct.createValue(schema);
  listRecordC.setFieldValue("a", new IntWritable(1));
  OrcUnion listUnionC = createOrcUnion(unionSchema, createOrcList(6, listSchema, 2));
  listRecordC.setFieldValue("b", listUnionC);
  OrcKey listKeyC = new OrcKey();
  listKeyC.key = listRecordC;

  // Unions with identical content are equal even though they are separate instances.
  Assert.assertEquals(baseUnion, twinUnion);

  // Baseline sorts before the record whose nested struct was built from the larger first value.
  int baseVsLargerNested = comparator.compare(baseKey, largerNestedKey);
  Assert.assertTrue(baseVsLargerNested < 0);

  // List-backed records: A < B, A < C, B == C.
  int listAVsB = comparator.compare(listKeyA, listKeyB);
  Assert.assertTrue(listAVsB < 0);
  int listAVsC = comparator.compare(listKeyA, listKeyC);
  Assert.assertTrue(listAVsC < 0);
  int listBVsC = comparator.compare(listKeyB, listKeyC);
  Assert.assertTrue(listBVsC == 0);
}
use of org.apache.orc.mapred.OrcUnion in project incubator-gobblin by apache.
the class OrcKeyComparatorTest method createOrcUnion.
/**
 * Builds a new {@link OrcUnion} for {@code schema} and stores {@code value} in it.
 *
 * <p>The tag is always 0, whatever the type of {@code value}.
 * NOTE(review): tag 0 may not correspond to the union member that matches {@code value} - confirm
 * this is intended for the comparisons that use this helper.
 *
 * @param schema the union type description handed to the {@link OrcUnion} constructor
 * @param value the object to store in the union
 * @return the newly created union holding {@code value} under tag 0
 */
private OrcUnion createOrcUnion(TypeDescription schema, WritableComparable value) {
  final int fixedTag = 0;
  final OrcUnion union = new OrcUnion(schema);
  union.set(fixedTag, value);
  return union;
}
Aggregations