use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcCompactionTaskTest method readOrcFile.
/**
 * Reads every row of a compacted ORC output file into memory.
 *
 * <p>A fresh container is created from the file schema for each row and the row is copied
 * into it with {@code OrcUtils.upConvertOrcStruct} before being added to the result.
 * As noted by the original author, this only works if the fields are int values.
 *
 * @param orcFilePath location of the ORC file to read
 * @return the rows of the file, in the order the record reader yields them
 * @throws IOException if opening, reading, or closing the file fails
 * @throws InterruptedException if the record reader is interrupted while advancing
 */
private List<OrcStruct> readOrcFile(Path orcFilePath) throws IOException, InterruptedException {
  ReaderImpl orcReader = new ReaderImpl(orcFilePath, new OrcFile.ReaderOptions(new Configuration()));
  Reader.Options options = new Reader.Options().schema(orcReader.getSchema());
  List<OrcStruct> result = new ArrayList<>();
  // The record reader is Closeable; try-with-resources guarantees it is closed even when
  // reading fails part-way, instead of leaking the underlying row reader.
  try (OrcMapreduceRecordReader recordReader = new OrcMapreduceRecordReader(orcReader, options)) {
    while (recordReader.nextKeyValue()) {
      // Copy the current value into its own container before storing it in the result.
      OrcStruct recordContainer = (OrcStruct) OrcUtils.createValueRecursively(orcReader.getSchema());
      OrcUtils.upConvertOrcStruct((OrcStruct) recordReader.getCurrentValue(), recordContainer, orcReader.getSchema());
      result.add(recordContainer);
    }
  }
  return result;
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcKeyComparatorTest method testSimpleComparator.
/**
 * Checks {@code OrcKeyComparator} on keys wrapping flat structs of schema
 * {@code struct<i:int,j:int>}: a smaller record sorts first, records with the same
 * content compare as equal, and the ordering is reversed when the arguments are swapped.
 */
@Test
public void testSimpleComparator() throws Exception {
  String schemaString = "struct<i:int,j:int>";
  TypeDescription structSchema = TypeDescription.fromString(schemaString);

  // Publish the shuffle key schema through the configuration and make sure it round-trips.
  Configuration configuration = new Configuration();
  configuration.set(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute(), schemaString);
  Assert.assertEquals(configuration.get(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute()), schemaString);

  OrcKeyComparator keyComparator = new OrcKeyComparator();
  keyComparator.setConf(configuration);

  // (1, 2) is the small record; the two (3, 4) records are distinct objects with equal content.
  OrcKey smallKey = new OrcKey();
  smallKey.key = createSimpleOrcStruct(structSchema, 1, 2);

  OrcKey largeKey = new OrcKey();
  largeKey.key = createSimpleOrcStruct(structSchema, 3, 4);

  OrcKey largeKeyTwin = new OrcKey();
  largeKeyTwin.key = createSimpleOrcStruct(structSchema, 3, 4);

  int smallVersusLarge = keyComparator.compare(smallKey, largeKey);
  Assert.assertTrue(smallVersusLarge < 0);

  int largeVersusTwin = keyComparator.compare(largeKey, largeKeyTwin);
  Assert.assertTrue(largeVersusTwin == 0);

  int largeVersusSmall = keyComparator.compare(largeKey, smallKey);
  Assert.assertTrue(largeVersusSmall > 0);
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcKeyComparatorTest method testComplexRecordUnion.
// Test comparison for union containing complex types and nested record inside.
// Schema: struct<a:int,
//                b:uniontype<int,
//                            array<string>,
//                            struct<x:int,y:int>
//                           >
//               >
@Test
public void testComplexRecordUnion() throws Exception {
  OrcKeyComparator comparator = new OrcKeyComparator();
  Configuration conf = new Configuration();
  TypeDescription listSchema = TypeDescription.createList(TypeDescription.createString());
  TypeDescription nestedRecordSchema = TypeDescription.createStruct()
      .addField("x", TypeDescription.createInt())
      .addField("y", TypeDescription.createInt());
  TypeDescription unionSchema = TypeDescription.createUnion()
      .addUnionChild(TypeDescription.createInt())
      .addUnionChild(listSchema)
      .addUnionChild(nestedRecordSchema);
  TypeDescription schema = TypeDescription.createStruct()
      .addField("a", TypeDescription.createInt())
      .addField("b", unionSchema);
  // The comparator reads the key schema from the configuration; verify it round-trips first.
  conf.set(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute(), schema.toString());
  Assert.assertEquals(conf.get(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute()), schema.toString());
  comparator.setConf(conf);

  // base record
  OrcStruct record0 = (OrcStruct) OrcStruct.createValue(schema);
  record0.setFieldValue("a", new IntWritable(1));
  OrcStruct nestedRecord0 = createSimpleOrcStruct(nestedRecordSchema, 1, 2);
  OrcUnion orcUnion0 = createOrcUnion(unionSchema, nestedRecord0);
  record0.setFieldValue("b", orcUnion0);

  // same content as base record in diff objects.
  OrcStruct record1 = (OrcStruct) OrcStruct.createValue(schema);
  record1.setFieldValue("a", new IntWritable(1));
  OrcStruct nestedRecord1 = createSimpleOrcStruct(nestedRecordSchema, 1, 2);
  OrcUnion orcUnion1 = createOrcUnion(unionSchema, nestedRecord1);
  record1.setFieldValue("b", orcUnion1);

  // diff records inside union, record0 == record1 < record2
  OrcStruct record2 = (OrcStruct) OrcStruct.createValue(schema);
  record2.setFieldValue("a", new IntWritable(1));
  OrcStruct nestedRecord2 = createSimpleOrcStruct(nestedRecordSchema, 2, 2);
  OrcUnion orcUnion2 = createOrcUnion(unionSchema, nestedRecord2);
  record2.setFieldValue("b", orcUnion2);

  // differ in list inside union, record3 < record4 == record5
  OrcStruct record3 = (OrcStruct) OrcStruct.createValue(schema);
  record3.setFieldValue("a", new IntWritable(1));
  OrcList orcList3 = createOrcList(5, listSchema, 2);
  OrcUnion orcUnion3 = createOrcUnion(unionSchema, orcList3);
  record3.setFieldValue("b", orcUnion3);

  OrcStruct record4 = (OrcStruct) OrcStruct.createValue(schema);
  record4.setFieldValue("a", new IntWritable(1));
  OrcList orcList4 = createOrcList(6, listSchema, 2);
  OrcUnion orcUnion4 = createOrcUnion(unionSchema, orcList4);
  record4.setFieldValue("b", orcUnion4);

  OrcStruct record5 = (OrcStruct) OrcStruct.createValue(schema);
  record5.setFieldValue("a", new IntWritable(1));
  OrcList orcList5 = createOrcList(6, listSchema, 2);
  OrcUnion orcUnion5 = createOrcUnion(unionSchema, orcList5);
  record5.setFieldValue("b", orcUnion5);

  OrcKey orcKey0 = new OrcKey();
  orcKey0.key = record0;
  OrcKey orcKey1 = new OrcKey();
  orcKey1.key = record1;
  OrcKey orcKey2 = new OrcKey();
  orcKey2.key = record2;
  OrcKey orcKey3 = new OrcKey();
  orcKey3.key = record3;
  OrcKey orcKey4 = new OrcKey();
  orcKey4.key = record4;
  OrcKey orcKey5 = new OrcKey();
  orcKey5.key = record5;

  Assert.assertEquals(orcUnion0, orcUnion1);
  // record0 and record1 hold equal content in distinct objects, so the comparator itself
  // (not just OrcUnion.equals) must report them as equal.
  Assert.assertTrue(comparator.compare(orcKey0, orcKey1) == 0);
  // Int value in orcKey2 is larger
  Assert.assertTrue(comparator.compare(orcKey0, orcKey2) < 0);
  Assert.assertTrue(comparator.compare(orcKey1, orcKey2) < 0);
  Assert.assertTrue(comparator.compare(orcKey3, orcKey4) < 0);
  Assert.assertTrue(comparator.compare(orcKey3, orcKey5) < 0);
  Assert.assertTrue(comparator.compare(orcKey4, orcKey5) == 0);
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcKeyComparatorTest method testComplexRecordMap.
/**
 * Checks {@code OrcKeyComparator} on keys whose struct has an int field "a" and a
 * map&lt;string,string&gt; field "b". Covers maps differing in key and/or value, identical
 * maps, a difference in the int field only, and two-entry maps populated in opposite
 * insertion order.
 */
@Test
public void testComplexRecordMap() throws Exception {
  TypeDescription mapSchema =
      TypeDescription.createMap(TypeDescription.createString(), TypeDescription.createString());
  TypeDescription recordSchema = TypeDescription.createStruct()
      .addField("a", TypeDescription.createInt())
      .addField("b", mapSchema);

  // Publish the shuffle key schema through the configuration and make sure it round-trips.
  Configuration configuration = new Configuration();
  configuration.set(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute(), recordSchema.toString());
  Assert.assertEquals(
      configuration.get(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute()), recordSchema.toString());

  OrcKeyComparator keyComparator = new OrcKeyComparator();
  keyComparator.setConf(configuration);

  // Baseline: a = 1, b = {key -> value}.
  OrcMap baseMap = createSimpleOrcMap(new Text("key"), new Text("value"), mapSchema);
  OrcStruct baseRecord = (OrcStruct) OrcStruct.createValue(recordSchema);
  baseRecord.setFieldValue("a", new IntWritable(1));
  baseRecord.setFieldValue("b", baseMap);
  OrcKey baseKey = new OrcKey();
  baseKey.key = baseRecord;

  // Map differs from the baseline in both key and value.
  OrcMap bothDifferMap = createSimpleOrcMap(new Text("key_key"), new Text("value_value"), mapSchema);
  OrcStruct bothDifferRecord = (OrcStruct) OrcStruct.createValue(recordSchema);
  bothDifferRecord.setFieldValue("a", new IntWritable(1));
  bothDifferRecord.setFieldValue("b", bothDifferMap);
  OrcKey bothDifferKey = new OrcKey();
  bothDifferKey.key = bothDifferRecord;

  // Same map key as the baseline, different map value.
  OrcMap valueDiffersMap = createSimpleOrcMap(new Text("key"), new Text("value_value"), mapSchema);
  OrcStruct valueDiffersRecord = (OrcStruct) OrcStruct.createValue(recordSchema);
  valueDiffersRecord.setFieldValue("a", new IntWritable(1));
  valueDiffersRecord.setFieldValue("b", valueDiffersMap);
  OrcKey valueDiffersKey = new OrcKey();
  valueDiffersKey.key = valueDiffersRecord;

  // Same content as the baseline, built from separate objects.
  OrcMap baseTwinMap = createSimpleOrcMap(new Text("key"), new Text("value"), mapSchema);
  OrcStruct baseTwinRecord = (OrcStruct) OrcStruct.createValue(recordSchema);
  baseTwinRecord.setFieldValue("a", new IntWritable(1));
  baseTwinRecord.setFieldValue("b", baseTwinMap);
  OrcKey baseTwinKey = new OrcKey();
  baseTwinKey.key = baseTwinRecord;

  // Shares the baseline's map instance; only the int field differs.
  OrcStruct intDiffersRecord = (OrcStruct) OrcStruct.createValue(recordSchema);
  intDiffersRecord.setFieldValue("a", new IntWritable(2));
  intDiffersRecord.setFieldValue("b", baseMap);
  OrcKey intDiffersKey = new OrcKey();
  intDiffersKey.key = intDiffersRecord;

  // Two maps with the same two entries, inserted in opposite order.
  OrcMap forwardOrderMap = createSimpleOrcMap(new Text("key"), new Text("value"), mapSchema);
  forwardOrderMap.put(new Text("keyLater"), new Text("valueLater"));
  OrcStruct forwardOrderRecord = (OrcStruct) OrcStruct.createValue(recordSchema);
  forwardOrderRecord.setFieldValue("a", new IntWritable(1));
  forwardOrderRecord.setFieldValue("b", forwardOrderMap);
  OrcKey forwardOrderKey = new OrcKey();
  forwardOrderKey.key = forwardOrderRecord;

  OrcMap reverseOrderMap = createSimpleOrcMap(new Text("keyLater"), new Text("valueLater"), mapSchema);
  reverseOrderMap.put(new Text("key"), new Text("value"));
  OrcStruct reverseOrderRecord = (OrcStruct) OrcStruct.createValue(recordSchema);
  reverseOrderRecord.setFieldValue("a", new IntWritable(1));
  reverseOrderRecord.setFieldValue("b", reverseOrderMap);
  OrcKey reverseOrderKey = new OrcKey();
  reverseOrderKey.key = reverseOrderRecord;

  int baseVsBothDiffer = keyComparator.compare(baseKey, bothDifferKey);
  Assert.assertTrue(baseVsBothDiffer < 0);

  int bothDifferVsValueDiffers = keyComparator.compare(bothDifferKey, valueDiffersKey);
  Assert.assertTrue(bothDifferVsValueDiffers > 0);

  int valueDiffersVsTwin = keyComparator.compare(valueDiffersKey, baseTwinKey);
  Assert.assertTrue(valueDiffersVsTwin > 0);

  int baseVsTwin = keyComparator.compare(baseKey, baseTwinKey);
  Assert.assertTrue(baseVsTwin == 0);

  int baseVsIntDiffers = keyComparator.compare(baseKey, intDiffersKey);
  Assert.assertTrue(baseVsIntDiffers < 0);

  int forwardVsReverse = keyComparator.compare(forwardOrderKey, reverseOrderKey);
  Assert.assertTrue(forwardVsReverse == 0);
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcUtils method structConversionHelper.
/**
 * Recursively copies the value held by {@code w} into the container {@code v}, dispatching on the
 * runtime type of {@code w}. It is the helper used by
 * {@link #upConvertOrcStruct(OrcStruct, OrcStruct, TypeDescription)} when an OrcStruct has nested
 * structures as members.
 *
 * <p>The schemas of {@code w} and {@code v} may differ in a compatible way; per the original
 * documentation, a field that exists in {@code v} but not in {@code w} is filled with null (that
 * handling is done by {@code upConvertOrcStruct}, not in this method).
 *
 * <p>Unchecked warnings are suppressed because every cast targets an ORC (sub)element type; a
 * mismatch surfaces as a {@link ClassCastException} and aborts the conversion.
 *
 * @param w source value to copy from
 * @param v target container, created for {@code targetSchema}, that receives the copy
 * @param targetSchema schema describing {@code v}
 * @return {@code v}, populated from {@code w}
 */
@SuppressWarnings("unchecked")
private static WritableComparable structConversionHelper(WritableComparable w, WritableComparable v, TypeDescription targetSchema) {
  if (w instanceof OrcStruct) {
    upConvertOrcStruct((OrcStruct) w, (OrcStruct) v, targetSchema);
    return v;
  }

  if (w instanceof OrcList) {
    OrcList sourceElements = (OrcList) w;
    OrcList convertedElements = (OrcList) v;
    TypeDescription elementSchema = targetSchema.getChildren().get(0);
    // Drop whatever the target list already holds, then rebuild it element by element.
    convertedElements.clear();
    for (Object sourceElement : sourceElements) {
      WritableComparable elementContainer = createValueRecursively(elementSchema, 0);
      convertedElements.add(structConversionHelper((WritableComparable) sourceElement, elementContainer, elementSchema));
    }
    return v;
  }

  if (w instanceof OrcMap) {
    OrcMap sourceEntries = (OrcMap) w;
    OrcMap convertedEntries = (OrcMap) v;
    // Only the value side (child 1 of the map schema) is converted; keys are reused as-is.
    TypeDescription valueSchema = targetSchema.getChildren().get(1);
    convertedEntries.clear();
    for (Object rawEntry : sourceEntries.entrySet()) {
      Map.Entry<WritableComparable, WritableComparable> sourceEntry =
          (Map.Entry<WritableComparable, WritableComparable>) rawEntry;
      WritableComparable valueContainer = createValueRecursively(valueSchema);
      WritableComparable convertedValue = structConversionHelper(sourceEntry.getValue(), valueContainer, valueSchema);
      convertedEntries.put(sourceEntry.getKey(), convertedValue);
    }
    return v;
  }

  if (w instanceof OrcUnion) {
    OrcUnion sourceUnion = (OrcUnion) w;
    OrcUnion convertedUnion = (OrcUnion) v;
    byte memberTag = sourceUnion.getTag();
    // ORC doesn't support Union type widening, and Avro doesn't allow it either
    // (reference: https://avro.apache.org/docs/current/spec.html#Schema+Resolution),
    // so the member schema selected by the tag is taken to be identical in source and target.
    TypeDescription memberSchema = targetSchema.getChildren().get(memberTag);
    WritableComparable sourceMember = (WritableComparable) sourceUnion.getObject();
    WritableComparable memberContainer = createValueRecursively(memberSchema);
    convertedUnion.set(memberTag, structConversionHelper(sourceMember, memberContainer, memberSchema));
    return v;
  }

  // Everything else is treated as a primitive: copy w's value into v, whether or not
  // type-widening is involved.
  handlePrimitiveWritableComparable(w, v);
  return v;
}
Aggregations