Use of org.apache.orc.mapred.OrcKey in the Apache project incubator-gobblin.
From class OrcValueMapper, method setup.
/**
 * Prepares the per-mapper state before records are processed.
 *
 * <p>Wraps the task configuration in a {@link JobConf}, creates and configures the reusable
 * output key and value holders with it, and parses the mapper input schema and the shuffle-key
 * schema from the task configuration.
 *
 * @param context mapper context that supplies the task configuration
 * @throws IOException declared by the overridden method; may come from the superclass setup
 * @throws InterruptedException declared by the overridden method; may come from the superclass setup
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  this.jobConf = new JobConf(context.getConfiguration());

  // Reusable output holders, both configured from the job configuration.
  OrcKey reusableKey = new OrcKey();
  this.outKey = reusableKey;
  reusableKey.configure(jobConf);

  OrcValue reusableValue = new OrcValue();
  this.outValue = reusableValue;
  reusableValue.configure(jobConf);

  // The input schema is the one that is consistent among all mappers.
  String inputSchemaText = context.getConfiguration().get(OrcConf.MAPRED_INPUT_SCHEMA.getAttribute());
  this.mrInputSchema = TypeDescription.fromString(inputSchemaText);

  String shuffleKeySchemaText = context.getConfiguration().get(MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute());
  this.shuffleKeySchema = TypeDescription.fromString(shuffleKeySchemaText);
}
Use of org.apache.orc.mapred.OrcKey in the Apache project incubator-gobblin.
From class OrcKeyComparatorTest, method testComplexRecordArray.
/**
 * Checks the ordering produced by {@link OrcKeyComparator} for keys whose schema is a struct
 * with an int field "a" and a list-of-string field "b".
 */
@Test
public void testComplexRecordArray() throws Exception {
  TypeDescription stringListSchema = TypeDescription.createList(TypeDescription.createString());
  TypeDescription keySchema = TypeDescription.createStruct()
      .addField("a", TypeDescription.createInt())
      .addField("b", stringListSchema);

  // Publish the key schema through the configuration and confirm it round-trips.
  Configuration conf = new Configuration();
  conf.set(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute(), keySchema.toString());
  Assert.assertEquals(conf.get(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute()), keySchema.toString());

  OrcKeyComparator comparator = new OrcKeyComparator();
  comparator.setConf(conf);

  // Base record.
  OrcStruct baseRecord = (OrcStruct) OrcStruct.createValue(keySchema);
  baseRecord.setFieldValue("a", new IntWritable(1));
  baseRecord.setFieldValue("b", createOrcList(3, stringListSchema, 3));
  OrcKey baseKey = new OrcKey();
  baseKey.key = baseRecord;

  // Same content as the base record but a distinct object; expected to compare equal.
  OrcStruct twinRecord = (OrcStruct) OrcStruct.createValue(keySchema);
  twinRecord.setFieldValue("a", new IntWritable(1));
  twinRecord.setFieldValue("b", createOrcList(3, stringListSchema, 3));
  OrcKey twinKey = new OrcKey();
  twinKey.key = twinRecord;

  // Differs from the base record in the int field only.
  OrcStruct intVariantRecord = (OrcStruct) OrcStruct.createValue(keySchema);
  intVariantRecord.setFieldValue("a", new IntWritable(2));
  intVariantRecord.setFieldValue("b", createOrcList(3, stringListSchema, 3));
  OrcKey intVariantKey = new OrcKey();
  intVariantKey.key = intVariantRecord;

  // Differs in the array field, first variant (third createOrcList argument changed).
  OrcStruct listVariantOneRecord = (OrcStruct) OrcStruct.createValue(keySchema);
  listVariantOneRecord.setFieldValue("a", new IntWritable(1));
  listVariantOneRecord.setFieldValue("b", createOrcList(3, stringListSchema, 5));
  OrcKey listVariantOneKey = new OrcKey();
  listVariantOneKey.key = listVariantOneRecord;

  // Differs in the array field, second variant (first createOrcList argument changed).
  OrcStruct listVariantTwoRecord = (OrcStruct) OrcStruct.createValue(keySchema);
  listVariantTwoRecord.setFieldValue("a", new IntWritable(1));
  listVariantTwoRecord.setFieldValue("b", createOrcList(4, stringListSchema, 3));
  OrcKey listVariantTwoKey = new OrcKey();
  listVariantTwoKey.key = listVariantTwoRecord;

  Assert.assertTrue(comparator.compare(baseKey, twinKey) == 0);
  Assert.assertTrue(comparator.compare(twinKey, intVariantKey) < 0);
  Assert.assertTrue(comparator.compare(twinKey, listVariantOneKey) < 0);
  Assert.assertTrue(comparator.compare(twinKey, listVariantTwoKey) < 0);
}
Use of org.apache.orc.mapred.OrcKey in the Apache project incubator-gobblin.
From class OrcKeyComparator, method setConf.
/**
 * Stores the configuration and, when it is non-null, initializes the comparator state from the
 * shuffle-key schema it carries.
 *
 * <p>The MapReduce framework uses this comparator to sort {@code OrcKey} objects output from the
 * map phase, so the schema defined for the map output key is used together with the data-model
 * (non-raw) compare() implementation. The two reusable keys and the input buffer are created
 * only if they do not exist yet; the keys are then given fresh model structs built from the
 * schema.
 *
 * @param conf configuration to read the shuffle-key schema from; a {@code null} value is passed
 *     to the superclass and otherwise ignored
 * @throws NullPointerException if {@code conf} is non-null but holds no value for the
 *     shuffle-key schema property
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (null == conf) {
    return;
  }

  // Fail fast with a message naming the missing property; without this check a missing schema
  // would only show up as an unexplained NullPointerException when the schema is first used.
  String schemaProperty = OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute();
  String schemaText = java.util.Objects.requireNonNull(conf.get(schemaProperty),
      "Shuffle key schema is not set in the configuration: " + schemaProperty);
  schema = TypeDescription.fromString(schemaText);

  // One model struct per reusable key.
  OrcStruct firstModel = (OrcStruct) OrcStruct.createValue(schema);
  OrcStruct secondModel = (OrcStruct) OrcStruct.createValue(schema);

  if (key1 == null) {
    key1 = new OrcKey();
  }
  if (key2 == null) {
    key2 = new OrcKey();
  }
  if (buffer == null) {
    buffer = new DataInputBuffer();
  }

  key1.key = firstModel;
  key2.key = secondModel;
}
Use of org.apache.orc.mapred.OrcKey in the Apache project incubator-gobblin.
From class OrcKeyComparatorTest, method testSimpleComparator.
/**
 * Checks {@link OrcKeyComparator} on flat keys with the schema {@code struct<i:int,j:int>}:
 * a smaller record sorts first, identical content compares equal, and the reverse comparison
 * is positive.
 */
@Test
public void testSimpleComparator() throws Exception {
  String schemaText = "struct<i:int,j:int>";
  TypeDescription keySchema = TypeDescription.fromString(schemaText);

  // Publish the key schema through the configuration and confirm it round-trips.
  Configuration conf = new Configuration();
  conf.set(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute(), schemaText);
  Assert.assertEquals(conf.get(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute()), schemaText);

  OrcKeyComparator comparator = new OrcKeyComparator();
  comparator.setConf(conf);

  OrcKey smallerKey = new OrcKey();
  smallerKey.key = createSimpleOrcStruct(keySchema, 1, 2);

  OrcKey largerKey = new OrcKey();
  largerKey.key = createSimpleOrcStruct(keySchema, 3, 4);

  // Same field values as largerKey, held in a separate struct instance.
  OrcKey largerKeyTwin = new OrcKey();
  largerKeyTwin.key = createSimpleOrcStruct(keySchema, 3, 4);

  Assert.assertTrue(comparator.compare(smallerKey, largerKey) < 0);
  Assert.assertTrue(comparator.compare(largerKey, largerKeyTwin) == 0);
  Assert.assertTrue(comparator.compare(largerKey, smallerKey) > 0);
}
Use of org.apache.orc.mapred.OrcKey in the Apache project incubator-gobblin.
From class OrcKeyComparatorTest, method testComplexRecordUnion.
// Exercises the comparator on keys holding a union whose branches include complex types and a
// nested record.
// Key schema:
//   struct<a:int,
//          b:uniontype<int,
//                      array<string>,
//                      struct<x:int,y:int>
//                     >
//         >
@Test
public void testComplexRecordUnion() throws Exception {
  TypeDescription stringListSchema = TypeDescription.createList(TypeDescription.createString());
  TypeDescription innerStructSchema = TypeDescription.createStruct()
      .addField("x", TypeDescription.createInt())
      .addField("y", TypeDescription.createInt());
  TypeDescription unionSchema = TypeDescription.createUnion()
      .addUnionChild(TypeDescription.createInt())
      .addUnionChild(stringListSchema)
      .addUnionChild(innerStructSchema);
  TypeDescription keySchema = TypeDescription.createStruct()
      .addField("a", TypeDescription.createInt())
      .addField("b", unionSchema);

  // Publish the key schema through the configuration and confirm it round-trips.
  Configuration conf = new Configuration();
  conf.set(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute(), keySchema.toString());
  Assert.assertEquals(conf.get(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute()), keySchema.toString());

  OrcKeyComparator comparator = new OrcKeyComparator();
  comparator.setConf(conf);

  // Base record: the union carries a nested struct.
  OrcUnion baseUnion = createOrcUnion(unionSchema, createSimpleOrcStruct(innerStructSchema, 1, 2));
  OrcStruct baseRecord = (OrcStruct) OrcStruct.createValue(keySchema);
  baseRecord.setFieldValue("a", new IntWritable(1));
  baseRecord.setFieldValue("b", baseUnion);
  OrcKey baseKey = new OrcKey();
  baseKey.key = baseRecord;

  // Same content as the base record, built from different objects.
  OrcUnion twinUnion = createOrcUnion(unionSchema, createSimpleOrcStruct(innerStructSchema, 1, 2));
  OrcStruct twinRecord = (OrcStruct) OrcStruct.createValue(keySchema);
  twinRecord.setFieldValue("a", new IntWritable(1));
  twinRecord.setFieldValue("b", twinUnion);
  OrcKey twinKey = new OrcKey();
  twinKey.key = twinRecord;

  // Different nested record inside the union: base == twin < this one.
  OrcUnion largerStructUnion = createOrcUnion(unionSchema, createSimpleOrcStruct(innerStructSchema, 2, 2));
  OrcStruct largerStructRecord = (OrcStruct) OrcStruct.createValue(keySchema);
  largerStructRecord.setFieldValue("a", new IntWritable(1));
  largerStructRecord.setFieldValue("b", largerStructUnion);
  OrcKey largerStructKey = new OrcKey();
  largerStructKey.key = largerStructRecord;

  // Records that differ in the list inside the union: first < second == third.
  OrcUnion firstListUnion = createOrcUnion(unionSchema, createOrcList(5, stringListSchema, 2));
  OrcStruct firstListRecord = (OrcStruct) OrcStruct.createValue(keySchema);
  firstListRecord.setFieldValue("a", new IntWritable(1));
  firstListRecord.setFieldValue("b", firstListUnion);
  OrcKey firstListKey = new OrcKey();
  firstListKey.key = firstListRecord;

  OrcUnion secondListUnion = createOrcUnion(unionSchema, createOrcList(6, stringListSchema, 2));
  OrcStruct secondListRecord = (OrcStruct) OrcStruct.createValue(keySchema);
  secondListRecord.setFieldValue("a", new IntWritable(1));
  secondListRecord.setFieldValue("b", secondListUnion);
  OrcKey secondListKey = new OrcKey();
  secondListKey.key = secondListRecord;

  OrcUnion thirdListUnion = createOrcUnion(unionSchema, createOrcList(6, stringListSchema, 2));
  OrcStruct thirdListRecord = (OrcStruct) OrcStruct.createValue(keySchema);
  thirdListRecord.setFieldValue("a", new IntWritable(1));
  thirdListRecord.setFieldValue("b", thirdListUnion);
  OrcKey thirdListKey = new OrcKey();
  thirdListKey.key = thirdListRecord;

  Assert.assertEquals(baseUnion, twinUnion);
  // The int value inside largerStructKey's nested record is larger.
  Assert.assertTrue(comparator.compare(baseKey, largerStructKey) < 0);
  Assert.assertTrue(comparator.compare(firstListKey, secondListKey) < 0);
  Assert.assertTrue(comparator.compare(firstListKey, thirdListKey) < 0);
  Assert.assertTrue(comparator.compare(secondListKey, thirdListKey) == 0);
}
Aggregations