Search in sources :

Example 1 with OrcKey

use of org.apache.orc.mapred.OrcKey in project incubator-gobblin by apache.

the class OrcValueMapper method setup.

/**
 * Initializes this mapper's per-task fields from the task configuration: a {@link JobConf}
 * built from it, the output key and value objects (each configured with that JobConf), and
 * the input and shuffle-key schemas parsed from their configuration properties.
 *
 * @param context the task context whose configuration is read
 * @throws IOException as declared by the overridden {@code setup}
 * @throws InterruptedException as declared by the overridden {@code setup}
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    // The JobConf must exist before the output holders are configured with it.
    this.jobConf = new JobConf(context.getConfiguration());

    this.outKey = new OrcKey();
    this.outValue = new OrcValue();
    this.outKey.configure(jobConf);
    this.outValue.configure(jobConf);

    // This is the consistent input-schema among all mappers.
    String inputSchemaText = context.getConfiguration().get(OrcConf.MAPRED_INPUT_SCHEMA.getAttribute());
    this.mrInputSchema = TypeDescription.fromString(inputSchemaText);

    String shuffleKeySchemaText = context.getConfiguration().get(MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute());
    this.shuffleKeySchema = TypeDescription.fromString(shuffleKeySchemaText);
}
Also used : OrcKey(org.apache.orc.mapred.OrcKey) JobConf(org.apache.hadoop.mapred.JobConf) OrcValue(org.apache.orc.mapred.OrcValue)

Example 2 with OrcKey

use of org.apache.orc.mapred.OrcKey in project incubator-gobblin by apache.

the class OrcKeyComparatorTest method testComplexRecordArray.

/**
 * Exercises {@code OrcKeyComparator.compare} on keys whose schema is a struct with an int
 * field "a" and a list-of-string field "b": one pair expected to compare equal, and three
 * pairs where the second key is expected to sort after the first.
 */
@Test
public void testComplexRecordArray() throws Exception {
    OrcKeyComparator comparator = new OrcKeyComparator();
    Configuration conf = new Configuration();

    TypeDescription listSchema = TypeDescription.createList(TypeDescription.createString());
    TypeDescription schema = TypeDescription.createStruct()
        .addField("a", TypeDescription.createInt())
        .addField("b", listSchema);

    // Publish the key schema through the configuration and confirm it round-trips.
    final String shuffleKeyProperty = OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute();
    conf.set(shuffleKeyProperty, schema.toString());
    Assert.assertEquals(conf.get(shuffleKeyProperty), schema.toString());
    comparator.setConf(conf);

    // Baseline record.
    OrcStruct baseRecord = (OrcStruct) OrcStruct.createValue(schema);
    baseRecord.setFieldValue("a", new IntWritable(1));
    baseRecord.setFieldValue("b", createOrcList(3, listSchema, 3));
    OrcKey baseKey = new OrcKey();
    baseKey.key = baseRecord;

    // Built from the same inputs as the baseline, but a distinct object; expected to compare equal.
    OrcStruct twinRecord = (OrcStruct) OrcStruct.createValue(schema);
    twinRecord.setFieldValue("a", new IntWritable(1));
    twinRecord.setFieldValue("b", createOrcList(3, listSchema, 3));
    OrcKey twinKey = new OrcKey();
    twinKey.key = twinRecord;

    // Differs from the baseline in the int field only.
    OrcStruct largerIntRecord = (OrcStruct) OrcStruct.createValue(schema);
    largerIntRecord.setFieldValue("a", new IntWritable(2));
    largerIntRecord.setFieldValue("b", createOrcList(3, listSchema, 3));
    OrcKey largerIntKey = new OrcKey();
    largerIntKey.key = largerIntRecord;

    // Differs in the array field, variant 1: last createOrcList argument changed (3 -> 5).
    OrcStruct arrayVariantOne = (OrcStruct) OrcStruct.createValue(schema);
    arrayVariantOne.setFieldValue("a", new IntWritable(1));
    arrayVariantOne.setFieldValue("b", createOrcList(3, listSchema, 5));
    OrcKey arrayVariantOneKey = new OrcKey();
    arrayVariantOneKey.key = arrayVariantOne;

    // Differs in the array field, variant 2: first createOrcList argument changed (3 -> 4).
    OrcStruct arrayVariantTwo = (OrcStruct) OrcStruct.createValue(schema);
    arrayVariantTwo.setFieldValue("a", new IntWritable(1));
    arrayVariantTwo.setFieldValue("b", createOrcList(4, listSchema, 3));
    OrcKey arrayVariantTwoKey = new OrcKey();
    arrayVariantTwoKey.key = arrayVariantTwo;

    Assert.assertTrue(comparator.compare(baseKey, twinKey) == 0);
    Assert.assertTrue(comparator.compare(twinKey, largerIntKey) < 0);
    Assert.assertTrue(comparator.compare(twinKey, arrayVariantOneKey) < 0);
    Assert.assertTrue(comparator.compare(twinKey, arrayVariantTwoKey) < 0);
}
Also used : OrcStruct(org.apache.orc.mapred.OrcStruct) Configuration(org.apache.hadoop.conf.Configuration) OrcList(org.apache.orc.mapred.OrcList) TypeDescription(org.apache.orc.TypeDescription) OrcKey(org.apache.orc.mapred.OrcKey) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.testng.annotations.Test)

Example 3 with OrcKey

use of org.apache.orc.mapred.OrcKey in project incubator-gobblin by apache.

the class OrcKeyComparator method setConf.

/**
 * Stores the configuration via the superclass and, when it is non-null, parses the shuffle-key
 * schema from it and attaches a fresh struct of that schema to each of the two comparison keys.
 * The keys and the input buffer are created only if they do not exist yet.
 *
 * @param conf the configuration to apply; when {@code null} only the superclass call is made
 */
@Override
public void setConf(Configuration conf) {
    super.setConf(conf);
    if (conf == null) {
        return;
    }

    // The MapReduce framework will be using this comparator to sort OrcKey objects
    // output from the map phase, so use the schema defined for the map output key
    // and the data model non-raw compare() implementation.
    String keySchemaText = conf.get(OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute());
    schema = TypeDescription.fromString(keySchemaText);

    // Build both model structs up front, before any key is created or modified.
    OrcStruct firstModel = (OrcStruct) OrcStruct.createValue(schema);
    OrcStruct secondModel = (OrcStruct) OrcStruct.createValue(schema);

    if (key1 == null) {
        key1 = new OrcKey();
    }
    key1.key = firstModel;

    if (key2 == null) {
        key2 = new OrcKey();
    }
    key2.key = secondModel;

    if (buffer == null) {
        buffer = new DataInputBuffer();
    }
}
Also used : OrcStruct(org.apache.orc.mapred.OrcStruct) DataInputBuffer(org.apache.hadoop.io.DataInputBuffer) OrcKey(org.apache.orc.mapred.OrcKey)

Example 4 with OrcKey

use of org.apache.orc.mapred.OrcKey in project incubator-gobblin by apache.

the class OrcKeyComparatorTest method testSimpleComparator.

/**
 * Exercises {@code OrcKeyComparator.compare} on keys of schema {@code struct<i:int,j:int>}:
 * a smaller-versus-larger pair in both argument orders, and a pair built from identical values.
 */
@Test
public void testSimpleComparator() throws Exception {
    OrcKeyComparator comparator = new OrcKeyComparator();
    Configuration conf = new Configuration();

    String orcSchema = "struct<i:int,j:int>";
    TypeDescription schema = TypeDescription.fromString(orcSchema);

    // Publish the key schema through the configuration and confirm it round-trips.
    final String shuffleKeyProperty = OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute();
    conf.set(shuffleKeyProperty, orcSchema);
    Assert.assertEquals(conf.get(shuffleKeyProperty), orcSchema);
    comparator.setConf(conf);

    // One record built from (1, 2) and two separate records built from (3, 4).
    OrcStruct smallRecord = createSimpleOrcStruct(schema, 1, 2);
    OrcStruct largeRecord = createSimpleOrcStruct(schema, 3, 4);
    OrcStruct largeRecordTwin = createSimpleOrcStruct(schema, 3, 4);

    OrcKey smallKey = new OrcKey();
    OrcKey largeKey = new OrcKey();
    OrcKey largeKeyTwin = new OrcKey();
    smallKey.key = smallRecord;
    largeKey.key = largeRecord;
    largeKeyTwin.key = largeRecordTwin;

    Assert.assertTrue(comparator.compare(smallKey, largeKey) < 0);
    Assert.assertTrue(comparator.compare(largeKey, largeKeyTwin) == 0);
    Assert.assertTrue(comparator.compare(largeKey, smallKey) > 0);
}
Also used : OrcStruct(org.apache.orc.mapred.OrcStruct) Configuration(org.apache.hadoop.conf.Configuration) TypeDescription(org.apache.orc.TypeDescription) OrcKey(org.apache.orc.mapred.OrcKey) Test(org.testng.annotations.Test)

Example 5 with OrcKey

use of org.apache.orc.mapred.OrcKey in project incubator-gobblin by apache.

the class OrcKeyComparatorTest method testComplexRecordUnion.

/**
 * Test comparison for a union containing complex types and a nested record inside.
 *
 * <p>Schema:
 * {@code struct<a:int,b:uniontype<int,array<string>,struct<x:int,y:int>>>}
 */
@Test
public void testComplexRecordUnion() throws Exception {
    OrcKeyComparator comparator = new OrcKeyComparator();
    Configuration conf = new Configuration();

    TypeDescription listSchema = TypeDescription.createList(TypeDescription.createString());
    TypeDescription nestedRecordSchema = TypeDescription.createStruct()
        .addField("x", TypeDescription.createInt())
        .addField("y", TypeDescription.createInt());
    TypeDescription unionSchema = TypeDescription.createUnion()
        .addUnionChild(TypeDescription.createInt())
        .addUnionChild(listSchema)
        .addUnionChild(nestedRecordSchema);
    TypeDescription schema = TypeDescription.createStruct()
        .addField("a", TypeDescription.createInt())
        .addField("b", unionSchema);

    // Publish the key schema through the configuration and confirm it round-trips.
    final String shuffleKeyProperty = OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute();
    conf.set(shuffleKeyProperty, schema.toString());
    Assert.assertEquals(conf.get(shuffleKeyProperty), schema.toString());
    comparator.setConf(conf);

    // Base record: the union holds a nested struct built from (1, 2).
    OrcStruct baseRecord = (OrcStruct) OrcStruct.createValue(schema);
    baseRecord.setFieldValue("a", new IntWritable(1));
    OrcUnion baseUnion = createOrcUnion(unionSchema, createSimpleOrcStruct(nestedRecordSchema, 1, 2));
    baseRecord.setFieldValue("b", baseUnion);
    OrcKey baseKey = new OrcKey();
    baseKey.key = baseRecord;

    // Same content as the base record, held in different objects.
    OrcStruct twinRecord = (OrcStruct) OrcStruct.createValue(schema);
    twinRecord.setFieldValue("a", new IntWritable(1));
    OrcUnion twinUnion = createOrcUnion(unionSchema, createSimpleOrcStruct(nestedRecordSchema, 1, 2));
    twinRecord.setFieldValue("b", twinUnion);
    OrcKey twinKey = new OrcKey();
    twinKey.key = twinRecord;

    // Different nested struct inside the union: base == twin < this record.
    OrcStruct largerNestedRecord = (OrcStruct) OrcStruct.createValue(schema);
    largerNestedRecord.setFieldValue("a", new IntWritable(1));
    OrcUnion largerNestedUnion = createOrcUnion(unionSchema, createSimpleOrcStruct(nestedRecordSchema, 2, 2));
    largerNestedRecord.setFieldValue("b", largerNestedUnion);
    OrcKey largerNestedKey = new OrcKey();
    largerNestedKey.key = largerNestedRecord;

    // The next three differ only in the list held by the union:
    // listRecordA < listRecordB == listRecordC.
    OrcStruct listRecordA = (OrcStruct) OrcStruct.createValue(schema);
    listRecordA.setFieldValue("a", new IntWritable(1));
    OrcUnion listUnionA = createOrcUnion(unionSchema, createOrcList(5, listSchema, 2));
    listRecordA.setFieldValue("b", listUnionA);
    OrcKey listKeyA = new OrcKey();
    listKeyA.key = listRecordA;

    OrcStruct listRecordB = (OrcStruct) OrcStruct.createValue(schema);
    listRecordB.setFieldValue("a", new IntWritable(1));
    OrcUnion listUnionB = createOrcUnion(unionSchema, createOrcList(6, listSchema, 2));
    listRecordB.setFieldValue("b", listUnionB);
    OrcKey listKeyB = new OrcKey();
    listKeyB.key = listRecordB;

    OrcStruct listRecordC = (OrcStruct) OrcStruct.createValue(schema);
    listRecordC.setFieldValue("a", new IntWritable(1));
    OrcUnion listUnionC = createOrcUnion(unionSchema, createOrcList(6, listSchema, 2));
    listRecordC.setFieldValue("b", listUnionC);
    OrcKey listKeyC = new OrcKey();
    listKeyC.key = listRecordC;

    Assert.assertEquals(baseUnion, twinUnion);
    // Int value inside largerNestedKey's nested struct is larger
    Assert.assertTrue(comparator.compare(baseKey, largerNestedKey) < 0);
    Assert.assertTrue(comparator.compare(listKeyA, listKeyB) < 0);
    Assert.assertTrue(comparator.compare(listKeyA, listKeyC) < 0);
    Assert.assertTrue(comparator.compare(listKeyB, listKeyC) == 0);
}
Also used : OrcStruct(org.apache.orc.mapred.OrcStruct) Configuration(org.apache.hadoop.conf.Configuration) OrcList(org.apache.orc.mapred.OrcList) TypeDescription(org.apache.orc.TypeDescription) OrcUnion(org.apache.orc.mapred.OrcUnion) OrcKey(org.apache.orc.mapred.OrcKey) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.testng.annotations.Test)

Aggregations

OrcKey (org.apache.orc.mapred.OrcKey): 6, OrcStruct (org.apache.orc.mapred.OrcStruct): 5, Configuration (org.apache.hadoop.conf.Configuration): 4, TypeDescription (org.apache.orc.TypeDescription): 4, Test (org.testng.annotations.Test): 4, IntWritable (org.apache.hadoop.io.IntWritable): 3, OrcList (org.apache.orc.mapred.OrcList): 2, DataInputBuffer (org.apache.hadoop.io.DataInputBuffer): 1, Text (org.apache.hadoop.io.Text): 1, JobConf (org.apache.hadoop.mapred.JobConf): 1, OrcMap (org.apache.orc.mapred.OrcMap): 1, OrcUnion (org.apache.orc.mapred.OrcUnion): 1, OrcValue (org.apache.orc.mapred.OrcValue): 1