use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcCompactionTaskTest method writeOrcRecordsInFile.
/**
 * Writes the given records into a newly created ORC file.
 *
 * <p>An ORC {@code Writer} is created for {@code path} with a fresh {@code Configuration} and the
 * supplied schema, wrapped in an {@code OrcMapreduceRecordWriter}, and each record is written with
 * a {@code NullWritable} key.
 *
 * @param path location of the ORC file to create
 * @param schema schema set on the writer options
 * @param orcStructs records to write, in list order
 * @throws Exception if creating the writer, writing a record, or closing the writer fails
 */
private void writeOrcRecordsInFile(Path path, TypeDescription schema, List<OrcStruct> orcStructs) throws Exception {
  Configuration configuration = new Configuration();
  OrcFile.WriterOptions options = OrcFile.writerOptions(configuration).setSchema(schema);
  Writer writer = OrcFile.createWriter(path, options);
  OrcMapreduceRecordWriter recordWriter = new OrcMapreduceRecordWriter(writer);
  try {
    for (OrcStruct orcRecord : orcStructs) {
      recordWriter.write(NullWritable.get(), orcRecord);
    }
  } finally {
    // Close in finally so a failing write does not leave the underlying writer open.
    recordWriter.close(new TaskAttemptContextImpl(configuration, new TaskAttemptID()));
  }
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcCompactionTaskTest method basicTestWithShuffleKeySpecified.
/**
 * Runs an embedded compaction job with an explicit ORC mapper shuffle-key schema and checks the
 * compacted output.
 *
 * <p>The test expects exactly one output file under the hourly directory, containing three records
 * whose (i, j) values are (1, 2), (2, 3) and (4, 5) in that order.
 */
@Test
public void basicTestWithShuffleKeySpecified() throws Exception {
  File basePath = Files.createTempDir();
  // NOTE(review): File.deleteOnExit only removes a directory that is empty at JVM exit, so the
  // populated temp directory is likely left behind — consider a recursive cleanup.
  basePath.deleteOnExit();
  String minutelyPath = "Identity/MemberAccount/minutely/2017/04/03/10/20_30/run_2017-04-03-10-20";
  String hourlyPath = "Identity/MemberAccount/hourly/2017/04/03/10/";
  File jobDir = new File(basePath, minutelyPath);
  Assert.assertTrue(jobDir.mkdirs());

  // Writing some basic ORC files
  // Testing data is schema'ed with "struct<i:int,j:int>"
  createTestingData(jobDir);

  // NOTE(review): the shuffle key schema names a field "k" that is not part of the data schema
  // noted above; presumably this exercises a fallback path — confirm against the job configurator.
  EmbeddedGobblin embeddedGobblin =
      TestCompactionTaskUtils.createEmbeddedGobblinCompactionJob("basic", basePath.getAbsolutePath())
          .setConfiguration(CompactionJobConfigurator.COMPACTION_JOB_CONFIGURATOR_FACTORY_CLASS_KEY,
              TestCompactionOrcJobConfigurator.Factory.class.getName())
          .setConfiguration(COMPACTION_OUTPUT_EXTENSION, extensionName)
          .setConfiguration(ORC_MAPPER_SHUFFLE_KEY_SCHEMA, "struct<k:int>");
  JobExecutionResult execution = embeddedGobblin.run();
  Assert.assertTrue(execution.isSuccessful());

  // Result verification
  File outputDir = new File(basePath, hourlyPath);
  FileSystem fs = FileSystem.getLocal(new Configuration());
  List<FileStatus> statuses = new ArrayList<>();
  reloadFolder(statuses, outputDir, fs);
  // assertEquals reports the actual file count on failure, unlike assertTrue(size == 1).
  Assert.assertEquals(statuses.size(), 1);

  List<OrcStruct> result = readOrcFile(statuses.get(0).getPath());
  Assert.assertEquals(result.size(), 3);
  Assert.assertEquals(result.get(0).getFieldValue("i"), new IntWritable(1));
  Assert.assertEquals(result.get(0).getFieldValue("j"), new IntWritable(2));
  Assert.assertEquals(result.get(1).getFieldValue("i"), new IntWritable(2));
  Assert.assertEquals(result.get(1).getFieldValue("j"), new IntWritable(3));
  Assert.assertEquals(result.get(2).getFieldValue("i"), new IntWritable(4));
  Assert.assertEquals(result.get(2).getFieldValue("j"), new IntWritable(5));
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcCompactionTaskTest method testNonDedup.
/**
 * Runs an embedded compaction job with deduplication turned off and checks that duplicate records
 * survive compaction.
 *
 * <p>The test expects exactly one {@code .orc} output file under the hourly directory, containing
 * four records. After sorting, their (i, j) values are (1, 2), (1, 2), (2, 3) and (4, 5).
 */
@Test
public void testNonDedup() throws Exception {
  File basePath = Files.createTempDir();
  basePath.deleteOnExit();
  String minutelyPath = "Identity/MemberAccount_2/minutely/2017/04/03/10/20_30/run_2017-04-03-10-20";
  String hourlyPath = "Identity/MemberAccount_2/hourly/2017/04/03/10/";
  File jobDir = new File(basePath, minutelyPath);
  Assert.assertTrue(jobDir.mkdirs());

  createTestingData(jobDir);

  // getAbsolutePath() already returns a String, so no toString() is needed.
  EmbeddedGobblin embeddedGobblinNonDedup =
      createEmbeddedGobblinCompactionJob("basic", basePath.getAbsolutePath())
          .setConfiguration(CompactionJobConfigurator.COMPACTION_JOB_CONFIGURATOR_FACTORY_CLASS_KEY,
              TestCompactionOrcJobConfigurator.Factory.class.getName())
          .setConfiguration(COMPACTION_OUTPUT_EXTENSION, "orc")
          .setConfiguration(COMPACTION_SHOULD_DEDUPLICATE, "false");
  JobExecutionResult execution = embeddedGobblinNonDedup.run();
  Assert.assertTrue(execution.isSuccessful());

  // Non-dedup result verification
  File outputDir = new File(basePath, hourlyPath);
  FileSystem fs = FileSystem.getLocal(new Configuration());
  // Only files with the "orc" extension count as compaction output.
  PathFilter orcFilesOnly = path -> FilenameUtils.isExtension(path.getName(), "orc");
  List<FileStatus> statuses = new ArrayList<>();
  for (FileStatus status : fs.listStatus(new Path(outputDir.getAbsolutePath()), orcFilesOnly)) {
    statuses.add(status);
  }
  // assertEquals reports the actual file count on failure, unlike assertTrue(size == 1).
  Assert.assertEquals(statuses.size(), 1);

  List<OrcStruct> result = readOrcFile(statuses.get(0).getPath());
  Assert.assertEquals(result.size(), 4);

  // Sort by OrcStruct's own ordering so the positional assertions do not depend on read order.
  Comparator<OrcStruct> byStructOrder = OrcStruct::compareTo;
  result.sort(byStructOrder);

  Assert.assertEquals(result.get(0).getFieldValue("i"), new IntWritable(1));
  Assert.assertEquals(result.get(0).getFieldValue("j"), new IntWritable(2));
  Assert.assertEquals(result.get(1).getFieldValue("i"), new IntWritable(1));
  Assert.assertEquals(result.get(1).getFieldValue("j"), new IntWritable(2));
  Assert.assertEquals(result.get(2).getFieldValue("i"), new IntWritable(2));
  Assert.assertEquals(result.get(2).getFieldValue("j"), new IntWritable(3));
  Assert.assertEquals(result.get(3).getFieldValue("i"), new IntWritable(4));
  Assert.assertEquals(result.get(3).getFieldValue("j"), new IntWritable(5));
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcKeyComparatorTest method testComplexRecordArray.
/**
 * Exercises {@code OrcKeyComparator} on keys whose struct has an int field "a" and a
 * list-of-string field "b".
 *
 * <p>Two separately built but identically populated records must compare as equal. The base-equal
 * record must compare as less than records that differ in the int field or in either numeric
 * argument passed to {@code createOrcList}.
 */
@Test
public void testComplexRecordArray() throws Exception {
  final TypeDescription listSchema = TypeDescription.createList(TypeDescription.createString());
  final TypeDescription schema = TypeDescription.createStruct()
      .addField("a", TypeDescription.createInt())
      .addField("b", listSchema);
  final String schemaText = schema.toString();
  final String shuffleKeyAttribute = OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.getAttribute();

  // Register the key schema in the configuration and make sure it round-trips before use.
  final Configuration conf = new Configuration();
  conf.set(shuffleKeyAttribute, schemaText);
  Assert.assertEquals(conf.get(shuffleKeyAttribute), schemaText);

  final OrcKeyComparator comparator = new OrcKeyComparator();
  comparator.setConf(conf);

  // Base record: a = 1, b = createOrcList(3, listSchema, 3).
  final OrcStruct baseRecord = (OrcStruct) OrcStruct.createValue(schema);
  baseRecord.setFieldValue("a", new IntWritable(1));
  baseRecord.setFieldValue("b", createOrcList(3, listSchema, 3));
  final OrcKey baseKey = new OrcKey();
  baseKey.key = baseRecord;

  // Same content as the base record, but a distinct object.
  final OrcStruct identicalRecord = (OrcStruct) OrcStruct.createValue(schema);
  identicalRecord.setFieldValue("a", new IntWritable(1));
  identicalRecord.setFieldValue("b", createOrcList(3, listSchema, 3));
  final OrcKey identicalKey = new OrcKey();
  identicalKey.key = identicalRecord;

  // Differs from the base only in the int field (2 instead of 1).
  final OrcStruct largerIntRecord = (OrcStruct) OrcStruct.createValue(schema);
  largerIntRecord.setFieldValue("a", new IntWritable(2));
  largerIntRecord.setFieldValue("b", createOrcList(3, listSchema, 3));
  final OrcKey largerIntKey = new OrcKey();
  largerIntKey.key = largerIntRecord;

  // Differs from the base only in the list: last createOrcList argument is 5 instead of 3.
  final OrcStruct listVariantOneRecord = (OrcStruct) OrcStruct.createValue(schema);
  listVariantOneRecord.setFieldValue("a", new IntWritable(1));
  listVariantOneRecord.setFieldValue("b", createOrcList(3, listSchema, 5));
  final OrcKey listVariantOneKey = new OrcKey();
  listVariantOneKey.key = listVariantOneRecord;

  // Differs from the base only in the list: first createOrcList argument is 4 instead of 3.
  final OrcStruct listVariantTwoRecord = (OrcStruct) OrcStruct.createValue(schema);
  listVariantTwoRecord.setFieldValue("a", new IntWritable(1));
  listVariantTwoRecord.setFieldValue("b", createOrcList(4, listSchema, 3));
  final OrcKey listVariantTwoKey = new OrcKey();
  listVariantTwoKey.key = listVariantTwoRecord;

  Assert.assertTrue(comparator.compare(baseKey, identicalKey) == 0);
  Assert.assertTrue(comparator.compare(identicalKey, largerIntKey) < 0);
  Assert.assertTrue(comparator.compare(identicalKey, listVariantOneKey) < 0);
  Assert.assertTrue(comparator.compare(identicalKey, listVariantTwoKey) < 0);
}
use of org.apache.orc.mapred.OrcStruct in project incubator-gobblin by apache.
the class OrcKeyComparatorTest method createSimpleOrcStruct.
/**
 * Builds an {@code OrcStruct} for the given schema and fills its first two fields with ints.
 *
 * @param structSchema schema passed to the {@code OrcStruct} constructor
 * @param value1 value stored, wrapped in an {@code IntWritable}, at field position 0
 * @param value2 value stored, wrapped in an {@code IntWritable}, at field position 1
 * @return the populated struct
 */
private OrcStruct createSimpleOrcStruct(TypeDescription structSchema, int value1, int value2) {
  final int[] fieldValues = {value1, value2};
  final OrcStruct struct = new OrcStruct(structSchema);
  // Field positions follow argument order: value1 -> 0, value2 -> 1.
  for (int position = 0; position < fieldValues.length; position++) {
    struct.setFieldValue(position, new IntWritable(fieldValues[position]));
  }
  return struct;
}
Aggregations