use of org.apache.gobblin.runtime.embedded.EmbeddedGobblin in project incubator-gobblin by apache.
the class MRTaskFactoryTest method test.
/**
 * Runs the "WordCounter" job through {@link EmbeddedGobblin} with {@code MRWordCountSource} over two
 * input directories and verifies the word counts found in each directory's {@code part-r-00000} output.
 */
@Test
public void test() throws Exception {
  // Scratch roots for the job inputs and outputs.
  File inputRoot = Files.createTempDir();
  inputRoot.deleteOnExit();
  File outputRoot = Files.createTempDir();
  outputRoot.deleteOnExit();

  // First input directory: word1 x2, word2 x4 across two files.
  File firstInput = new File(inputRoot, "job1");
  Assert.assertTrue(firstInput.mkdir());
  writeFileWithContent(firstInput, "file1", "word1 word1 word2");
  writeFileWithContent(firstInput, "file2", "word2 word2 word2");

  // Second input directory: word1 x1, word2 x2 in a single file.
  File secondInput = new File(inputRoot, "job2");
  Assert.assertTrue(secondInput.mkdir());
  writeFileWithContent(secondInput, "file1", "word1 word2 word2");

  String inputDirectories = firstInput.getAbsolutePath() + "," + secondInput.getAbsolutePath();
  EmbeddedGobblin gobblin = new EmbeddedGobblin("WordCounter")
      .setConfiguration(ConfigurationKeys.SOURCE_CLASS_KEY, MRWordCountSource.class.getName())
      .setConfiguration(MRWordCountSource.INPUT_DIRECTORIES_KEY, inputDirectories)
      .setConfiguration(MRWordCountSource.OUTPUT_LOCATION, outputRoot.getAbsolutePath());

  JobExecutionResult outcome = gobblin.run();
  Assert.assertTrue(outcome.isSuccessful());

  // Output for the first input directory.
  File firstOutput = new File(new File(outputRoot, "job1"), "part-r-00000");
  Assert.assertTrue(firstOutput.exists());
  Map<String, Integer> firstCounts = parseCounts(firstOutput);
  Assert.assertEquals(firstCounts.get("word1").intValue(), 2);
  Assert.assertEquals(firstCounts.get("word2").intValue(), 4);

  // Output for the second input directory.
  File secondOutput = new File(new File(outputRoot, "job2"), "part-r-00000");
  Assert.assertTrue(secondOutput.exists());
  Map<String, Integer> secondCounts = parseCounts(secondOutput);
  Assert.assertEquals(secondCounts.get("word1").intValue(), 1);
  Assert.assertEquals(secondCounts.get("word2").intValue(), 2);
}
use of org.apache.gobblin.runtime.embedded.EmbeddedGobblin in project incubator-gobblin by apache.
the class PerformanceTest method testGobblinThroughput.
/**
 * Measures the throughput of a Gobblin pipeline with a trivial source and writer and no converters / forks, etc.
 *
 * <p>The job is built from the {@code performanceTest.template} resource and reports through an event bus
 * keyed by this class's name. Exactly one run summary is expected; its record count, elapsed time and the
 * derived records-per-second figure are printed to standard output.
 */
public static void testGobblinThroughput() throws Exception {
  final String busId = PerformanceTest.class.getName();

  EmbeddedGobblin gobblin = new EmbeddedGobblin("PerformanceTest")
      .setTemplate("resource:///templates/performanceTest.template")
      .setConfiguration(GobblinTestEventBusWriter.FULL_EVENTBUSID_KEY, busId);

  // Subscribe before running so the run summary emitted by the writer is captured.
  EventHandler handler = new EventHandler();
  TestingEventBuses.getEventBus(busId).register(handler);

  gobblin.run();

  Assert.assertEquals(handler.runSummaries.size(), 1);
  GobblinTestEventBusWriter.RunSummary summary = handler.runSummaries.get(0);

  // qps = records * 1000 / elapsed millis, computed in floating point.
  String report = String.format("Task processed %d records in %d millis, qps: %f",
      summary.getRecordsWritten(),
      summary.getTimeElapsedMillis(),
      (double) summary.getRecordsWritten() * 1000 / summary.getTimeElapsedMillis());
  System.out.println(report);
}
use of org.apache.gobblin.runtime.embedded.EmbeddedGobblin in project incubator-gobblin by apache.
the class MRCompactionTaskTest method testNonDedup.
/**
 * Writes two Avro input files (20 and 18 copies of two random records) under a minutely run directory
 * and checks that the "non-dedup" embedded job completes successfully.
 */
@Test
public void testNonDedup() throws Exception {
  File root = Files.createTempDir();
  root.deleteOnExit();

  File inputDir = new File(root, "Identity/MemberAccount/minutely/2017/04/03/10/20_30/run_2017-04-03-10-20");
  Assert.assertTrue(inputDir.mkdirs());

  GenericRecord firstRecord = createRandomRecord();
  GenericRecord secondRecord = createRandomRecord();
  writeFileWithContent(inputDir, "file1", firstRecord, 20);
  writeFileWithContent(inputDir, "file2", secondRecord, 18);

  String rootPath = root.getAbsolutePath();
  JobExecutionResult outcome = createEmbeddedGobblin("non-dedup", rootPath).run();
  Assert.assertTrue(outcome.isSuccessful());
}
use of org.apache.gobblin.runtime.embedded.EmbeddedGobblin in project incubator-gobblin by apache.
the class MRCompactionTaskTest method testRecompaction.
/**
 * Runs the embedded compaction job twice over the same input directory and verifies the recorded
 * input record count after each run: 20 after the first run, and 42 after 22 more records are added
 * and the job is run again.
 *
 * <p>Both runs are individually asserted to be successful.
 */
@Test
public void testRecompaction() throws Exception {
  FileSystem fs = getFileSystem();
  String basePath = "/tmp/testRecompaction";
  // The base path is fixed, so remove anything already present there before starting.
  fs.delete(new Path(basePath), true);

  File jobDir = new File(basePath, "Identity/MemberAccount/minutely/2017/04/03/10/20_30/run_2017-04-03-10-20");
  Assert.assertTrue(jobDir.mkdirs());

  // Location whose record count is read back after each run.
  Path hourlyOutput = new Path(basePath, new Path("Identity/MemberAccount/hourly/2017/04/03/10"));

  GenericRecord r1 = createRandomRecord();
  writeFileWithContent(jobDir, "file1", r1, 20);

  EmbeddedGobblin firstRun = createEmbeddedGobblin("Recompaction-First", basePath);
  JobExecutionResult firstResult = firstRun.run();
  // Check the job status first so a failed run is reported as such rather than as a bad count.
  Assert.assertTrue(firstResult.isSuccessful());
  long recordCount = InputRecordCountHelper.readRecordCount(fs, hourlyOutput);
  Assert.assertEquals(recordCount, 20);

  // Now write more avro files to input dir
  writeFileWithContent(jobDir, "file2", r1, 22);

  EmbeddedGobblin secondRun = createEmbeddedGobblin("Recompaction-Second", basePath);
  // Capture the second run's own result; asserting on the first run's result here would
  // let a failed second job go unnoticed.
  JobExecutionResult secondResult = secondRun.run();
  Assert.assertTrue(secondResult.isSuccessful());

  // If recompaction is succeeded, a new record count should be written.
  Assert.assertTrue(fs.exists(hourlyOutput));
  recordCount = InputRecordCountHelper.readRecordCount(fs, hourlyOutput);
  Assert.assertEquals(recordCount, 42);
}
use of org.apache.gobblin.runtime.embedded.EmbeddedGobblin in project incubator-gobblin by apache.
the class MRCompactionTaskTest method testDedup.
/**
 * Writes two Avro input files (20 and 18 copies of two random records) under a minutely run directory
 * and checks that the "dedup" embedded job completes successfully.
 */
@Test
public void testDedup() throws Exception {
  File root = Files.createTempDir();
  root.deleteOnExit();

  File inputDir = new File(root, "Identity/MemberAccount/minutely/2017/04/03/10/20_30/run_2017-04-03-10-20");
  Assert.assertTrue(inputDir.mkdirs());

  GenericRecord firstRecord = createRandomRecord();
  GenericRecord secondRecord = createRandomRecord();
  writeFileWithContent(inputDir, "file1", firstRecord, 20);
  writeFileWithContent(inputDir, "file2", secondRecord, 18);

  String rootPath = root.getAbsolutePath();
  JobExecutionResult outcome = createEmbeddedGobblin("dedup", rootPath).run();
  Assert.assertTrue(outcome.isSuccessful());
}
Aggregations