Search in sources :

Example 11 with EmbeddedGobblin

use of org.apache.gobblin.runtime.embedded.EmbeddedGobblin in project incubator-gobblin by apache.

From the class MRTaskFactoryTest, method test.

/**
 * Runs an {@link EmbeddedGobblin} job configured with {@code MRWordCountSource} over two input
 * directories and verifies the word counts parsed from each directory's {@code part-r-00000} output.
 */
@Test
public void test() throws Exception {
    // Scratch roots for job input and output.
    File inputRoot = Files.createTempDir();
    inputRoot.deleteOnExit();
    File outputRoot = Files.createTempDir();
    outputRoot.deleteOnExit();

    // First input directory: two files.
    File firstJobDir = new File(inputRoot, "job1");
    Assert.assertTrue(firstJobDir.mkdir());
    writeFileWithContent(firstJobDir, "file1", "word1 word1 word2");
    writeFileWithContent(firstJobDir, "file2", "word2 word2 word2");

    // Second input directory: a single file.
    File secondJobDir = new File(inputRoot, "job2");
    Assert.assertTrue(secondJobDir.mkdir());
    writeFileWithContent(secondJobDir, "file1", "word1 word2 word2");

    // Both input directories are passed as one comma-separated value.
    String inputDirectories = firstJobDir.getAbsolutePath() + "," + secondJobDir.getAbsolutePath();
    EmbeddedGobblin wordCounter = new EmbeddedGobblin("WordCounter")
        .setConfiguration(ConfigurationKeys.SOURCE_CLASS_KEY, MRWordCountSource.class.getName())
        .setConfiguration(MRWordCountSource.INPUT_DIRECTORIES_KEY, inputDirectories)
        .setConfiguration(MRWordCountSource.OUTPUT_LOCATION, outputRoot.getAbsolutePath());

    JobExecutionResult outcome = wordCounter.run();
    Assert.assertTrue(outcome.isSuccessful());

    // Output for job1 is expected under <output root>/job1.
    File firstOutput = new File(new File(outputRoot, "job1"), "part-r-00000");
    Assert.assertTrue(firstOutput.exists());
    Map<String, Integer> firstCounts = parseCounts(firstOutput);
    Assert.assertEquals((int) firstCounts.get("word1"), 2);
    Assert.assertEquals((int) firstCounts.get("word2"), 4);

    // Output for job2 is expected under <output root>/job2.
    File secondOutput = new File(new File(outputRoot, "job2"), "part-r-00000");
    Assert.assertTrue(secondOutput.exists());
    Map<String, Integer> secondCounts = parseCounts(secondOutput);
    Assert.assertEquals((int) secondCounts.get("word1"), 1);
    Assert.assertEquals((int) secondCounts.get("word2"), 2);
}
Also used : JobExecutionResult(org.apache.gobblin.runtime.api.JobExecutionResult) EmbeddedGobblin(org.apache.gobblin.runtime.embedded.EmbeddedGobblin) File(java.io.File) Test(org.testng.annotations.Test)

Example 12 with EmbeddedGobblin

use of org.apache.gobblin.runtime.embedded.EmbeddedGobblin in project incubator-gobblin by apache.

From the class PerformanceTest, method testGobblinThroughput.

/**
 * Measures the throughput of a Gobblin pipeline that uses a trivial source and writer, with no
 * converters, forks, etc., and prints the record count, elapsed time and records-per-second figure.
 */
public static void testGobblinThroughput() throws Exception {
    String busId = PerformanceTest.class.getName();
    EmbeddedGobblin pipeline = new EmbeddedGobblin("PerformanceTest")
        .setTemplate("resource:///templates/performanceTest.template")
        .setConfiguration(GobblinTestEventBusWriter.FULL_EVENTBUSID_KEY, busId);

    // Register the handler before the run; the checks below expect it to have collected
    // exactly one run summary by the time run() returns.
    EventHandler handler = new EventHandler();
    TestingEventBuses.getEventBus(busId).register(handler);

    pipeline.run();

    Assert.assertEquals(handler.runSummaries.size(), 1);
    GobblinTestEventBusWriter.RunSummary summary = handler.runSummaries.get(0);

    // Records per second: records * 1000 / elapsed milliseconds, computed in floating point.
    double qps = (double) summary.getRecordsWritten() * 1000 / summary.getTimeElapsedMillis();
    String report = String.format("Task processed %d records in %d millis, qps: %f",
        summary.getRecordsWritten(), summary.getTimeElapsedMillis(), qps);
    System.out.println(report);
}
Also used : EmbeddedGobblin(org.apache.gobblin.runtime.embedded.EmbeddedGobblin) GobblinTestEventBusWriter(org.apache.gobblin.writer.test.GobblinTestEventBusWriter)

Example 13 with EmbeddedGobblin

use of org.apache.gobblin.runtime.embedded.EmbeddedGobblin in project incubator-gobblin by apache.

From the class MRCompactionTaskTest, method testNonDedup.

/**
 * Writes two input files into a minutely run directory under a temporary base path, then runs the
 * {@link EmbeddedGobblin} created by {@code createEmbeddedGobblin("non-dedup", ...)} and asserts
 * that the job reports success.
 */
@Test
public void testNonDedup() throws Exception {
    File tempRoot = Files.createTempDir();
    tempRoot.deleteOnExit();

    File inputDir = new File(tempRoot, "Identity/MemberAccount/minutely/2017/04/03/10/20_30/run_2017-04-03-10-20");
    Assert.assertTrue(inputDir.mkdirs());

    // Two independently generated records, each written to its own input file.
    GenericRecord firstRecord = createRandomRecord();
    GenericRecord secondRecord = createRandomRecord();
    writeFileWithContent(inputDir, "file1", firstRecord, 20);
    writeFileWithContent(inputDir, "file2", secondRecord, 18);

    EmbeddedGobblin compactionJob = createEmbeddedGobblin("non-dedup", tempRoot.getAbsolutePath());
    JobExecutionResult outcome = compactionJob.run();
    Assert.assertTrue(outcome.isSuccessful());
}
Also used : JobExecutionResult(org.apache.gobblin.runtime.api.JobExecutionResult) EmbeddedGobblin(org.apache.gobblin.runtime.embedded.EmbeddedGobblin) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.testng.annotations.Test)

Example 14 with EmbeddedGobblin

use of org.apache.gobblin.runtime.embedded.EmbeddedGobblin in project incubator-gobblin by apache.

From the class MRCompactionTaskTest, method testRecompaction.

/**
 * Runs compaction twice over the same input directory and verifies that the second run
 * (after more input is added) succeeds and records the combined input record count.
 *
 * <p>The first run sees one input file written with a count of 20; a second file written with a
 * count of 22 is then added, and the record count read back after the second run must be 42.
 */
@Test
public void testRecompaction() throws Exception {
    FileSystem fs = getFileSystem();
    String basePath = "/tmp/testRecompaction";
    // Start from a clean slate: remove anything left by a previous run.
    fs.delete(new Path(basePath), true);

    File jobDir = new File(basePath, "Identity/MemberAccount/minutely/2017/04/03/10/20_30/run_2017-04-03-10-20");
    Assert.assertTrue(jobDir.mkdirs());

    // Location the record count is read from after each run.
    Path hourlyOutput = new Path(basePath, new Path("Identity/MemberAccount/hourly/2017/04/03/10"));

    GenericRecord r1 = createRandomRecord();
    writeFileWithContent(jobDir, "file1", r1, 20);

    EmbeddedGobblin firstRun = createEmbeddedGobblin("Recompaction-First", basePath);
    JobExecutionResult firstResult = firstRun.run();
    // Check success before reading the count so a failed job is reported as such.
    Assert.assertTrue(firstResult.isSuccessful());
    long recordCount = InputRecordCountHelper.readRecordCount(fs, hourlyOutput);
    Assert.assertEquals(recordCount, 20);

    // Now write more avro files to input dir
    writeFileWithContent(jobDir, "file2", r1, 22);

    EmbeddedGobblin secondRun = createEmbeddedGobblin("Recompaction-Second", basePath);
    // Capture the second run's own result; re-checking the first result would always pass.
    JobExecutionResult secondResult = secondRun.run();
    Assert.assertTrue(secondResult.isSuccessful());

    // If recompaction succeeded, a new record count should have been written.
    recordCount = InputRecordCountHelper.readRecordCount(fs, hourlyOutput);
    Assert.assertEquals(recordCount, 42);
    Assert.assertTrue(fs.exists(hourlyOutput));
}
Also used : Path(org.apache.hadoop.fs.Path) JobExecutionResult(org.apache.gobblin.runtime.api.JobExecutionResult) FileSystem(org.apache.hadoop.fs.FileSystem) EmbeddedGobblin(org.apache.gobblin.runtime.embedded.EmbeddedGobblin) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.testng.annotations.Test)

Example 15 with EmbeddedGobblin

use of org.apache.gobblin.runtime.embedded.EmbeddedGobblin in project incubator-gobblin by apache.

From the class MRCompactionTaskTest, method testDedup.

/**
 * Writes two input files into a minutely run directory under a temporary base path, then runs the
 * {@link EmbeddedGobblin} created by {@code createEmbeddedGobblin("dedup", ...)} and asserts that
 * the job reports success.
 */
@Test
public void testDedup() throws Exception {
    File tempRoot = Files.createTempDir();
    tempRoot.deleteOnExit();

    File inputDir = new File(tempRoot, "Identity/MemberAccount/minutely/2017/04/03/10/20_30/run_2017-04-03-10-20");
    Assert.assertTrue(inputDir.mkdirs());

    // Two independently generated records, each written to its own input file.
    GenericRecord firstRecord = createRandomRecord();
    GenericRecord secondRecord = createRandomRecord();
    writeFileWithContent(inputDir, "file1", firstRecord, 20);
    writeFileWithContent(inputDir, "file2", secondRecord, 18);

    EmbeddedGobblin compactionJob = createEmbeddedGobblin("dedup", tempRoot.getAbsolutePath());
    JobExecutionResult outcome = compactionJob.run();
    Assert.assertTrue(outcome.isSuccessful());
}
Also used : JobExecutionResult(org.apache.gobblin.runtime.api.JobExecutionResult) EmbeddedGobblin(org.apache.gobblin.runtime.embedded.EmbeddedGobblin) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.testng.annotations.Test)

Aggregations

EmbeddedGobblin (org.apache.gobblin.runtime.embedded.EmbeddedGobblin)16 Test (org.testng.annotations.Test)14 JobExecutionResult (org.apache.gobblin.runtime.api.JobExecutionResult)12 File (java.io.File)11 GenericRecord (org.apache.avro.generic.GenericRecord)7 EventBus (com.google.common.eventbus.EventBus)3 TestingEventBusAsserter (org.apache.gobblin.writer.test.TestingEventBusAsserter)2 TestingEventBuses (org.apache.gobblin.writer.test.TestingEventBuses)2 ExecutionException (java.util.concurrent.ExecutionException)1 TimeoutException (java.util.concurrent.TimeoutException)1 JobExecutionDriver (org.apache.gobblin.runtime.api.JobExecutionDriver)1 ClassAliasResolver (org.apache.gobblin.util.ClassAliasResolver)1 GobblinTestEventBusWriter (org.apache.gobblin.writer.test.GobblinTestEventBusWriter)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1