Search in sources :

Example 31 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class TestStressTestingSource method testRunDuration.

/**
 * Configures a {@link StressTestingSource} with a run duration, drains the extractor of its
 * single work unit, and asserts that
 * <ul>
 *   <li>every record returned has length {@code MEM_ALLOC_BYTES}, and</li>
 *   <li>the time spent reading is within one second of {@code RUN_DURATION_SECS}.</li>
 * </ul>
 *
 * <p>The test is declared with {@code enabled = false}; its final assertion depends on
 * wall-clock time.
 *
 * @throws DataRecordException declared by the extractor's {@code readRecord}
 * @throws IOException declared by the extractor's {@code readRecord}
 */
@Test(enabled = false)
public void testRunDuration() throws DataRecordException, IOException {
    final int MEM_ALLOC_BYTES = 100;
    final int NUM_WORK_UNITS = 1;
    final int SLEEP_TIME_MICRO = 1000;
    // this config is ignored since the duration is set
    final int NUM_RECORDS = 30;
    final int RUN_DURATION_SECS = 5;
    // long-typed so that the duration arithmetic below is carried out in 64 bits
    final long MICROS_PER_SECOND = 1000000L;
    final long NANOS_PER_MICRO = 1000L;

    SourceState state = new SourceState();
    state.setProp(StressTestingSource.NUM_WORK_UNITS_KEY, NUM_WORK_UNITS);
    state.setProp(StressTestingSource.MEM_ALLOC_BYTES_KEY, MEM_ALLOC_BYTES);
    state.setProp(StressTestingSource.SLEEP_TIME_MICRO_KEY, SLEEP_TIME_MICRO);
    state.setProp(StressTestingSource.NUM_RECORDS_KEY, NUM_RECORDS);
    state.setProp(StressTestingSource.RUN_DURATION_KEY, RUN_DURATION_SECS);

    StressTestingSource source = new StressTestingSource();
    List<WorkUnit> wus = source.getWorkunits(state);
    Assert.assertEquals(wus.size(), NUM_WORK_UNITS);

    WorkUnit wu = wus.get(0);
    WorkUnitState wuState = new WorkUnitState(wu, state);
    Extractor<String, byte[]> extractor = source.getExtractor(wuState);

    byte[] record;
    long startTimeNano = System.nanoTime();
    while ((record = extractor.readRecord(null)) != null) {
        // compare against the configured constant rather than a repeated literal
        Assert.assertEquals(record.length, MEM_ALLOC_BYTES);
    }
    long endTimeNano = System.nanoTime();

    long timeSpentMicro = (endTimeNano - startTimeNano) / NANOS_PER_MICRO;
    long expectedMicro = RUN_DURATION_SECS * MICROS_PER_SECOND;
    // check that there is less than 1 second difference between expected and actual time spent
    Assert.assertTrue(Math.abs(timeSpentMicro - expectedMicro) < MICROS_PER_SECOND, "Time spent " + timeSpentMicro);
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Example 32 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class TestStressTestingSource method testSleepTime.

/**
 * Configures a {@link StressTestingSource} with a per-record sleep time and a record count
 * (no run duration), drains the extractor of its single work unit, and asserts that
 * <ul>
 *   <li>every record returned has length {@code MEM_ALLOC_BYTES}, and</li>
 *   <li>the time spent reading is within two seconds of
 *       {@code SLEEP_TIME_MICRO * NUM_RECORDS} microseconds.</li>
 * </ul>
 *
 * <p>The test is declared with {@code enabled = false}; its final assertion depends on
 * wall-clock time.
 *
 * @throws DataRecordException declared by the extractor's {@code readRecord}
 * @throws IOException declared by the extractor's {@code readRecord}
 */
@Test(enabled = false)
public void testSleepTime() throws DataRecordException, IOException {
    final int MEM_ALLOC_BYTES = 100;
    final int NUM_WORK_UNITS = 1;
    final int SLEEP_TIME_MICRO = 10000;
    final int NUM_RECORDS = 500;
    final long NANOS_PER_MICRO = 1000L;
    final long TOLERANCE_MICRO = 2000000L;

    SourceState state = new SourceState();
    state.setProp(StressTestingSource.NUM_WORK_UNITS_KEY, NUM_WORK_UNITS);
    state.setProp(StressTestingSource.MEM_ALLOC_BYTES_KEY, MEM_ALLOC_BYTES);
    state.setProp(StressTestingSource.SLEEP_TIME_MICRO_KEY, SLEEP_TIME_MICRO);
    state.setProp(StressTestingSource.NUM_RECORDS_KEY, NUM_RECORDS);

    StressTestingSource source = new StressTestingSource();
    List<WorkUnit> wus = source.getWorkunits(state);
    Assert.assertEquals(wus.size(), NUM_WORK_UNITS);

    WorkUnit wu = wus.get(0);
    WorkUnitState wuState = new WorkUnitState(wu, state);
    Extractor<String, byte[]> extractor = source.getExtractor(wuState);

    byte[] record;
    long startTimeNano = System.nanoTime();
    while ((record = extractor.readRecord(null)) != null) {
        // compare against the configured constant rather than a repeated literal
        Assert.assertEquals(record.length, MEM_ALLOC_BYTES);
    }
    long endTimeNano = System.nanoTime();

    long timeSpentMicro = (endTimeNano - startTimeNano) / NANOS_PER_MICRO;
    // widen before multiplying so larger constants cannot overflow int
    long expectedMicro = (long) SLEEP_TIME_MICRO * NUM_RECORDS;
    // check that there is less than 2 second difference between expected and actual time spent
    Assert.assertTrue(Math.abs(timeSpentMicro - expectedMicro) < TOLERANCE_MICRO, "Time spent " + timeSpentMicro);
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Example 33 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class JobLauncherUtilsTest method testDeleteStagingDataWithOutWriterFilePath.

/**
 * Creates staging and output directories for two fork branches on the local file system,
 * calls {@code JobLauncherUtils.cleanTaskStagingData} with a {@link WorkUnitState} describing
 * them, and asserts that none of the four directories exists afterwards. The test root
 * directory is deleted recursively in a {@code finally} block.
 *
 * @throws IOException declared by the file-system calls used here
 */
@Test
public void testDeleteStagingDataWithOutWriterFilePath() throws IOException {
    final FileSystem fs = FileSystem.getLocal(new Configuration());
    final int branchCount = 2;
    final String[] branchNames = { "fork_0", "fork_1" };
    final Path rootDir = new Path("gobblin-test/job-launcher-utils-test");

    // Per-branch staging/output directories, indexed by branch id.
    final Path[] stagingDirs = new Path[branchCount];
    final Path[] outputDirs = new Path[branchCount];
    for (int branch = 0; branch < branchCount; branch++) {
        stagingDirs[branch] = new Path(rootDir, "staging" + Path.SEPARATOR + branchNames[branch]);
        outputDirs[branch] = new Path(rootDir, "output" + Path.SEPARATOR + branchNames[branch]);
    }

    try {
        Extract extract = new Extract(new SourceState(), TableType.APPEND_ONLY, "gobblin.test", "test-table");
        WorkUnitState state = new WorkUnitState(WorkUnit.create(extract));

        state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, "2");
        for (int branch = 0; branch < branchCount; branch++) {
            state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.FORK_BRANCH_NAME_KEY, branchCount, branch),
                branchNames[branch]);
            state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, branchCount, branch),
                ConfigurationKeys.LOCAL_FS_URI);
            state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, branchCount, branch),
                stagingDirs[branch].toString());
            state.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, branchCount, branch),
                outputDirs[branch].toString());
        }

        // Slots [0, branchCount) hold staging paths, [branchCount, 2 * branchCount) hold output paths.
        final Path[] createdPaths = new Path[2 * branchCount];
        for (int branch = 0; branch < branchCount; branch++) {
            createdPaths[branch] = new Path(stagingDirs[branch],
                ForkOperatorUtils.getPathForBranch(state, state.getExtract().getOutputFilePath(), branchCount, branch));
            fs.mkdirs(createdPaths[branch]);
        }
        for (int branch = 0; branch < branchCount; branch++) {
            createdPaths[branchCount + branch] = new Path(outputDirs[branch],
                ForkOperatorUtils.getPathForBranch(state, state.getExtract().getOutputFilePath(), branchCount, branch));
            fs.mkdirs(createdPaths[branchCount + branch]);
        }

        JobLauncherUtils.cleanTaskStagingData(state, LoggerFactory.getLogger(JobLauncherUtilsTest.class));

        for (Path created : createdPaths) {
            Assert.assertFalse(fs.exists(created));
        }
    } finally {
        fs.delete(rootDir, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SourceState(org.apache.gobblin.configuration.SourceState) Configuration(org.apache.hadoop.conf.Configuration) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) FileSystem(org.apache.hadoop.fs.FileSystem) Extract(org.apache.gobblin.source.workunit.Extract) Test(org.testng.annotations.Test)

Example 34 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class WriterUtilsTest method testGetDefaultWriterFilePathWithWorkUnitState.

/**
 * Asserts what {@code WriterUtils.getWriterFilePath} returns for a {@link WorkUnitState} built
 * from a fresh extract: with arguments {@code (0, 0)} it equals the extract's output file path;
 * with {@code (2, 0)} it equals that path with the child
 * {@code DEFAULT_FORK_BRANCH_NAME + "0"} appended.
 */
@Test
public void testGetDefaultWriterFilePathWithWorkUnitState() {
    Extract extract = new Extract(new SourceState(), TableType.APPEND_ONLY, "gobblin.test", "test-table");
    WorkUnitState workUnitState = new WorkUnitState(WorkUnit.create(extract));

    // First check: arguments (0, 0).
    Path actualUnforked = WriterUtils.getWriterFilePath(workUnitState, 0, 0);
    Path expectedUnforked = new Path(workUnitState.getExtract().getOutputFilePath());
    Assert.assertEquals(actualUnforked, expectedUnforked);

    // Second check: arguments (2, 0).
    Path actualForked = WriterUtils.getWriterFilePath(workUnitState, 2, 0);
    Path expectedForked = new Path(workUnitState.getExtract().getOutputFilePath(),
        ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + "0");
    Assert.assertEquals(actualForked, expectedForked);
}
Also used : Path(org.apache.hadoop.fs.Path) SourceState(org.apache.gobblin.configuration.SourceState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Extract(org.apache.gobblin.source.workunit.Extract) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Example 35 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class WriterUtilsTest method testGetDefaultWriterFilePath.

/**
 * Asserts what {@code WriterUtils.getWriterFilePath} returns for a plain {@link WorkUnit} built
 * from a fresh extract: with arguments {@code (0, 0)} it equals the extract's output file path;
 * with {@code (2, 0)} it equals that path with the child
 * {@code DEFAULT_FORK_BRANCH_NAME + "0"} appended.
 */
@Test
public void testGetDefaultWriterFilePath() {
    Extract extract = new Extract(new SourceState(), TableType.APPEND_ONLY, "gobblin.test", "test-table");
    WorkUnit workUnit = WorkUnit.create(extract);

    // First check: arguments (0, 0).
    Path actualUnforked = WriterUtils.getWriterFilePath(workUnit, 0, 0);
    Path expectedUnforked = new Path(workUnit.getExtract().getOutputFilePath());
    Assert.assertEquals(actualUnforked, expectedUnforked);

    // Second check: arguments (2, 0).
    Path actualForked = WriterUtils.getWriterFilePath(workUnit, 2, 0);
    Path expectedForked = new Path(workUnit.getExtract().getOutputFilePath(),
        ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + "0");
    Assert.assertEquals(actualForked, expectedForked);
}
Also used : Path(org.apache.hadoop.fs.Path) SourceState(org.apache.gobblin.configuration.SourceState) Extract(org.apache.gobblin.source.workunit.Extract) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Aggregations

SourceState (org.apache.gobblin.configuration.SourceState)90 Test (org.testng.annotations.Test)76 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)44 WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)38 State (org.apache.gobblin.configuration.State)30 WorkingState (org.apache.gobblin.configuration.WorkUnitState.WorkingState)11 Partition (org.apache.hadoop.hive.ql.metadata.Partition)8 Table (org.apache.hadoop.hive.ql.metadata.Table)8 IterableDatasetFinder (org.apache.gobblin.dataset.IterableDatasetFinder)7 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)7 Extract (org.apache.gobblin.source.workunit.Extract)7 DateTime (org.joda.time.DateTime)7 Dataset (org.apache.gobblin.dataset.Dataset)6 PartitionableDataset (org.apache.gobblin.dataset.PartitionableDataset)6 MultiWorkUnit (org.apache.gobblin.source.workunit.MultiWorkUnit)6 WorkUnitStream (org.apache.gobblin.source.workunit.WorkUnitStream)6 IOException (java.io.IOException)5 Path (org.apache.hadoop.fs.Path)5 Gson (com.google.gson.Gson)4 JsonObject (com.google.gson.JsonObject)4