Search in sources:

Example 1 with DatasetState

use of org.apache.gobblin.runtime.JobState.DatasetState in project incubator-gobblin by apache.

the class FsCommitSequenceStoreTest method setUp.

/**
 * Prepares the fixtures used by this test class: a {@code FsCommitSequenceStore} on the local
 * file system rooted at "commit-sequence-store-test", and a commit sequence made of one
 * {@code FsRenameCommitStep} followed by one {@code DatasetStateCommitStep}.
 *
 * @throws IOException propagated from the file-system and store set-up calls
 */
@BeforeClass
public void setUp() throws IOException {
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    this.store = new FsCommitSequenceStore(localFs, new Path("commit-sequence-store-test"));

    // Properties shared by both commit steps.
    State stepProps = new State();
    stepProps.setId("propsId");
    stepProps.setProp("prop1", "valueOfProp1");
    stepProps.setProp("prop2", "valueOfProp2");

    // Dataset state attached to the second step; it carries one recorded job failure.
    DatasetState stateForStep = new DatasetState();
    stateForStep.setDatasetUrn(this.datasetUrn);
    stateForStep.incrementJobFailures();

    this.sequence = new CommitSequence.Builder()
        .withJobName("testjob")
        .withDatasetUrn("testurn")
        // Step 1: rename /ab/cd -> /ef/gh.
        .beginStep(FsRenameCommitStep.Builder.class)
            .from(new Path("/ab/cd"))
            .to(new Path("/ef/gh"))
            .withProps(stepProps)
        .endStep()
        // Step 2: commit the dataset state.
        .beginStep(DatasetStateCommitStep.Builder.class)
            .withDatasetUrn(this.datasetUrn)
            .withDatasetState(stateForStep)
            .withProps(stepProps)
        .endStep()
        .build();
}
Also used : FsCommitSequenceStore(org.apache.gobblin.runtime.commit.FsCommitSequenceStore) Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) DatasetStateCommitStep(org.apache.gobblin.runtime.commit.DatasetStateCommitStep) State(org.apache.gobblin.configuration.State) DatasetState(org.apache.gobblin.runtime.JobState.DatasetState) FileSystem(org.apache.hadoop.fs.FileSystem) DatasetState(org.apache.gobblin.runtime.JobState.DatasetState) BeforeClass(org.testng.annotations.BeforeClass)

Example 2 with DatasetState

use of org.apache.gobblin.runtime.JobState.DatasetState in project incubator-gobblin by apache.

the class JobLauncherTestHelper method runTestWithMultipleDatasets.

/**
 * Launches a job whose source is {@code MultiDatasetTestSource} and verifies the dataset state
 * persisted for each of the four datasets "Dataset0" .. "Dataset3".
 *
 * <p>For every dataset the first state returned by the dataset state store must be COMMITTED,
 * report one completed task and zero job failures, and each of its task states must be
 * COMMITTED, tagged with the dataset URN and have written {@code TestExtractor.TOTAL_RECORDS}
 * records.
 *
 * @param jobProps job configuration; this method sets the job id and source class on it
 * @throws Exception if launching the job, closing the launcher or reading the state store fails
 */
public void runTestWithMultipleDatasets(Properties jobProps) throws Exception {
    String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
    // newJobId already yields a String, so no toString() conversion is needed.
    String jobId = JobLauncherUtils.newJobId(jobName);
    jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);
    jobProps.setProperty(ConfigurationKeys.SOURCE_CLASS_KEY, MultiDatasetTestSource.class.getName());
    Closer closer = Closer.create();
    try {
        JobLauncher jobLauncher = closer.register(JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps));
        jobLauncher.launchJob(null);
    } finally {
        // Always release the launcher, even when the launch itself fails.
        closer.close();
    }
    for (int i = 0; i < 4; i++) {
        // Build the expected URN once per dataset instead of re-concatenating it for every assertion.
        String expectedUrn = "Dataset" + i;
        List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(jobName, expectedUrn + "-current.jst");
        DatasetState datasetState = datasetStateList.get(0);
        Assert.assertEquals(datasetState.getDatasetUrn(), expectedUrn);
        Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED);
        Assert.assertEquals(datasetState.getCompletedTasks(), 1);
        Assert.assertEquals(datasetState.getJobFailures(), 0);
        for (TaskState taskState : datasetState.getTaskStates()) {
            Assert.assertEquals(taskState.getProp(ConfigurationKeys.DATASET_URN_KEY), expectedUrn);
            Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
            Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN), TestExtractor.TOTAL_RECORDS);
        }
    }
}
Also used : Closer(com.google.common.io.Closer) DatasetState(org.apache.gobblin.runtime.JobState.DatasetState)

Example 3 with DatasetState

use of org.apache.gobblin.runtime.JobState.DatasetState in project incubator-gobblin by apache.

the class CommitSequenceTest method setUp.

/**
 * Prepares the fixtures used by this test class. It resets {@code ROOT_DIR} on the local file
 * system, creates two directories with one file each, and builds a commit sequence that swaps
 * the two files between the directories and then commits a dataset state.
 *
 * @throws IOException propagated from the file-system calls
 */
@BeforeClass
public void setUp() throws IOException {
    this.fs = FileSystem.getLocal(new Configuration());
    // Start from a clean slate: drop anything left under the root directory.
    this.fs.delete(new Path(ROOT_DIR), true);

    Path stateStoreRoot = new Path(ROOT_DIR, "store");
    Path firstDir = new Path(ROOT_DIR, "dir1");
    Path secondDir = new Path(ROOT_DIR, "dir2");
    this.fs.mkdirs(firstDir);
    this.fs.mkdirs(secondDir);

    // file1 goes dir1 -> dir2, file2 goes dir2 -> dir1.
    Path file1Source = new Path(firstDir, "file1");
    Path file2Source = new Path(secondDir, "file2");
    Path file1Target = new Path(secondDir, "file1");
    Path file2Target = new Path(firstDir, "file2");
    this.fs.createNewFile(file1Source);
    this.fs.createNewFile(file2Source);

    DatasetState datasetState = new DatasetState("job-name", "job-id");
    datasetState.setDatasetUrn("urn");
    datasetState.setNoJobFailure();

    // Properties shared by all steps; they carry the state store root directory.
    State stepProps = new State();
    stepProps.setProp(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY, stateStoreRoot.toString());

    this.sequence = new CommitSequence.Builder()
        .withJobName("testjob")
        .withDatasetUrn("testurn")
        .beginStep(FsRenameCommitStep.Builder.class)
            .from(file1Source)
            .to(file1Target)
            .withProps(stepProps)
        .endStep()
        .beginStep(FsRenameCommitStep.Builder.class)
            .from(file2Source)
            .to(file2Target)
            .withProps(stepProps)
        .endStep()
        .beginStep(DatasetStateCommitStep.Builder.class)
            .withDatasetUrn("urn")
            .withDatasetState(datasetState)
            .withProps(stepProps)
        .endStep()
        .build();
}
Also used : Path(org.apache.hadoop.fs.Path) FsRenameCommitStep(org.apache.gobblin.commit.FsRenameCommitStep) CommitSequence(org.apache.gobblin.commit.CommitSequence) Configuration(org.apache.hadoop.conf.Configuration) State(org.apache.gobblin.configuration.State) DatasetState(org.apache.gobblin.runtime.JobState.DatasetState) DatasetState(org.apache.gobblin.runtime.JobState.DatasetState) BeforeClass(org.testng.annotations.BeforeClass)

Example 4 with DatasetState

use of org.apache.gobblin.runtime.JobState.DatasetState in project incubator-gobblin by apache.

the class JobLauncherTestHelper method runTestWithPullLimit.

/**
 * Launches the job described by {@code jobProps} and verifies that every task extracted and
 * wrote exactly {@code limit} rows.
 *
 * <p>The first dataset state stored for the job must be COMMITTED, report four completed tasks
 * and zero job failures, and each of its task states must be COMMITTED.
 *
 * @param jobProps job configuration; this method sets the job id on it
 * @param limit expected number of rows extracted and written by each task
 * @throws Exception if launching the job, closing the launcher or reading the state store fails
 */
public void runTestWithPullLimit(Properties jobProps, long limit) throws Exception {
    String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
    // newJobId already yields a String, so no toString() conversion is needed.
    String jobId = JobLauncherUtils.newJobId(jobName);
    jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);
    Closer closer = Closer.create();
    try {
        JobLauncher jobLauncher = closer.register(JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps));
        jobLauncher.launchJob(null);
    } finally {
        // Always release the launcher, even when the launch itself fails.
        closer.close();
    }
    List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(jobName, sanitizeJobNameForDatasetStore(jobId) + ".jst");
    DatasetState datasetState = datasetStateList.get(0);
    Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED);
    Assert.assertEquals(datasetState.getCompletedTasks(), 4);
    Assert.assertEquals(datasetState.getJobFailures(), 0);
    for (TaskState taskState : datasetState.getTaskStates()) {
        Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
        Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.EXTRACTOR_ROWS_EXTRACTED), limit);
        Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.WRITER_ROWS_WRITTEN), limit);
    }
}
Also used : Closer(com.google.common.io.Closer) DatasetState(org.apache.gobblin.runtime.JobState.DatasetState)

Example 5 with DatasetState

use of org.apache.gobblin.runtime.JobState.DatasetState in project incubator-gobblin by apache.

the class JobLauncherTestHelper method runTest.

/**
 * Launches the job described by {@code jobProps} and verifies both its metric context tags and
 * the dataset state it persisted.
 *
 * <p>The job metrics must be present and their tags must mention
 * {@code ClusterNameTags.CLUSTER_IDENTIFIER_TAG_NAME}. The first stored dataset state must be
 * COMMITTED with four completed tasks and zero job failures, and each task state must be
 * COMMITTED and have written {@code TestExtractor.TOTAL_RECORDS} records.
 *
 * @param jobProps job configuration; this method sets the job id on it
 * @throws Exception if launching the job, closing the launcher or reading the state store fails
 */
public void runTest(Properties jobProps) throws Exception {
    String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY);
    String jobId = JobLauncherUtils.newJobId(jobName);
    jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId);

    JobContext jobContext = null;
    Closer closer = Closer.create();
    try {
        JobLauncher launcher = closer.register(JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps));
        launcher.launchJob(null);
        // Grab the context before the launcher is closed so it can be inspected afterwards.
        jobContext = ((AbstractJobLauncher) launcher).getJobContext();
    } finally {
        closer.close();
    }

    // Metric context checks.
    Assert.assertTrue(jobContext.getJobMetricsOptional().isPresent());
    String metricTags = jobContext.getJobMetricsOptional().get().getMetricContext().getTags().toString();
    Assert.assertTrue(
        metricTags.contains(ClusterNameTags.CLUSTER_IDENTIFIER_TAG_NAME),
        ClusterNameTags.CLUSTER_IDENTIFIER_TAG_NAME + " tag missing in job metric context tags.");

    // Persisted dataset state checks.
    String stateFileName = sanitizeJobNameForDatasetStore(jobId) + ".jst";
    List<JobState.DatasetState> storedStates = this.datasetStateStore.getAll(jobName, stateFileName);
    DatasetState firstState = storedStates.get(0);
    Assert.assertEquals(firstState.getState(), JobState.RunningState.COMMITTED);
    Assert.assertEquals(firstState.getCompletedTasks(), 4);
    Assert.assertEquals(firstState.getJobFailures(), 0);
    for (TaskState task : firstState.getTaskStates()) {
        Assert.assertEquals(task.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
        Assert.assertEquals(task.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN), TestExtractor.TOTAL_RECORDS);
    }
}
Also used : Closer(com.google.common.io.Closer) DatasetState(org.apache.gobblin.runtime.JobState.DatasetState)

Aggregations

DatasetState (org.apache.gobblin.runtime.JobState.DatasetState)8 Closer (com.google.common.io.Closer)4 Configuration (org.apache.hadoop.conf.Configuration)3 Path (org.apache.hadoop.fs.Path)3 State (org.apache.gobblin.configuration.State)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 BeforeClass (org.testng.annotations.BeforeClass)2 Function (com.google.common.base.Function)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 IOException (java.io.IOException)1 Map (java.util.Map)1 CommitSequence (org.apache.gobblin.commit.CommitSequence)1 DeliverySemantics (org.apache.gobblin.commit.DeliverySemantics)1 FsRenameCommitStep (org.apache.gobblin.commit.FsRenameCommitStep)1 DatasetStateCommitStep (org.apache.gobblin.runtime.commit.DatasetStateCommitStep)1 FsCommitSequenceStore (org.apache.gobblin.runtime.commit.FsCommitSequenceStore)1 Either (org.apache.gobblin.util.Either)1