Search in sources :

Example 51 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class StringFilterConverterTest method testConvertRecordWithSimpleRegex.

/**
 * Test for {@link StringFilterConverter#convertRecord(Class, String, WorkUnitState)} with a regex that is only a
 * sequence of letters.
 *
 * <p>The converter is initialised with the pattern {@code HelloWorld}. The test expects the input
 * {@code HelloWorld} to be emitted exactly once and unchanged, and the input {@code Hello} to yield no records.
 *
 * @throws DataConversionException if thrown by the converter under test
 */
@Test
public void testConvertRecordWithSimpleRegex() throws DataConversionException {
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.setProp(ConfigurationKeys.CONVERTER_STRING_FILTER_PATTERN, "HelloWorld");
    StringFilterConverter converter = new StringFilterConverter();
    converter.init(workUnitState);

    // Test that HelloWorld matches the pattern HelloWorld: exactly one record, equal to the input.
    String test = "HelloWorld";
    Iterator<String> itr = converter.convertRecord(String.class, test, workUnitState).iterator();
    Assert.assertTrue(itr.hasNext());
    Assert.assertEquals(itr.next(), test);
    // assertFalse states the intent directly instead of negating the condition inside assertTrue.
    Assert.assertFalse(itr.hasNext());

    // Test that Hello does not match the pattern HelloWorld: the record is filtered out entirely.
    test = "Hello";
    itr = converter.convertRecord(String.class, test, workUnitState).iterator();
    Assert.assertFalse(itr.hasNext());
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Test(org.testng.annotations.Test)

Example 52 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class StringFilterConverterTest method testConvertRecordWithComplexRegex.

/**
 * Test for {@link StringFilterConverter#convertRecord(Class, String, WorkUnitState)} with a regex that actually uses
 * regex features, such as wildcards.
 *
 * <p>The converter is initialised with the pattern {@code .*}. The test expects both {@code HelloWorld} and
 * {@code Java} to be emitted exactly once and unchanged.
 *
 * @throws DataConversionException if thrown by the converter under test
 */
@Test
public void testConvertRecordWithComplexRegex() throws DataConversionException {
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.setProp(ConfigurationKeys.CONVERTER_STRING_FILTER_PATTERN, ".*");
    StringFilterConverter converter = new StringFilterConverter();
    converter.init(workUnitState);

    // Test that HelloWorld matches the pattern .*
    String test = "HelloWorld";
    Iterator<String> itr = converter.convertRecord(String.class, test, workUnitState).iterator();
    Assert.assertTrue(itr.hasNext());
    Assert.assertEquals(itr.next(), test);
    // assertFalse states the intent directly instead of negating the condition inside assertTrue.
    Assert.assertFalse(itr.hasNext());

    // Test that Java matches the pattern .*
    test = "Java";
    itr = converter.convertRecord(String.class, test, workUnitState).iterator();
    Assert.assertTrue(itr.hasNext());
    Assert.assertEquals(itr.next(), test);
    Assert.assertFalse(itr.hasNext());
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Test(org.testng.annotations.Test)

Example 53 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class BaseDataPublisherTest method testWithFsMetricsNoPartitions.

/**
 * Publishes metadata for three work units that each carry serialized {@link FsWriterMetrics} and then checks the
 * {@code metadata.json} file written into a temporary publish directory.
 *
 * <p>The temporary directory is always removed afterwards, whether or not the assertions pass.
 *
 * @throws IOException if thrown by the publisher, the metadata check, or the directory clean-up
 */
@Test
public void testWithFsMetricsNoPartitions() throws IOException {
    File publishPath = Files.createTempDir();
    try {
        State jobState = buildDefaultState(1);
        String globalMetadataJson = new GlobalMetadata().toJson();

        // Publisher configuration: no separate metadata output dir, writer metadata publishing switched on,
        // and output going to "metadata.json" directly under the temporary final dir.
        jobState.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
        jobState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
        jobState.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishPath.getAbsolutePath());
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
        jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");

        // First work unit: metrics for foo1.json.
        WorkUnitState firstWorkUnit = new WorkUnitState();
        FsWriterMetrics firstMetrics = buildWriterMetrics("foo1.json", null, 0, 10);
        firstWorkUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, firstMetrics.toJson());
        firstWorkUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
        addStateToWorkunit(jobState, firstWorkUnit);

        // Second work unit: metrics for foo3.json.
        WorkUnitState secondWorkUnit = new WorkUnitState();
        FsWriterMetrics secondMetrics = buildWriterMetrics("foo3.json", null, 1, 30);
        secondWorkUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
        secondWorkUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, secondMetrics.toJson());
        addStateToWorkunit(jobState, secondWorkUnit);

        // Third work unit: metrics for foo4.json.
        WorkUnitState thirdWorkUnit = new WorkUnitState();
        FsWriterMetrics thirdMetrics = buildWriterMetrics("foo4.json", null, 2, 55);
        thirdWorkUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, globalMetadataJson);
        thirdWorkUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, thirdMetrics.toJson());
        addStateToWorkunit(jobState, thirdWorkUnit);

        BaseDataPublisher publisher = new BaseDataPublisher(jobState);
        publisher.publishMetadata(ImmutableList.of(firstWorkUnit, secondWorkUnit, thirdWorkUnit));

        // NOTE(review): 3 and 95 presumably are the expected file count and the summed record count
        // (10 + 30 + 55) - confirm against checkMetadata.
        File publishedMetadata = new File(publishPath.getAbsolutePath(), "metadata.json");
        checkMetadata(publishedMetadata, 3, 95,
            new FsWriterMetrics.FileInfo("foo3.json", 30),
            new FsWriterMetrics.FileInfo("foo1.json", 10),
            new FsWriterMetrics.FileInfo("foo4.json", 55));
    } finally {
        FileUtils.deleteDirectory(publishPath);
    }
}
Also used : GlobalMetadata(org.apache.gobblin.metadata.types.GlobalMetadata) TaskState(org.apache.hadoop.mapreduce.v2.api.records.TaskState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) FsWriterMetrics(org.apache.gobblin.writer.FsWriterMetrics) File(java.io.File) Test(org.testng.annotations.Test)

Example 54 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class BaseDataPublisherTest method buildTaskState.

/**
 * Builds a {@link WorkUnitState} attached to a newly created task-level broker and pre-filled with extract,
 * writer and publisher properties.
 *
 * @param numBranches value stored under {@code FORK_BRANCHES_KEY}; when greater than 1, a branch-specific final
 *     dir and writer output dir are additionally set for every index in {@code [0, numBranches)}
 * @return the populated work unit state
 */
private WorkUnitState buildTaskState(int numBranches) {
    // Broker chain: top-level (global scope) -> job scope -> task scope.
    SharedResourcesBroker<GobblinScopeTypes> topLevelBroker = SharedResourcesBrokerFactory.createDefaultTopLevelBroker(
        ConfigFactory.empty(), GobblinScopeTypes.GLOBAL.defaultScopeInstance());

    // The current time is used as the job id and as a suffix of the task scope name.
    JobScopeInstance jobScope = new JobScopeInstance("LineageEventTest", String.valueOf(System.currentTimeMillis()));
    SharedResourcesBroker<GobblinScopeTypes> jobLevelBroker = topLevelBroker.newSubscopedBuilder(jobScope).build();

    TaskScopeInstance taskScope = new TaskScopeInstance("LineageEventTestTask" + System.currentTimeMillis());
    SharedResourcesBroker<GobblinScopeTypes> taskLevelBroker = jobLevelBroker.newSubscopedBuilder(taskScope).build();

    WorkUnitState taskState = new WorkUnitState(WorkUnit.createEmpty(), new State(), taskLevelBroker);
    taskState.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, "namespace");
    taskState.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, "table");
    taskState.setProp(ConfigurationKeys.WRITER_FILE_PATH_TYPE, "namespace_table");
    taskState.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, numBranches);
    taskState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/data/output");
    taskState.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, "/data/working");

    // With at most one branch only the un-suffixed directories above are set.
    if (numBranches <= 1) {
        return taskState;
    }

    // Multiple branches: add "<key>.<branch>" entries pointing at per-branch sub-directories.
    for (int branch = 0; branch < numBranches; branch++) {
        String branchSuffix = "/branch" + branch;
        taskState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR + "." + branch, "/data/output" + branchSuffix);
        taskState.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR + "." + branch, "/data/working" + branchSuffix);
    }
    return taskState;
}
Also used : GobblinScopeTypes(org.apache.gobblin.broker.gobblin_scopes.GobblinScopeTypes) JobScopeInstance(org.apache.gobblin.broker.gobblin_scopes.JobScopeInstance) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) TaskState(org.apache.hadoop.mapreduce.v2.api.records.TaskState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) TaskScopeInstance(org.apache.gobblin.broker.gobblin_scopes.TaskScopeInstance)

Example 55 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class BaseDataPublisherTest method testPublishSingleTask.

/**
 * Publishes data for a single-branch task state whose lineage source has been set, then checks that a lineage
 * destination property exists for branch 0 and not for branch 1.
 *
 * @throws IOException if thrown while constructing the publisher or publishing the data
 */
@Test
public void testPublishSingleTask() throws IOException {
    WorkUnitState taskState = buildTaskState(1);

    // Register the source dataset through the lineage info obtained from the task broker.
    LineageInfo.getLineageInfo(taskState.getTaskBroker()).get()
        .setSource(new DatasetDescriptor("kafka", "testTopic"), taskState);

    new BaseDataPublisher(taskState).publishData(taskState);

    String branchZeroDestinationKey = "gobblin.event.lineage.branch.0.destination";
    String branchOneDestinationKey = "gobblin.event.lineage.branch.1.destination";
    Assert.assertTrue(taskState.contains(branchZeroDestinationKey));
    Assert.assertFalse(taskState.contains(branchOneDestinationKey));
}
Also used : DatasetDescriptor(org.apache.gobblin.dataset.DatasetDescriptor) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) LineageInfo(org.apache.gobblin.metrics.event.lineage.LineageInfo) Test(org.testng.annotations.Test)

Aggregations

WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 222; Test (org.testng.annotations.Test): 143; State (org.apache.gobblin.configuration.State): 48; SourceState (org.apache.gobblin.configuration.SourceState): 39; WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 39; Schema (org.apache.avro.Schema): 29; Path (org.apache.hadoop.fs.Path): 26; GenericRecord (org.apache.avro.generic.GenericRecord): 19; JsonObject (com.google.gson.JsonObject): 17; ArrayList (java.util.ArrayList): 16; File (java.io.File): 14; TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState): 12; List (java.util.List): 11; Configuration (org.apache.hadoop.conf.Configuration): 11; IOException (java.io.IOException): 10; LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark): 10; Extract (org.apache.gobblin.source.workunit.Extract): 10; FileSystem (org.apache.hadoop.fs.FileSystem): 10; Closer (com.google.common.io.Closer): 8; JsonParser (com.google.gson.JsonParser): 8