Search in sources :

Example 26 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class HiveSourceTest method testShouldCreateWorkunitsNewerThanLookback.

/**
 * Verifies that a partition created two days ago is not reported as older than the lookback
 * window by {@code HiveSource.isOlderThanLookback}.
 */
@Test
public void testShouldCreateWorkunitsNewerThanLookback() throws Exception {
    // Default lookback time is 3 days, so a creation time of "now minus 2 days" lies inside it.
    long now = System.currentTimeMillis();
    long createdTwoDaysAgo = new DateTime(now).minusDays(2).getMillis();
    org.apache.hadoop.hive.ql.metadata.Partition recentPartition =
        this.hiveMetastoreTestUtils.createDummyPartition(createdTwoDaysAgo);

    SourceState state = getTestState("testDb7");
    HiveSource hiveSource = new HiveSource();
    hiveSource.initialize(state);

    boolean olderThanLookback = hiveSource.isOlderThanLookback(recentPartition);
    Assert.assertEquals(olderThanLookback, false, "Should create workunits newer than lookback");
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) HiveSource(org.apache.gobblin.data.management.conversion.hive.source.HiveSource) DateTime(org.joda.time.DateTime) Test(org.testng.annotations.Test)

Example 27 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class DatasetFinderSourceTest method testDrilledDown.

/**
 * Builds a source over three datasets (the middle one carrying partitions "p1" and "p2") and
 * checks that it produces four work units in order: dataset1, dataset2/p1, dataset2/p2 and
 * dataset3. Also checks that the work-unit stream yields the same work units as the list call.
 */
@Test
public void testDrilledDown() {
    Dataset firstPlain = new SimpleDatasetForTesting("dataset1");
    Dataset partitioned = new SimplePartitionableDatasetForTesting("dataset2",
        Lists.newArrayList(new SimpleDatasetPartitionForTesting("p1"), new SimpleDatasetPartitionForTesting("p2")));
    Dataset lastPlain = new SimpleDatasetForTesting("dataset3");
    IterableDatasetFinder datasetFinder =
        new StaticDatasetsFinderForTesting(Lists.newArrayList(firstPlain, partitioned, lastPlain));
    // NOTE(review): the `true` flag presumably enables drilling down into partitions — confirm against MySource.
    MySource source = new MySource(true, datasetFinder);

    List<WorkUnit> listed = source.getWorkunits(new SourceState());
    Assert.assertEquals(listed.size(), 4);

    // Plain dataset: dataset URN only, no partition URN.
    WorkUnit first = listed.get(0);
    Assert.assertEquals(first.getProp(DATASET_URN), "dataset1");
    Assert.assertNull(first.getProp(PARTITION_URN));

    // Partitionable dataset: one work unit per partition.
    WorkUnit second = listed.get(1);
    Assert.assertEquals(second.getProp(DATASET_URN), "dataset2");
    Assert.assertEquals(second.getProp(PARTITION_URN), "p1");

    WorkUnit third = listed.get(2);
    Assert.assertEquals(third.getProp(DATASET_URN), "dataset2");
    Assert.assertEquals(third.getProp(PARTITION_URN), "p2");

    WorkUnit fourth = listed.get(3);
    Assert.assertEquals(fourth.getProp(DATASET_URN), "dataset3");
    Assert.assertNull(fourth.getProp(PARTITION_URN));

    // The streaming API must agree with the list API.
    WorkUnitStream streamed = source.getWorkunitStream(new SourceState());
    Assert.assertEquals(Lists.newArrayList(streamed.getWorkUnits()), listed);
}
Also used : SimpleDatasetPartitionForTesting(org.apache.gobblin.dataset.test.SimpleDatasetPartitionForTesting) WorkUnitStream(org.apache.gobblin.source.workunit.WorkUnitStream) SimpleDatasetForTesting(org.apache.gobblin.dataset.test.SimpleDatasetForTesting) SourceState(org.apache.gobblin.configuration.SourceState) IterableDatasetFinder(org.apache.gobblin.dataset.IterableDatasetFinder) PartitionableDataset(org.apache.gobblin.dataset.PartitionableDataset) Dataset(org.apache.gobblin.dataset.Dataset) SimplePartitionableDatasetForTesting(org.apache.gobblin.dataset.test.SimplePartitionableDatasetForTesting) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) StaticDatasetsFinderForTesting(org.apache.gobblin.dataset.test.StaticDatasetsFinderForTesting) Test(org.testng.annotations.Test)

Example 28 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class JsonIntermediateToParquetGroupConverterTest method setUp.

/**
 * Loads the JSON test cases found at {@code RESOURCE_PATH} into {@code testCases} and builds the
 * shared {@code workUnit} state used by the tests.
 *
 * <p>The resource is decoded explicitly as UTF-8 rather than with the platform default charset,
 * and the reader is closed as soon as parsing finishes.
 *
 * @throws java.io.UncheckedIOException if closing the resource reader fails
 */
@BeforeClass
public static void setUp() {
    Gson gson = new Gson();
    // try-with-resources guarantees the underlying classpath stream is released.
    try (InputStreamReader reader = new InputStreamReader(
        JsonIntermediateToParquetGroupConverter.class.getResourceAsStream(RESOURCE_PATH),
        java.nio.charset.StandardCharsets.UTF_8)) {
        testCases = gson.fromJson(reader, JsonObject.class);
    } catch (java.io.IOException e) {
        // Keep the method signature free of checked exceptions while preserving the cause.
        throw new java.io.UncheckedIOException("Failed to read test resource " + RESOURCE_PATH, e);
    }
    SourceState source = new SourceState();
    workUnit = new WorkUnitState(source.createWorkUnit(source.createExtract(Extract.TableType.SNAPSHOT_ONLY, "test_namespace", "test_table")));
}
Also used : MessageType(parquet.schema.MessageType) Type(java.lang.reflect.Type) SourceState(org.apache.gobblin.configuration.SourceState) InputStreamReader(java.io.InputStreamReader) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) JsonObject(com.google.gson.JsonObject) Gson(com.google.gson.Gson) BeforeClass(org.testng.annotations.BeforeClass)

Example 29 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class TaskMetricsTest method testTaskGetMetrics.

/**
 * Checks that the metric context exposed by a task's {@code TaskMetrics} is non-null and has the
 * metric context of the corresponding {@code JobMetrics} as its parent.
 */
@Test
public void testTaskGetMetrics() {
    final String jobName = "jobName";
    final String jobId = "job_456";
    final String taskId = "task_456";

    // Job-level metrics are fetched first; the final assertion compares against their context.
    JobMetrics expectedParent = JobMetrics.get(new JobState(jobName, jobId));

    // The task state carries the same job id plus its own task id.
    State taskProps = new State();
    taskProps.setProp(ConfigurationKeys.JOB_ID_KEY, jobId);
    taskProps.setProp(ConfigurationKeys.TASK_ID_KEY, taskId);

    WorkUnit unit = new WorkUnit(new SourceState(taskProps, new ArrayList<WorkUnitState>()), null);
    TaskMetrics taskMetrics = new TaskMetrics(new TaskState(new WorkUnitState(unit)));

    Assert.assertNotNull(taskMetrics.getMetricContext());
    Assert.assertTrue(taskMetrics.getMetricContext().getParent().isPresent());
    Assert.assertEquals(taskMetrics.getMetricContext().getParent().get(), expectedParent.getMetricContext());
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) JobState(org.apache.gobblin.runtime.JobState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) TaskState(org.apache.gobblin.runtime.TaskState) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Example 30 with SourceState

use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.

the class SalesforceSourceTest method testSourceLineageInfo.

/**
 * Generates work units for the "contacts" source entity with a single user-specified partition
 * and checks that the one resulting work unit carries the lineage source descriptor (as JSON)
 * and the lineage name.
 */
@Test
void testSourceLineageInfo() {
    SourceState sourceState = new SourceState();
    sourceState.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, "salesforce");
    sourceState.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, "snapshot_append");
    sourceState.setProp(Partitioner.HAS_USER_SPECIFIED_PARTITIONS, true);
    sourceState.setProp(Partitioner.USER_SPECIFIED_PARTITIONS, "20140213000000,20170407152123");
    sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE, "SNAPSHOT");
    QueryBasedSource.SourceEntity sourceEntity = QueryBasedSource.SourceEntity.fromSourceEntityName("contacts");
    SalesforceSource source = new SalesforceSource(new LineageInfo(ConfigFactory.empty()));
    List<WorkUnit> workUnits = source.generateWorkUnits(sourceEntity, sourceState, 20140213000000L);
    Assert.assertEquals(workUnits.size(), 1);
    DatasetDescriptor sourceDataset = new DatasetDescriptor("salesforce", "contacts");
    Gson gson = new Gson();
    // TestNG's assertEquals takes (actual, expected); keep that order so failure messages
    // report the work unit's property as the actual value.
    Assert.assertEquals(workUnits.get(0).getProp("gobblin.event.lineage.source"), gson.toJson(sourceDataset));
    Assert.assertEquals(workUnits.get(0).getProp("gobblin.event.lineage.name"), "contacts");
}
Also used : SourceState(org.apache.gobblin.configuration.SourceState) QueryBasedSource(org.apache.gobblin.source.extractor.extract.QueryBasedSource) DatasetDescriptor(org.apache.gobblin.dataset.DatasetDescriptor) Gson(com.google.gson.Gson) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) LineageInfo(org.apache.gobblin.metrics.event.lineage.LineageInfo) Test(org.testng.annotations.Test)

Aggregations

SourceState (org.apache.gobblin.configuration.SourceState)90 Test (org.testng.annotations.Test)76 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)44 WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)38 State (org.apache.gobblin.configuration.State)30 WorkingState (org.apache.gobblin.configuration.WorkUnitState.WorkingState)11 Partition (org.apache.hadoop.hive.ql.metadata.Partition)8 Table (org.apache.hadoop.hive.ql.metadata.Table)8 IterableDatasetFinder (org.apache.gobblin.dataset.IterableDatasetFinder)7 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)7 Extract (org.apache.gobblin.source.workunit.Extract)7 DateTime (org.joda.time.DateTime)7 Dataset (org.apache.gobblin.dataset.Dataset)6 PartitionableDataset (org.apache.gobblin.dataset.PartitionableDataset)6 MultiWorkUnit (org.apache.gobblin.source.workunit.MultiWorkUnit)6 WorkUnitStream (org.apache.gobblin.source.workunit.WorkUnitStream)6 IOException (java.io.IOException)5 Path (org.apache.hadoop.fs.Path)5 Gson (com.google.gson.Gson)4 JsonObject (com.google.gson.JsonObject)4