Use of org.apache.gobblin.metrics.event.lineage.LineageInfo in project incubator-gobblin by Apache.
Class HiveSource, method setLineageInfo.
/**
 * Records lineage on {@code workUnit} for a Hive conversion: the dataset's table as the source and
 * one destination per destination format that has a conversion config.
 *
 * <p>Nothing is recorded when no {@link LineageInfo} can be obtained from {@code sharedJobBroker}.
 * Branch numbers start at 1 and follow the iteration order of
 * {@code convertibleHiveDataset.getDestFormats()}; a format without a conversion config is skipped
 * but still consumes its branch number.
 *
 * @param convertibleHiveDataset dataset supplying the source table and the per-format conversion configs
 * @param workUnit work unit the source and destination descriptors are written to
 * @param sharedJobBroker broker used to look up the {@link LineageInfo} instance
 * @throws IOException propagated from the filesystem lookups for the source data location or a
 *     destination data path
 */
public static void setLineageInfo(ConvertibleHiveDataset convertibleHiveDataset, WorkUnit workUnit,
    SharedResourcesBroker<GobblinScopeTypes> sharedJobBroker) throws IOException {
  String sourceTable =
      convertibleHiveDataset.getTable().getDbName() + "." + convertibleHiveDataset.getTable().getTableName();
  DatasetDescriptor source = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, sourceTable);
  source.addMetadata(DatasetConstants.FS_URI,
      convertibleHiveDataset.getTable().getDataLocation().getFileSystem(new Configuration()).getUri().toString());

  // The lookup does not depend on the destination format, so it is done once instead of per iteration.
  Optional<LineageInfo> lineageInfo = LineageInfo.getLineageInfo(sharedJobBroker);
  if (!lineageInfo.isPresent()) {
    return;
  }
  LineageInfo lineage = lineageInfo.get();

  int virtualBranch = 0;
  for (String format : convertibleHiveDataset.getDestFormats()) {
    // Incremented before the skip check so that branch numbers stay tied to the format's position.
    ++virtualBranch;
    Optional<ConvertibleHiveDataset.ConversionConfig> conversionConfigForFormat =
        convertibleHiveDataset.getConversionConfigForFormat(format);
    if (!conversionConfigForFormat.isPresent()) {
      continue;
    }
    ConvertibleHiveDataset.ConversionConfig conversionConfig = conversionConfigForFormat.get();

    String destTable = conversionConfig.getDestinationDbName() + "." + conversionConfig.getDestinationTableName();
    DatasetDescriptor dest = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, destTable);
    Path destPath = new Path(conversionConfig.getDestinationDataPath());
    dest.addMetadata(DatasetConstants.FS_URI, destPath.getFileSystem(new Configuration()).getUri().toString());

    // Kept inside the loop: the source is only set when at least one destination is recorded.
    lineage.setSource(source, workUnit);
    lineage.putDestination(dest, virtualBranch, workUnit);
  }
}
Use of org.apache.gobblin.metrics.event.lineage.LineageInfo in project incubator-gobblin by Apache.
Class BaseDataPublisherTest, method testPublishSingleTask.
/**
 * Publishes one task state built with {@code buildTaskState(1)} after setting a Kafka source on it,
 * then checks that the state holds a destination property for branch 0 and none for branch 1.
 */
@Test
public void testPublishSingleTask() throws IOException {
  WorkUnitState taskState = buildTaskState(1);

  DatasetDescriptor kafkaTopic = new DatasetDescriptor("kafka", "testTopic");
  LineageInfo.getLineageInfo(taskState.getTaskBroker()).get().setSource(kafkaTopic, taskState);

  BaseDataPublisher dataPublisher = new BaseDataPublisher(taskState);
  dataPublisher.publishData(taskState);

  boolean hasBranch0Destination = taskState.contains("gobblin.event.lineage.branch.0.destination");
  Assert.assertTrue(hasBranch0Destination);
  boolean hasBranch1Destination = taskState.contains("gobblin.event.lineage.branch.1.destination");
  Assert.assertFalse(hasBranch1Destination);
}
Use of org.apache.gobblin.metrics.event.lineage.LineageInfo in project incubator-gobblin by Apache.
Class SalesforceSourceTest, method testSourceLineageInfo.
/**
 * Generates work units for the "contacts" source entity with one user-specified partition and
 * checks that exactly one work unit is produced, carrying the JSON form of the
 * "salesforce"/"contacts" dataset descriptor as its lineage source and "contacts" as its lineage name.
 */
@Test
void testSourceLineageInfo() {
  SourceState state = new SourceState();
  state.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, "salesforce");
  state.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, "snapshot_append");
  state.setProp(Partitioner.HAS_USER_SPECIFIED_PARTITIONS, true);
  state.setProp(Partitioner.USER_SPECIFIED_PARTITIONS, "20140213000000,20170407152123");
  state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE, "SNAPSHOT");

  QueryBasedSource.SourceEntity contactsEntity = QueryBasedSource.SourceEntity.fromSourceEntityName("contacts");
  SalesforceSource salesforceSource = new SalesforceSource(new LineageInfo(ConfigFactory.empty()));
  List<WorkUnit> generated = salesforceSource.generateWorkUnits(contactsEntity, state, 20140213000000L);
  Assert.assertEquals(generated.size(), 1);

  String expectedSourceJson = new Gson().toJson(new DatasetDescriptor("salesforce", "contacts"));
  WorkUnit onlyWorkUnit = generated.get(0);
  Assert.assertEquals(expectedSourceJson, onlyWorkUnit.getProp("gobblin.event.lineage.source"));
  Assert.assertEquals(onlyWorkUnit.getProp("gobblin.event.lineage.name"), "contacts");
}
Use of org.apache.gobblin.metrics.event.lineage.LineageInfo in project incubator-gobblin by Apache.
Class BaseDataPublisherTest, method testPublishMultiTasks.
/**
 * Publishes two task states built with {@code buildTaskState(2)} that share the same Kafka source,
 * then checks that each state holds destination properties for both branch 0 and branch 1.
 */
@Test
public void testPublishMultiTasks() throws IOException {
  WorkUnitState firstState = buildTaskState(2);
  WorkUnitState secondState = buildTaskState(2);

  // The LineageInfo instance is taken from the first state's broker and used for both states.
  LineageInfo lineage = LineageInfo.getLineageInfo(firstState.getTaskBroker()).get();
  DatasetDescriptor kafkaTopic = new DatasetDescriptor("kafka", "testTopic");
  lineage.setSource(kafkaTopic, firstState);
  lineage.setSource(kafkaTopic, secondState);

  ImmutableList<WorkUnitState> taskStates = ImmutableList.of(firstState, secondState);
  new BaseDataPublisher(firstState).publishData(taskStates);

  for (WorkUnitState taskState : taskStates) {
    Assert.assertTrue(taskState.contains("gobblin.event.lineage.branch.0.destination"));
    Assert.assertTrue(taskState.contains("gobblin.event.lineage.branch.1.destination"));
  }
}
Aggregations