Usage example of org.apache.gobblin.configuration.State from the Apache incubator-gobblin project: class LineageEventTest, method testEvent.
@Test
public void testEvent() {
    final String topic = "testTopic";
    final String kafka = "kafka";
    final String hdfs = "hdfs";
    final String mysql = "mysql";
    final String branch = "branch";

    // Set one source and two branch destinations on state0, then load per-branch events.
    State state0 = new State();
    LineageInfo lineageInfo = getLineageInfo();
    DatasetDescriptor source = new DatasetDescriptor(kafka, topic);
    lineageInfo.setSource(source, state0);
    DatasetDescriptor destination00 = new DatasetDescriptor(hdfs, "/data/dbchanges");
    destination00.addMetadata(branch, "0");
    lineageInfo.putDestination(destination00, 0, state0);
    DatasetDescriptor destination01 = new DatasetDescriptor(mysql, "kafka.testTopic");
    destination01.addMetadata(branch, "1");
    lineageInfo.putDestination(destination01, 1, state0);

    // Events are keyed by branch id; each must carry the source and its own destination.
    Map<String, LineageEventBuilder> events = LineageInfo.load(state0);
    verify(events.get("0"), topic, source, destination00);
    verify(events.get("1"), topic, source, destination01);

    // state1 has a source but no destination yet, so it contributes no events.
    State state1 = new State();
    lineageInfo.setSource(source, state1);
    List<State> states = Lists.newArrayList();
    states.add(state0);
    states.add(state1);

    // Test only full fledged lineage events are loaded
    Collection<LineageEventBuilder> eventsList = LineageInfo.load(states);
    // Use assertEquals (not assertTrue on a boolean comparison) so a failure
    // reports the actual size instead of just "expected true".
    Assert.assertEquals(eventsList.size(), 2);
    Assert.assertEquals(getLineageEvent(eventsList, 0, hdfs), events.get("0"));
    Assert.assertEquals(getLineageEvent(eventsList, 1, mysql), events.get("1"));

    // There are 3 full fledged lineage events once state1 gains a destination on branch 2.
    DatasetDescriptor destination12 = new DatasetDescriptor(mysql, "kafka.testTopic2");
    destination12.addMetadata(branch, "2");
    lineageInfo.putDestination(destination12, 2, state1);
    eventsList = LineageInfo.load(states);
    Assert.assertEquals(eventsList.size(), 3);
    Assert.assertEquals(getLineageEvent(eventsList, 0, hdfs), events.get("0"));
    Assert.assertEquals(getLineageEvent(eventsList, 1, mysql), events.get("1"));
    verify(getLineageEvent(eventsList, 2, mysql), topic, source, destination12);

    // There are 5 lineage events put, but only 4 unique lineage events:
    // branch 0 of state1 reuses destination12, so it duplicates branch 2.
    DatasetDescriptor destination10 = destination12;
    lineageInfo.putDestination(destination10, 0, state1);
    DatasetDescriptor destination11 = new DatasetDescriptor("hive", "kafka.testTopic1");
    destination11.addMetadata(branch, "1");
    lineageInfo.putDestination(destination11, 1, state1);
    eventsList = LineageInfo.load(states);
    Assert.assertEquals(eventsList.size(), 4);
    Assert.assertEquals(getLineageEvent(eventsList, 0, hdfs), events.get("0"));
    Assert.assertEquals(getLineageEvent(eventsList, 1, mysql), events.get("1"));

    // Either branch 0 or 2 of state 1 is selected for the duplicated destination.
    LineageEventBuilder event12 = getLineageEvent(eventsList, 0, mysql);
    if (event12 == null) {
        event12 = getLineageEvent(eventsList, 2, mysql);
    }
    verify(event12, topic, source, destination12);
    verify(getLineageEvent(eventsList, 1, "hive"), topic, source, destination11);
}
Usage example of org.apache.gobblin.configuration.State from the Apache incubator-gobblin project: class HivePartitionVersionFinder, method findVersions.
/**
 * Finds the retention versions of the given dataset.
 *
 * <p>Copies this finder's state, promotes the required versions whitelist into the
 * dataset whitelist property, and delegates to {@code setVersions} which populates
 * {@code this.versions}.</p>
 *
 * @param name dataset name passed through to {@code setVersions}
 * @param urn  dataset urn, used only for logging
 * @return the versions collected into {@code this.versions}
 * @throws IOException if version discovery fails
 * @throws IllegalArgumentException if the versions whitelist property is missing
 */
private List<HivePartitionVersion> findVersions(String name, String urn) throws IOException {
    // Fail fast before doing any work if the mandatory whitelist is absent.
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.HIVE_VERSIONS_WHITELIST),
        "Missing required property " + ComplianceConfigurationKeys.HIVE_VERSIONS_WHITELIST);
    // Work on a copy so the finder's own state is not mutated.
    State versionState = new State(this.state);
    versionState.setProp(ComplianceConfigurationKeys.HIVE_DATASET_WHITELIST,
        this.state.getProp(ComplianceConfigurationKeys.HIVE_VERSIONS_WHITELIST));
    setVersions(name, versionState);
    log.info("Found " + this.versions.size() + " versions for the dataset " + urn);
    return this.versions;
}
Usage example of org.apache.gobblin.configuration.State from the Apache incubator-gobblin project: class KafkaSimpleStreamingTest, method getStreamingExtractor.
/**
 * Builds a {@link KafkaSimpleStreamingExtractor} for the given topic.
 *
 * <p>Provisions the topic on the embedded Kafka helper, takes the first work unit
 * produced for it, and configures the work unit state with broker address, topic
 * whitelist, job name, and String/byte[] deserializers.</p>
 *
 * @param topic the Kafka topic to provision and extract from
 * @return a streaming extractor wired to the test Kafka broker
 */
private KafkaSimpleStreamingExtractor<String, byte[]> getStreamingExtractor(String topic) {
    _kafkaTestHelper.provisionTopic(topic);
    List<WorkUnit> workUnits = getWorkUnits(topic);
    WorkUnit workUnit = workUnits.get(0);

    WorkUnitState workUnitState = new WorkUnitState(workUnit, new State());
    workUnitState.setProp(ConfigurationKeys.KAFKA_BROKERS, "localhost:" + _kafkaTestHelper.getKafkaServerPort());
    workUnitState.setProp(KafkaSimpleStreamingSource.TOPIC_WHITELIST, topic);
    workUnitState.setProp(ConfigurationKeys.JOB_NAME_KEY, topic);
    workUnitState.setProp(KafkaSimpleStreamingSource.TOPIC_KEY_DESERIALIZER,
        "org.apache.kafka.common.serialization.StringDeserializer");
    workUnitState.setProp(KafkaSimpleStreamingSource.TOPIC_VALUE_DESERIALIZER,
        "org.apache.kafka.common.serialization.ByteArrayDeserializer");

    // Create an extractor
    return new KafkaSimpleStreamingExtractor<String, byte[]>(workUnitState);
}
Usage example of org.apache.gobblin.configuration.State from the Apache incubator-gobblin project: class AbstractKafkaDataWriterBuilder, method build.
/**
 * Build a {@link DataWriter}.
 *
 * <p>Reads commit timeout, commit-step wait time, and failure allowance from the
 * destination's properties (falling back to defaults), then wraps the concrete
 * async Kafka writer in an {@link AsyncWriterManager} with retries disabled.</p>
 *
 * @throws IOException if there is anything wrong building the writer
 * @return the built {@link DataWriter}
 */
@Override
public DataWriter<D> build() throws IOException {
    State state = this.destination.getProperties();
    Properties taskProps = state.getProperties();
    Config config = ConfigUtils.propertiesToConfig(taskProps);

    long commitTimeoutMillis = ConfigUtils.getLong(config,
        KafkaWriterConfigurationKeys.COMMIT_TIMEOUT_MILLIS_CONFIG,
        KafkaWriterConfigurationKeys.COMMIT_TIMEOUT_MILLIS_DEFAULT);
    long commitStepWaitTimeMillis = ConfigUtils.getLong(config,
        KafkaWriterConfigurationKeys.COMMIT_STEP_WAIT_TIME_CONFIG,
        KafkaWriterConfigurationKeys.COMMIT_STEP_WAIT_TIME_DEFAULT);
    // Config value is a percentage; the manager expects a ratio in [0, 1].
    double failureAllowance = ConfigUtils.getDouble(config,
        KafkaWriterConfigurationKeys.FAILURE_ALLOWANCE_PCT_CONFIG,
        KafkaWriterConfigurationKeys.FAILURE_ALLOWANCE_PCT_DEFAULT) / 100.0;

    AsyncWriterManager.AsyncWriterManagerBuilder managerBuilder = AsyncWriterManager.builder();
    managerBuilder.config(config);
    managerBuilder.commitTimeoutMillis(commitTimeoutMillis);
    managerBuilder.commitStepWaitTimeInMillis(commitStepWaitTimeMillis);
    managerBuilder.failureAllowanceRatio(failureAllowance);
    managerBuilder.retriesEnabled(false);
    managerBuilder.asyncDataWriter(getAsyncDataWriter(taskProps));
    return managerBuilder.build();
}
Usage example of org.apache.gobblin.configuration.State from the Apache incubator-gobblin project: class GoogleDriveFsHelperTest, method closeTest.
// Verifies that closing the helper also closes any input stream it handed out.
// NOTE(review): unlike sibling test methods, this one carries no @Test annotation
// in the visible source — confirm it is still picked up by the test runner.
public void closeTest() throws IOException, FileBasedHelperException {
State state = new State();
setUp();
// Helper owns a Closer; streams it opens are expected to be registered with it.
GoogleDriveFsHelper fsHelper = new GoogleDriveFsHelper(state, client, Closer.create());
Get getResult = mock(Get.class);
InputStream is = mock(InputStream.class);
// Stub the Drive client chain: client.files().get(id).executeMediaAsInputStream() -> is
when(client.files()).thenReturn(files);
when(files.get(anyString())).thenReturn(getResult);
when(getResult.executeMediaAsInputStream()).thenReturn(is);
fsHelper.getFileStream("test");
fsHelper.close();
// The stream obtained via getFileStream must be closed exactly once by fsHelper.close().
verify(is, times(1)).close();
}
End of aggregated usage examples.