Search in sources :

Example 36 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class LineageEventTest method testEvent.

/**
 * Exercises {@code LineageInfo.load} for a single state and for a collection of states.
 *
 * <p>The scenario is built up in four steps:
 * <ol>
 *   <li>one state with a source and two branch destinations yields two events keyed by branch id;</li>
 *   <li>adding a second state that has a source but no destination does not add events;</li>
 *   <li>adding a destination to the second state yields a third event;</li>
 *   <li>putting the same destination descriptor on two branches of the second state yields only
 *       one extra event for it, so five puts produce four events.</li>
 * </ol>
 */
@Test
public void testEvent() {
    final String topic = "testTopic";
    final String kafka = "kafka";
    final String hdfs = "hdfs";
    final String mysql = "mysql";
    final String branch = "branch";
    State state0 = new State();
    LineageInfo lineageInfo = getLineageInfo();
    DatasetDescriptor source = new DatasetDescriptor(kafka, topic);
    lineageInfo.setSource(source, state0);
    DatasetDescriptor destination00 = new DatasetDescriptor(hdfs, "/data/dbchanges");
    destination00.addMetadata(branch, "0");
    lineageInfo.putDestination(destination00, 0, state0);
    DatasetDescriptor destination01 = new DatasetDescriptor(mysql, "kafka.testTopic");
    destination01.addMetadata(branch, "1");
    lineageInfo.putDestination(destination01, 1, state0);
    // Loading a single state returns the events keyed by branch id
    Map<String, LineageEventBuilder> events = LineageInfo.load(state0);
    verify(events.get("0"), topic, source, destination00);
    verify(events.get("1"), topic, source, destination01);
    // state1 only has a source so far, i.e. no complete lineage event
    State state1 = new State();
    lineageInfo.setSource(source, state1);
    List<State> states = Lists.newArrayList();
    states.add(state0);
    states.add(state1);
    // Test only full fledged lineage events are loaded
    Collection<LineageEventBuilder> eventsList = LineageInfo.load(states);
    // assertEquals (actual, expected) reports the actual size on failure, unlike assertTrue(a == b)
    Assert.assertEquals(eventsList.size(), 2);
    Assert.assertEquals(getLineageEvent(eventsList, 0, hdfs), events.get("0"));
    Assert.assertEquals(getLineageEvent(eventsList, 1, mysql), events.get("1"));
    // There are 3 full fledged lineage events
    DatasetDescriptor destination12 = new DatasetDescriptor(mysql, "kafka.testTopic2");
    destination12.addMetadata(branch, "2");
    lineageInfo.putDestination(destination12, 2, state1);
    eventsList = LineageInfo.load(states);
    Assert.assertEquals(eventsList.size(), 3);
    Assert.assertEquals(getLineageEvent(eventsList, 0, hdfs), events.get("0"));
    Assert.assertEquals(getLineageEvent(eventsList, 1, mysql), events.get("1"));
    verify(getLineageEvent(eventsList, 2, mysql), topic, source, destination12);
    // There are 5 lineage events put, but only 4 unique lineage events:
    // destination10 is the very same descriptor object as destination12
    DatasetDescriptor destination10 = destination12;
    lineageInfo.putDestination(destination10, 0, state1);
    DatasetDescriptor destination11 = new DatasetDescriptor("hive", "kafka.testTopic1");
    destination11.addMetadata(branch, "1");
    lineageInfo.putDestination(destination11, 1, state1);
    eventsList = LineageInfo.load(states);
    Assert.assertEquals(eventsList.size(), 4);
    Assert.assertEquals(getLineageEvent(eventsList, 0, hdfs), events.get("0"));
    Assert.assertEquals(getLineageEvent(eventsList, 1, mysql), events.get("1"));
    // Either branch 0 or 2 of state 1 is selected
    LineageEventBuilder event12 = getLineageEvent(eventsList, 0, mysql);
    if (event12 == null) {
        event12 = getLineageEvent(eventsList, 2, mysql);
    }
    verify(event12, topic, source, destination12);
    verify(getLineageEvent(eventsList, 1, "hive"), topic, source, destination11);
}
Also used : DatasetDescriptor(org.apache.gobblin.dataset.DatasetDescriptor) State(org.apache.gobblin.configuration.State) Test(org.testng.annotations.Test)

Example 37 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class HivePartitionVersionFinder method findVersions.

/**
 * Collects the partition versions for the named dataset.
 *
 * <p>Works on a copy of this finder's state in which the dataset whitelist is overridden with the
 * configured versions whitelist, then delegates to {@code setVersions}.
 *
 * @param name dataset name handed to {@code setVersions}
 * @param urn dataset urn, used only in the log message
 * @return this finder's {@code versions} list
 * @throws IllegalArgumentException if the versions whitelist property is not set
 * @throws IOException declared for callers; presumably raised while looking up versions
 */
private List<HivePartitionVersion> findVersions(String name, String urn) throws IOException {
    final String versionsWhitelistKey = ComplianceConfigurationKeys.HIVE_VERSIONS_WHITELIST;

    // Copy first so the override below never leaks into the shared state.
    State versionState = new State(this.state);

    boolean whitelistConfigured = this.state.contains(versionsWhitelistKey);
    Preconditions.checkArgument(whitelistConfigured, "Missing required property " + versionsWhitelistKey);

    String versionsWhitelist = this.state.getProp(versionsWhitelistKey);
    versionState.setProp(ComplianceConfigurationKeys.HIVE_DATASET_WHITELIST, versionsWhitelist);

    setVersions(name, versionState);

    int versionCount = this.versions.size();
    log.info("Found " + versionCount + " versions for the dataset " + urn);
    return this.versions;
}
Also used : State(org.apache.gobblin.configuration.State)

Example 38 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class KafkaSimpleStreamingTest method getStreamingExtractor.

/**
 * Provisions {@code topic} on the test Kafka helper and returns a streaming extractor for it.
 *
 * @param topic topic to provision; also used as the topic whitelist and the job name
 * @return an extractor configured with String keys and byte-array values
 */
private KafkaSimpleStreamingExtractor<String, byte[]> getStreamingExtractor(String topic) {
    _kafkaTestHelper.provisionTopic(topic);

    // Only the first work unit returned for the topic is used.
    WorkUnit firstWorkUnit = getWorkUnits(topic).get(0);
    WorkUnitState extractorState = new WorkUnitState(firstWorkUnit, new State());

    String brokerAddress = "localhost:" + _kafkaTestHelper.getKafkaServerPort();
    extractorState.setProp(ConfigurationKeys.KAFKA_BROKERS, brokerAddress);
    extractorState.setProp(KafkaSimpleStreamingSource.TOPIC_WHITELIST, topic);
    extractorState.setProp(ConfigurationKeys.JOB_NAME_KEY, topic);

    // Deserializers must match the extractor's <String, byte[]> type parameters.
    extractorState.setProp(KafkaSimpleStreamingSource.TOPIC_KEY_DESERIALIZER,
        "org.apache.kafka.common.serialization.StringDeserializer");
    extractorState.setProp(KafkaSimpleStreamingSource.TOPIC_VALUE_DESERIALIZER,
        "org.apache.kafka.common.serialization.ByteArrayDeserializer");

    return new KafkaSimpleStreamingExtractor<String, byte[]>(extractorState);
}
Also used : KafkaSimpleStreamingExtractor(org.apache.gobblin.source.extractor.extract.kafka.KafkaSimpleStreamingExtractor) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) SourceState(org.apache.gobblin.configuration.SourceState) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit)

Example 39 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class AbstractKafkaDataWriterBuilder method build.

/**
 * Creates the {@link DataWriter} for this builder's destination.
 *
 * <p>Commit timeout, commit step wait time and failure allowance are read from the destination
 * properties, falling back to the defaults in {@link KafkaWriterConfigurationKeys}. Retries are
 * disabled on the resulting writer manager.
 *
 * @return the built {@link DataWriter}
 * @throws IOException if there is anything wrong building the writer
 */
@Override
public DataWriter<D> build() throws IOException {
    Properties writerProps = this.destination.getProperties().getProperties();
    Config writerConfig = ConfigUtils.propertiesToConfig(writerProps);

    long commitTimeout = ConfigUtils.getLong(writerConfig,
        KafkaWriterConfigurationKeys.COMMIT_TIMEOUT_MILLIS_CONFIG,
        KafkaWriterConfigurationKeys.COMMIT_TIMEOUT_MILLIS_DEFAULT);
    long commitStepWait = ConfigUtils.getLong(writerConfig,
        KafkaWriterConfigurationKeys.COMMIT_STEP_WAIT_TIME_CONFIG,
        KafkaWriterConfigurationKeys.COMMIT_STEP_WAIT_TIME_DEFAULT);

    // The allowance is configured as a percentage; the writer manager takes a ratio.
    double failureAllowancePct = ConfigUtils.getDouble(writerConfig,
        KafkaWriterConfigurationKeys.FAILURE_ALLOWANCE_PCT_CONFIG,
        KafkaWriterConfigurationKeys.FAILURE_ALLOWANCE_PCT_DEFAULT);
    double failureAllowanceRatio = failureAllowancePct / 100.0;

    return AsyncWriterManager.builder()
        .config(writerConfig)
        .commitTimeoutMillis(commitTimeout)
        .commitStepWaitTimeInMillis(commitStepWait)
        .failureAllowanceRatio(failureAllowanceRatio)
        .retriesEnabled(false)
        .asyncDataWriter(getAsyncDataWriter(writerProps))
        .build();
}
Also used : State(org.apache.gobblin.configuration.State) Config(com.typesafe.config.Config) Properties(java.util.Properties)

Example 40 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class GoogleDriveFsHelperTest method closeTest.

/**
 * Checks that closing the helper also closes a stream previously handed out by
 * {@code getFileStream}.
 */
public void closeTest() throws IOException, FileBasedHelperException {
    State helperState = new State();
    // setUp() must run before the helper is constructed, since the helper is given `client`.
    setUp();
    GoogleDriveFsHelper helper = new GoogleDriveFsHelper(helperState, client, Closer.create());

    // Stub the client -> files -> get -> stream chain so the helper receives our mock stream.
    Get getRequest = mock(Get.class);
    InputStream mediaStream = mock(InputStream.class);
    when(client.files()).thenReturn(files);
    when(files.get(anyString())).thenReturn(getRequest);
    when(getRequest.executeMediaAsInputStream()).thenReturn(mediaStream);

    helper.getFileStream("test");
    helper.close();

    verify(mediaStream, times(1)).close();
}
Also used : State(org.apache.gobblin.configuration.State) InputStream(java.io.InputStream)

Aggregations

State (org.apache.gobblin.configuration.State) 195; Test (org.testng.annotations.Test) 103; WorkUnitState (org.apache.gobblin.configuration.WorkUnitState) 74; SourceState (org.apache.gobblin.configuration.SourceState) 38; Path (org.apache.hadoop.fs.Path) 30; File (java.io.File) 20; IOException (java.io.IOException) 16; Map (java.util.Map) 14; WorkingState (org.apache.gobblin.configuration.WorkUnitState.WorkingState) 14; WorkUnit (org.apache.gobblin.source.workunit.WorkUnit) 14; TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState) 13; Properties (java.util.Properties) 12; FinalState (org.apache.gobblin.util.FinalState) 12; Configuration (org.apache.hadoop.conf.Configuration) 12; TaskLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults) 9; Config (com.typesafe.config.Config) 8; ArrayList (java.util.ArrayList) 8; GenericRecord (org.apache.avro.generic.GenericRecord) 8; LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark) 7; FileInputStream (java.io.FileInputStream) 6