Search in sources :

Example 26 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class HiveRegistrationPolicyBaseTest method testGetHiveSpecsWithDBFilter.

/**
 * Registers one primary and one additional database, with a table list that is scoped to
 * {@code db2} only, and checks that {@code getHiveSpecs} yields the expected
 * (database, table) pairs in the expected order.
 *
 * <p>The expectations below show the {@code $PRIMARY_TABLE_col} entries surfacing as
 * {@code tbl1_col} in both databases.
 */
@Test
public void testGetHiveSpecsWithDBFilter() throws IOException {
    State props = new State();
    props.appendToListProp(HiveRegistrationPolicyBase.HIVE_DATABASE_NAME, "db1");
    props.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_DATABASE_NAMES, "db2");
    props.appendToListProp(HiveRegistrationPolicyBase.HIVE_TABLE_NAME, "tbl1");
    props.appendToListProp(HiveRegistrationPolicyBase.ADDITIONAL_HIVE_TABLE_NAMES, "tbl2,tbl3,$PRIMARY_TABLE_col");
    // Table list that applies to db2 only.
    props.appendToListProp("db2." + HiveRegistrationPolicyBase.HIVE_TABLE_NAME, "$PRIMARY_TABLE_col,tbl4,tbl5");

    this.path = new Path(getClass().getResource("/test-hive-table").toString());
    Collection<HiveSpec> hiveSpecs = new HiveRegistrationPolicyBase(props).getHiveSpecs(this.path);

    // Expected {database, table} pairs, in iteration order.
    String[][] expectedDbAndTable = {
        { "db1", "tbl1" },
        { "db1", "tbl2" },
        { "db1", "tbl3" },
        { "db1", "tbl1_col" },
        { "db2", "tbl1_col" },
        { "db2", "tbl4" },
        { "db2", "tbl5" }
    };
    Assert.assertEquals(hiveSpecs.size(), expectedDbAndTable.length);

    Iterator<HiveSpec> remaining = hiveSpecs.iterator();
    for (String[] dbAndTable : expectedDbAndTable) {
        examine(remaining.next(), dbAndTable[0], dbAndTable[1]);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) State(org.apache.gobblin.configuration.State) HiveSpec(org.apache.gobblin.hive.spec.HiveSpec) SimpleHiveSpec(org.apache.gobblin.hive.spec.SimpleHiveSpec) Test(org.testng.annotations.Test)

Example 27 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class FsStateStoreTest method testPut.

/**
 * Stores three states (ids {@code s1}..{@code s3}, each with one {@code kN=vN} property) under
 * {@code testStore/testTable} and checks that the table is reported as existing only after
 * the write. The same states are then written to a second store.
 */
@Test
public void testPut() throws IOException {
    List<State> batch = Lists.newArrayList();
    for (int i = 1; i <= 3; i++) {
        State entry = new State();
        entry.setId("s" + i);
        entry.setProp("k" + i, "v" + i);
        batch.add(entry);
    }

    Assert.assertFalse(this.stateStore.exists("testStore", "testTable"));
    this.stateStore.putAll("testStore", "testTable", batch);
    Assert.assertTrue(this.stateStore.exists("testStore", "testTable"));

    // for testing of getStoreNames
    this.stateStore.putAll("testStore2", "testTable", batch);
}
Also used : State(org.apache.gobblin.configuration.State) Test(org.testng.annotations.Test)

Example 28 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class AzkabanJobLauncher method initJobListener.

/**
 * Builds the composite job listener for this launcher.
 *
 * <p>The listener aliases are read from {@code GOBBLIN_CUSTOM_JOB_LISTENERS}, falling back to
 * the simple name of {@link EmailNotificationJobListener} when the property is not set. Each
 * alias is resolved to a {@link JobListener} class and instantiated through its no-argument
 * constructor.
 *
 * @return a {@link CompositeJobListener} holding one instance per configured alias
 * @throws IllegalArgumentException if an alias cannot be resolved or its class cannot be
 *     instantiated; the underlying reflective failure is preserved as the cause
 */
protected JobListener initJobListener() {
    CompositeJobListener compositeJobListener = new CompositeJobListener();
    List<String> listeners = new State(props).getPropAsList(GOBBLIN_CUSTOM_JOB_LISTENERS, EmailNotificationJobListener.class.getSimpleName());
    // The resolver does not depend on the alias, so build it once instead of once per listener.
    ClassAliasResolver<JobListener> jobListenerResolver = new ClassAliasResolver<>(JobListener.class);
    for (String listenerAlias : listeners) {
        try {
            // Class.newInstance() is deprecated because it rethrows checked constructor exceptions
            // undeclared; going through the Constructor wraps them in InvocationTargetException.
            compositeJobListener.addJobListener(
                jobListenerResolver.resolveClass(listenerAlias).getDeclaredConstructor().newInstance());
        } catch (ReflectiveOperationException e) {
            throw new IllegalArgumentException("Cannot instantiate job listener: " + listenerAlias, e);
        }
    }
    return compositeJobListener;
}
Also used : EmailNotificationJobListener(org.apache.gobblin.runtime.listeners.EmailNotificationJobListener) JobListener(org.apache.gobblin.runtime.listeners.JobListener) CompositeJobListener(org.apache.gobblin.runtime.listeners.CompositeJobListener) State(org.apache.gobblin.configuration.State) ClassAliasResolver(org.apache.gobblin.util.ClassAliasResolver)

Example 29 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class HivePurgerExtractor method createPurgeableHivePartitionDataset.

/**
 * Creates a {@link PurgeableHivePartitionDataset} from the given state.
 *
 * <p>The underlying partition is looked up through {@code HivePartitionFinder.findDataset}
 * using the {@code PARTITION_NAME} property. The compliance id, compliance id table and
 * timestamp properties must be present. The returned dataset carries its own copy of the
 * state's properties.
 *
 * @param state source of the partition name and all compliance settings
 * @return the configured purgeable dataset
 * @throws IOException declared by the signature; no visible statement throws it directly
 * @throws IllegalArgumentException if a required compliance property is missing
 */
private PurgeableHivePartitionDataset createPurgeableHivePartitionDataset(State state) throws IOException {
    final HivePartitionDataset partition =
        HivePartitionFinder.findDataset(state.getProp(ComplianceConfigurationKeys.PARTITION_NAME), state);

    // Required properties.
    Preconditions.checkArgument(state.contains(ComplianceConfigurationKeys.COMPLIANCEID_KEY),
        "Missing property " + ComplianceConfigurationKeys.COMPLIANCEID_KEY);
    Preconditions.checkArgument(state.contains(ComplianceConfigurationKeys.COMPLIANCE_ID_TABLE_KEY),
        "Missing property " + ComplianceConfigurationKeys.COMPLIANCE_ID_TABLE_KEY);
    Preconditions.checkArgument(state.contains(ComplianceConfigurationKeys.TIMESTAMP),
        "Missing table property " + ComplianceConfigurationKeys.TIMESTAMP);

    // Read every setting up front, before the dataset object is created.
    final boolean dryRun = state.getPropAsBoolean(ComplianceConfigurationKeys.COMPLIANCE_JOB_SIMULATE,
        ComplianceConfigurationKeys.DEFAULT_COMPLIANCE_JOB_SIMULATE);
    final String complianceId = state.getProp(ComplianceConfigurationKeys.COMPLIANCEID_KEY);
    final String idTable = state.getProp(ComplianceConfigurationKeys.COMPLIANCE_ID_TABLE_KEY);
    final String purgeTimestamp = state.getProp(ComplianceConfigurationKeys.TIMESTAMP);
    final boolean partitionFormatSpecified = state.getPropAsBoolean(ComplianceConfigurationKeys.SPECIFY_PARTITION_FORMAT,
        ComplianceConfigurationKeys.DEFAULT_SPECIFY_PARTITION_FORMAT);

    // The dataset gets a separate State object populated from the caller's properties.
    final State stateCopy = new State();
    stateCopy.addAll(state.getProperties());

    final PurgeableHivePartitionDataset purgeable = new PurgeableHivePartitionDataset(partition);
    purgeable.setComplianceId(complianceId);
    purgeable.setComplianceIdTable(idTable);
    purgeable.setComplianceField(getComplianceField(state, partition));
    purgeable.setTimeStamp(purgeTimestamp);
    purgeable.setState(stateCopy);
    purgeable.setSimulate(dryRun);
    purgeable.setSpecifyPartitionFormat(partitionFormatSpecified);
    return purgeable;
}
Also used : HivePartitionDataset(org.apache.gobblin.compliance.HivePartitionDataset) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State)

Example 30 with State

use of org.apache.gobblin.configuration.State in project incubator-gobblin by apache.

the class HivePurgerSource method initialize.

/**
 * Prepares this source for work unit generation.
 *
 * <p>In order: sets the timestamp, low watermark and execution count; creates the metric
 * context and event submitter and submits the cycle-completion event; reads the work unit
 * limits; instantiates the configured datasets finder and populates the datasets;
 * instantiates the configured purge policy; and, when proxying is enabled, cancels tokens.
 *
 * @param state job-level configuration
 * @throws IOException if token cancellation fails or is interrupted (the original exception is
 *     kept as the cause)
 */
@VisibleForTesting
protected void initialize(SourceState state) throws IOException {
    setTimeStamp();
    setLowWatermark(state);
    setExecutionCount(state);
    this.metricContext = Instrumented.getMetricContext(state, this.getClass());
    this.eventSubmitter = new EventSubmitter.Builder(this.metricContext, ComplianceEvents.NAMESPACE).build();
    submitCycleCompletionEvent();
    this.maxWorkUnits = state.getPropAsInt(ComplianceConfigurationKeys.MAX_WORKUNITS_KEY, ComplianceConfigurationKeys.DEFAULT_MAX_WORKUNITS);
    this.maxWorkUnitExecutionAttempts = state.getPropAsInt(ComplianceConfigurationKeys.MAX_WORKUNIT_EXECUTION_ATTEMPTS_KEY, ComplianceConfigurationKeys.DEFAULT_MAX_WORKUNIT_EXECUTION_ATTEMPTS);
    // TODO: Event submitter and metrics will be added later
    String datasetFinderClass = state.getProp(ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS, HivePartitionFinder.class.getName());
    this.datasetFinder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, datasetFinderClass, state);
    populateDatasets();
    String policyClass = state.getProp(ComplianceConfigurationKeys.PURGE_POLICY_CLASS, HivePurgerPolicy.class.getName());
    this.policy = GobblinConstructorUtils.invokeConstructor(PurgePolicy.class, policyClass, this.lowWatermark);
    this.shouldProxy = state.getPropAsBoolean(ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_SHOULD_PROXY, ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_DEFAULT_SHOULD_PROXY);
    if (!this.shouldProxy) {
        return;
    }
    // cancel tokens
    try {
        ProxyUtils.cancelTokens(new State(state));
    } catch (InterruptedException e) {
        // Catching InterruptedException clears the interrupt flag; restore it so callers further
        // up the stack can still observe the interruption after the IOException is thrown.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    } catch (TException e) {
        throw new IOException(e);
    }
}
Also used : HivePartitionFinder(org.apache.gobblin.compliance.HivePartitionFinder) TException(org.apache.thrift.TException) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) SourceState(org.apache.gobblin.configuration.SourceState) DatasetsFinder(org.apache.gobblin.dataset.DatasetsFinder) IOException(java.io.IOException) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Aggregations

State (org.apache.gobblin.configuration.State)195 Test (org.testng.annotations.Test)103 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)74 SourceState (org.apache.gobblin.configuration.SourceState)38 Path (org.apache.hadoop.fs.Path)30 File (java.io.File)20 IOException (java.io.IOException)16 Map (java.util.Map)14 WorkingState (org.apache.gobblin.configuration.WorkUnitState.WorkingState)14 WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)14 TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState)13 Properties (java.util.Properties)12 FinalState (org.apache.gobblin.util.FinalState)12 Configuration (org.apache.hadoop.conf.Configuration)12 TaskLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults)9 Config (com.typesafe.config.Config)8 ArrayList (java.util.ArrayList)8 GenericRecord (org.apache.avro.generic.GenericRecord)8 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)7 FileInputStream (java.io.FileInputStream)6