use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionerTest method testGetHighWatermarkOnAppendExtract.
/**
 * Test getHighWatermark. Extract type: Append.
 *
 * <p>First checks the full-dump case, where the configured end value is expected back and the
 * partitioner is expected to flag the high watermark as user specified. Then removes the
 * full-dump flag and checks: no limit type, no limit delta, and each of the
 * {@code CURRENTDATE-1}, {@code CURRENTHOUR-1}, {@code CURRENTMINUTE-1} and
 * {@code CURRENTSECOND-1} max watermark limits.
 */
@Test
public void testGetHighWatermarkOnAppendExtract() {
  String endValue = "20140101000000";
  SourceState sourceState = new SourceState();
  sourceState.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true);
  sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_END_VALUE, endValue);
  ExtractType extractType = ExtractType.APPEND_DAILY;
  TestPartitioner partitioner = new TestPartitioner(sourceState);

  Assert.assertEquals(partitioner.getHighWatermark(extractType, null), Long.parseLong(endValue),
      "High watermark should be " + endValue);
  Assert.assertEquals(partitioner.getUserSpecifiedHighWatermark(), true,
      "Should mark as user specified high watermark");
  partitioner.reset();

  // Test non-full-dump cases below
  sourceState.removeProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY);

  // No limit type
  Assert.assertEquals(partitioner.getHighWatermark(ExtractType.APPEND_BATCH, null),
      ConfigurationKeys.DEFAULT_WATERMARK_VALUE,
      "High watermark should be " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
  Assert.assertEquals(partitioner.getUserSpecifiedHighWatermark(), false,
      "Should not mark as user specified high watermark");

  // No limit delta
  long expected = Long.parseLong(TestPartitioner.currentTimeString);
  Assert.assertEquals(partitioner.getHighWatermark(extractType, null), expected,
      "High watermark should be " + expected);
  Assert.assertEquals(partitioner.getUserSpecifiedHighWatermark(), false,
      "Should not mark as user specified high watermark");

  // CURRENTDATE - 1
  String maxLimit = "CURRENTDATE-1";
  sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_APPEND_MAX_WATERMARK_LIMIT, maxLimit);
  Assert.assertEquals(partitioner.getHighWatermark(extractType, null), 20161231235959L,
      "High watermark should be 20161231235959");
  // The flag is expected to be true here, so the failure message must say so.
  Assert.assertEquals(partitioner.getUserSpecifiedHighWatermark(), true,
      "Should mark as user specified high watermark");
  partitioner.reset();

  // CURRENTHOUR - 1
  maxLimit = "CURRENTHOUR-1";
  sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_APPEND_MAX_WATERMARK_LIMIT, maxLimit);
  Assert.assertEquals(partitioner.getHighWatermark(extractType, null), 20161231235959L,
      "High watermark should be 20161231235959");
  Assert.assertEquals(partitioner.getUserSpecifiedHighWatermark(), true,
      "Should mark as user specified high watermark");
  partitioner.reset();

  // CURRENTMINUTE - 1
  maxLimit = "CURRENTMINUTE-1";
  sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_APPEND_MAX_WATERMARK_LIMIT, maxLimit);
  Assert.assertEquals(partitioner.getHighWatermark(extractType, null), 20161231235959L,
      "High watermark should be 20161231235959");
  Assert.assertEquals(partitioner.getUserSpecifiedHighWatermark(), true,
      "Should mark as user specified high watermark");
  partitioner.reset();

  // CURRENTSECOND - 1
  maxLimit = "CURRENTSECOND-1";
  sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_APPEND_MAX_WATERMARK_LIMIT, maxLimit);
  Assert.assertEquals(partitioner.getHighWatermark(extractType, null), 20161231235959L,
      "High watermark should be 20161231235959");
  Assert.assertEquals(partitioner.getUserSpecifiedHighWatermark(), true,
      "Should mark as user specified high watermark");
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionerTest method testGetLowWatermarkOnUserOverride.
/**
 * Test getLowWatermark. Is watermark override: true.
 *
 * <p>The configured start value is expected as the low watermark both when the watermark
 * override flag is set and when only the full-dump flag is set. Once the start value is
 * removed, {@link ConfigurationKeys#DEFAULT_WATERMARK_VALUE} is expected instead.
 */
@Test
public void testGetLowWatermarkOnUserOverride() {
  final String configuredStart = "20140101000000";
  final long expectedFromStart = Long.parseLong(configuredStart);
  final String startMessage = "Low watermark should be " + configuredStart;

  SourceState state = new SourceState();
  state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_WATERMARK_OVERRIDE, true);
  state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE, configuredStart);
  TestPartitioner underTest = new TestPartitioner(state);

  // Watermark override enabled: the start value is returned.
  Assert.assertEquals(underTest.getLowWatermark(null, null, -1, 0), expectedFromStart, startMessage);

  // It works for full dump too
  state.removeProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_WATERMARK_OVERRIDE);
  state.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true);
  Assert.assertEquals(underTest.getLowWatermark(null, null, -1, 0), expectedFromStart, startMessage);

  // Should return ConfigurationKeys.DEFAULT_WATERMARK_VALUE if no SOURCE_QUERYBASED_START_VALUE is specified
  state.removeProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE);
  Assert.assertEquals(
      underTest.getLowWatermark(null, null, -1, 0),
      ConfigurationKeys.DEFAULT_WATERMARK_VALUE,
      "Low watermark should be " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class PartitionerTest method testGetHighWatermarkOnSnapshotExtract.
/**
 * Test getHighWatermark. Extract type: Snapshot.
 *
 * <p>With the full-dump flag set, the configured end value is not expected back: a
 * {@code SIMPLE} watermark type is expected to yield
 * {@link ConfigurationKeys#DEFAULT_WATERMARK_VALUE}, and a {@code TIMESTAMP} watermark type is
 * expected to yield the partitioner's current time. Neither case should be flagged as a
 * user specified high watermark.
 */
@Test
public void testGetHighWatermarkOnSnapshotExtract() {
  final String configuredEnd = "20140101000000";
  final String notUserSpecified = "Should not mark as user specified high watermark";

  SourceState state = new SourceState();
  // It won't use SOURCE_QUERYBASED_END_VALUE when extract is full
  state.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true);
  state.setProp(ConfigurationKeys.SOURCE_QUERYBASED_END_VALUE, configuredEnd);

  ExtractType snapshot = ExtractType.SNAPSHOT;
  TestPartitioner underTest = new TestPartitioner(state);

  // SIMPLE watermark type: default watermark value.
  Assert.assertEquals(
      underTest.getHighWatermark(snapshot, WatermarkType.SIMPLE),
      ConfigurationKeys.DEFAULT_WATERMARK_VALUE,
      "High watermark should be " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE);
  Assert.assertEquals(underTest.getUserSpecifiedHighWatermark(), false, notUserSpecified);

  // TIMESTAMP watermark type: the partitioner's current time.
  long currentTime = Long.parseLong(TestPartitioner.currentTimeString);
  Assert.assertEquals(
      underTest.getHighWatermark(snapshot, WatermarkType.TIMESTAMP),
      currentTime,
      "High watermark should be " + currentTime);
  Assert.assertEquals(underTest.getUserSpecifiedHighWatermark(), false, notUserSpecified);
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class DatasetFinderSource method createWorkUnitStream.
/**
 * Builds a stream of work units from the datasets returned by the dataset finder created for
 * {@code state}.
 *
 * <p>When {@code drilldownIntoPartitions} is set, each {@link PartitionableDataset} is expanded
 * into its partitions and every other dataset is wrapped in a {@code DatasetWrapper}; the
 * results are mapped through {@code workUnitForPartitionInternal}. Otherwise each dataset is
 * mapped through {@code workUnitForDataset}.
 *
 * <p>If listing a dataset's partitions fails with an {@link IOException}, the failure is logged
 * and that dataset contributes no work units; the rest of the stream is unaffected.
 *
 * @param state source state used to create the dataset finder
 * @return stream of work units, evaluated lazily as the stream is consumed
 * @throws IOException declared by this method; presumably raised while creating the finder or
 *     obtaining the dataset stream (TODO confirm against those helpers)
 */
private Stream<WorkUnit> createWorkUnitStream(SourceState state) throws IOException {
  IterableDatasetFinder datasetsFinder = createDatasetsFinder(state);
  Stream<Dataset> datasetStream = datasetsFinder.getDatasetsStream(0, null);
  if (this.drilldownIntoPartitions) {
    return datasetStream.flatMap(dataset -> {
      if (dataset instanceof PartitionableDataset) {
        try {
          return (Stream<PartitionableDataset.DatasetPartition>) ((PartitionableDataset) dataset).getPartitions(0, null);
        } catch (IOException ioe) {
          // Best effort: skip this dataset, but log the exception itself so the cause and
          // stack trace are not lost.
          log.error("Failed to get partitions for dataset " + dataset.getUrn(), ioe);
          return Stream.empty();
        }
      } else {
        return Stream.of(new DatasetWrapper(dataset));
      }
    }).map(this::workUnitForPartitionInternal);
  } else {
    return datasetStream.map(this::workUnitForDataset);
  }
}
use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by apache.
the class HivePurgerSource method initialize.
/**
 * Initializes this source from {@code state}.
 *
 * <p>In order: sets the timestamp, low watermark and execution count via the corresponding
 * helpers; creates the metric context and event submitter and submits the cycle completion
 * event; reads the work unit limits; instantiates the configured dataset finder and populates
 * the datasets; instantiates the configured purge policy with the low watermark; and, when
 * proxying is enabled, cancels tokens.
 *
 * @param state source state providing the configuration
 * @throws IOException if token cancellation is interrupted or fails with a {@code TException}
 *     (the original exception is attached as the cause); the signature also allows earlier
 *     steps to throw it
 */
@VisibleForTesting
protected void initialize(SourceState state) throws IOException {
  setTimeStamp();
  setLowWatermark(state);
  setExecutionCount(state);
  this.metricContext = Instrumented.getMetricContext(state, this.getClass());
  this.eventSubmitter = new EventSubmitter.Builder(this.metricContext, ComplianceEvents.NAMESPACE).build();
  submitCycleCompletionEvent();
  this.maxWorkUnits = state.getPropAsInt(ComplianceConfigurationKeys.MAX_WORKUNITS_KEY, ComplianceConfigurationKeys.DEFAULT_MAX_WORKUNITS);
  this.maxWorkUnitExecutionAttempts = state.getPropAsInt(ComplianceConfigurationKeys.MAX_WORKUNIT_EXECUTION_ATTEMPTS_KEY, ComplianceConfigurationKeys.DEFAULT_MAX_WORKUNIT_EXECUTION_ATTEMPTS);
  // TODO: Event submitter and metrics will be added later
  String datasetFinderClass = state.getProp(ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS, HivePartitionFinder.class.getName());
  this.datasetFinder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, datasetFinderClass, state);
  populateDatasets();
  String policyClass = state.getProp(ComplianceConfigurationKeys.PURGE_POLICY_CLASS, HivePurgerPolicy.class.getName());
  this.policy = GobblinConstructorUtils.invokeConstructor(PurgePolicy.class, policyClass, this.lowWatermark);
  this.shouldProxy = state.getPropAsBoolean(ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_SHOULD_PROXY, ComplianceConfigurationKeys.GOBBLIN_COMPLIANCE_DEFAULT_SHOULD_PROXY);
  if (!this.shouldProxy) {
    return;
  }
  // cancel tokens
  try {
    ProxyUtils.cancelTokens(new State(state));
  } catch (InterruptedException e) {
    // Restore the interrupt status before translating the exception, so callers further up
    // the stack can still observe that this thread was interrupted.
    Thread.currentThread().interrupt();
    throw new IOException(e);
  } catch (TException e) {
    throw new IOException(e);
  }
}
Aggregations