Use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by Apache.
From the class DatasetUtilsTest, method testGetDatasetSpecificState.
/**
 * Checks {@code DatasetUtils.getDatasetSpecificProps} with two configured datasets and two
 * requested datasets that overlap in only one name: the overlapping dataset is expected to map
 * to its configured property, while the other two names are expected to map to {@code null}.
 */
@Test
public void testGetDatasetSpecificState() {
  final String configuredAndRequested = "testDataset1";
  final String configuredOnly = "testDataset2";
  final String requestedOnly = "testDataset3";
  final String propKey = "testKey1";
  final String propValue = "testValue1";

  // Each configured dataset gets its own JSON object carrying the same key/value pair.
  final String firstEntry =
      "{\"dataset\" : \"" + configuredAndRequested + "\", \"" + propKey + "\" : \"" + propValue + "\"}";
  final String secondEntry =
      "{\"dataset\" : \"" + configuredOnly + "\", \"" + propKey + "\" : \"" + propValue + "\"}";

  SourceState sourceState = new SourceState();
  sourceState.setProp(DatasetUtils.DATASET_SPECIFIC_PROPS, "[" + firstEntry + ", " + secondEntry + "]");

  State expectedForFirst = new State();
  expectedForFirst.setProp(propKey, propValue);

  Map<String, State> propsByDataset =
      DatasetUtils.getDatasetSpecificProps(Lists.newArrayList(configuredAndRequested, requestedOnly), sourceState);

  Assert.assertEquals(propsByDataset.get(configuredAndRequested), expectedForFirst);
  // Configured but never requested.
  Assert.assertNull(propsByDataset.get(configuredOnly));
  // Requested but never configured.
  Assert.assertNull(propsByDataset.get(requestedOnly));
}
Use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by Apache.
From the class DatasetUtilsTest, method testGetDatasetSpecificStateWithRegex.
/**
 * Checks {@code DatasetUtils.getDatasetSpecificProps} when the configured {@code dataset} value
 * is the pattern {@code testDataset.*}: both requested names starting with "testDataset" are
 * expected to receive the configured property, and "otherTestDataset1" is expected to map to
 * {@code null}.
 */
@Test
public void testGetDatasetSpecificStateWithRegex() {
  final String firstMatching = "testDataset1";
  final String secondMatching = "testDataset2";
  final String nonMatching = "otherTestDataset1";
  final String datasetPattern = "testDataset.*";
  final String propKey = "testKey1";
  final String propValue = "testValue1";

  SourceState sourceState = new SourceState();
  sourceState.setProp(DatasetUtils.DATASET_SPECIFIC_PROPS,
      "[{\"dataset\" : \"" + datasetPattern + "\", \"" + propKey + "\" : \"" + propValue + "\"}]");

  // Both matching datasets are expected to carry the same single property.
  State expectedForMatch = new State();
  expectedForMatch.setProp(propKey, propValue);

  Map<String, State> propsByDataset = DatasetUtils.getDatasetSpecificProps(
      Lists.newArrayList(firstMatching, secondMatching, nonMatching), sourceState);

  Assert.assertEquals(propsByDataset.get(firstMatching), expectedForMatch);
  Assert.assertEquals(propsByDataset.get(secondMatching), expectedForMatch);
  Assert.assertNull(propsByDataset.get(nonMatching));
}
Use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by Apache.
From the class RegexBasedPartitionedRetrieverTest, method testLeadtime.
/**
 * Initialises a {@code RegexBasedPartitionedRetriever} with a lead time of 1 DAY and expects
 * {@code getFilesToProcess} to return exactly two files, which are verified against the
 * APR_3_2017 and MAY_1_2017 values, in that order.
 *
 * @throws IOException declared by the test; see the retriever calls below
 */
@Test
public void testLeadtime() throws IOException {
  final String partitionRegex = "(\\d+)-PT-\\d+";
  RegexBasedPartitionedRetriever retriever = new RegexBasedPartitionedRetriever("txt");

  SourceState jobState = new SourceState();
  // Where to look for files.
  jobState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, "file:///");
  jobState.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, tempDir.toString());
  // How partitions are recognised, plus the lead time under test.
  jobState.setProp(PartitionedFileSourceBase.DATE_PARTITIONED_SOURCE_PARTITION_PATTERN, partitionRegex);
  jobState.setProp(PartitionedFileSourceBase.DATE_PARTITIONED_SOURCE_PARTITION_LEAD_TIME_GRANULARITY, "DAY");
  jobState.setProp(PartitionedFileSourceBase.DATE_PARTITIONED_SOURCE_PARTITION_LEAD_TIME, "1");
  retriever.init(jobState);

  // First argument is one less than the APR_3_2017 value; second argument is 9999.
  List<PartitionAwareFileRetriever.FileInfo> selected =
      retriever.getFilesToProcess(DateToUse.APR_3_2017.getValue() - 1, 9999);

  Assert.assertEquals(selected.size(), 2);
  verifyFile(selected.get(0), DateToUse.APR_3_2017.getValue());
  verifyFile(selected.get(1), DateToUse.MAY_1_2017.getValue());
}
Use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by Apache.
From the class DatePartitionedAvroFileExtractorTest, method testJobStateNotCopiedToWorkUnit.
/**
 * Sets an extra property on the job-level {@code SourceState} and asserts that none of the work
 * units returned by {@code DatePartitionedAvroFileSource.getWorkunits} contains that key,
 * including work units nested inside a {@code MultiWorkUnit}.
 */
@Test
public void testJobStateNotCopiedToWorkUnit() {
  final String jobOnlyKey = "dummy.job.config";
  final String sourceDirectory = OUTPUT_DIR + Path.SEPARATOR + SOURCE_ENTITY;
  // Minimum watermark is the start time minus one minute, printed with the partition pattern.
  final String minWatermark = DateTimeFormat.forPattern(DATE_PATTERN).print(this.startDateTime.minusMinutes(1));

  DatePartitionedAvroFileSource avroSource = new DatePartitionedAvroFileSource();
  SourceState jobState = new SourceState();
  jobState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, ConfigurationKeys.LOCAL_FS_URI);
  jobState.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, SOURCE_ENTITY);
  jobState.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, sourceDirectory);
  jobState.setProp(ConfigurationKeys.SOURCE_ENTITY, SOURCE_ENTITY);
  jobState.setProp(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 2);
  jobState.setProp("date.partitioned.source.partition.pattern", DATE_PATTERN);
  jobState.setProp("date.partitioned.source.min.watermark.value", minWatermark);
  jobState.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, TableType.SNAPSHOT_ONLY);
  jobState.setProp("date.partitioned.source.partition.prefix", PREFIX);
  jobState.setProp("date.partitioned.source.partition.suffix", SUFFIX);
  // The property that must not show up in any work unit.
  jobState.setProp(jobOnlyKey, "dummy");

  List<WorkUnit> produced = avroSource.getWorkunits(jobState);
  Assert.assertEquals(produced.size(), 4);

  for (WorkUnit candidate : produced) {
    if (!(candidate instanceof MultiWorkUnit)) {
      Assert.assertFalse(candidate.contains(jobOnlyKey));
      continue;
    }
    // A MultiWorkUnit is checked through each of the work units it wraps.
    for (WorkUnit nested : ((MultiWorkUnit) candidate).getWorkUnits()) {
      Assert.assertFalse(nested.contains(jobOnlyKey));
    }
  }
}
Use of org.apache.gobblin.configuration.SourceState in project incubator-gobblin by Apache.
From the class DatePartitionedAvroFileExtractorTest, method testReadPartitionsByMinute.
/**
 * Configures a {@code DatePartitionedAvroFileSource} against the local file system, expects
 * {@code getWorkunits} to return four work units, and hands them to {@code verifyWorkUnits}.
 *
 * @throws IOException declared by the test; see the calls below
 * @throws DataRecordException declared by the test; see the calls below
 */
@Test
public void testReadPartitionsByMinute() throws IOException, DataRecordException {
  final String sourceDirectory = OUTPUT_DIR + Path.SEPARATOR + SOURCE_ENTITY;
  // Minimum watermark is the start time minus one minute, printed with the partition pattern.
  final String minWatermark = DateTimeFormat.forPattern(DATE_PATTERN).print(this.startDateTime.minusMinutes(1));

  DatePartitionedAvroFileSource avroSource = new DatePartitionedAvroFileSource();
  SourceState jobState = new SourceState();
  jobState.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, ConfigurationKeys.LOCAL_FS_URI);
  jobState.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, SOURCE_ENTITY);
  jobState.setProp(ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY, sourceDirectory);
  jobState.setProp(ConfigurationKeys.SOURCE_ENTITY, SOURCE_ENTITY);
  jobState.setProp(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, 2);
  jobState.setProp("date.partitioned.source.partition.pattern", DATE_PATTERN);
  jobState.setProp("date.partitioned.source.min.watermark.value", minWatermark);
  jobState.setProp(ConfigurationKeys.EXTRACT_TABLE_TYPE_KEY, TableType.SNAPSHOT_ONLY);
  jobState.setProp("date.partitioned.source.partition.prefix", PREFIX);
  jobState.setProp("date.partitioned.source.partition.suffix", SUFFIX);

  // Read data partitioned by minutes, i.e. each work unit is assigned records under the same
  // YYYY/MM/dd/HH_mm directory.
  List<WorkUnit> produced = avroSource.getWorkunits(jobState);
  Assert.assertEquals(produced.size(), 4);
  verifyWorkUnits(produced);
}
Aggregations