Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
The class StringFilterConverterTest, method testConvertRecordWithSimpleRegex.
/**
 * Test for {@link StringFilterConverter#convertRecord(Class, String, WorkUnitState)} with a regex that is only a
 * sequence of letters.
 *
 * <p>The converter is initialised with the pattern {@code HelloWorld}. The test then checks that the input
 * {@code HelloWorld} is returned as exactly one record equal to the input, and that the input {@code Hello}
 * yields no records at all.
 *
 * @throws DataConversionException propagated from the converter calls; any such exception fails the test
 */
@Test
public void testConvertRecordWithSimpleRegex() throws DataConversionException {
  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.setProp(ConfigurationKeys.CONVERTER_STRING_FILTER_PATTERN, "HelloWorld");
  StringFilterConverter converter = new StringFilterConverter();
  converter.init(workUnitState);

  // Test that HelloWorld matches the pattern HelloWorld
  String test = "HelloWorld";
  Iterator<String> itr = converter.convertRecord(String.class, test, workUnitState).iterator();
  Assert.assertTrue(itr.hasNext());
  Assert.assertEquals(itr.next(), test);
  // assertFalse (rather than assertTrue(!...)) so a failure reports the real expectation:
  // the iterator must be exhausted after the single matching record.
  Assert.assertFalse(itr.hasNext());

  // Test that Hello does not match the pattern HelloWorld
  test = "Hello";
  itr = converter.convertRecord(String.class, test, workUnitState).iterator();
  Assert.assertFalse(itr.hasNext());
}
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
The class StringFilterConverterTest, method testConvertRecordWithComplexRegex.
/**
 * Test for {@link StringFilterConverter#convertRecord(Class, String, WorkUnitState)} with a regex that actually uses
 * regex features, such as wildcards.
 *
 * <p>The converter is initialised with the pattern {@code .*}. The test then checks that both {@code HelloWorld}
 * and {@code Java} are each returned as exactly one record equal to the input.
 *
 * @throws DataConversionException propagated from the converter calls; any such exception fails the test
 */
@Test
public void testConvertRecordWithComplexRegex() throws DataConversionException {
  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.setProp(ConfigurationKeys.CONVERTER_STRING_FILTER_PATTERN, ".*");
  StringFilterConverter converter = new StringFilterConverter();
  converter.init(workUnitState);

  // Test that HelloWorld matches the pattern .*
  String test = "HelloWorld";
  Iterator<String> itr = converter.convertRecord(String.class, test, workUnitState).iterator();
  Assert.assertTrue(itr.hasNext());
  Assert.assertEquals(itr.next(), test);
  // assertFalse (rather than assertTrue(!...)) so a failure reports the real expectation:
  // exactly one record is produced per matching input.
  Assert.assertFalse(itr.hasNext());

  // Test that Java matches the pattern .*
  test = "Java";
  itr = converter.convertRecord(String.class, test, workUnitState).iterator();
  Assert.assertTrue(itr.hasNext());
  Assert.assertEquals(itr.next(), test);
  Assert.assertFalse(itr.hasNext());
}
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
The class BaseDataPublisherTest, method testWithFsMetricsNoPartitions.
/**
 * Publishes metadata for three work units whose writer metrics are built with a {@code null} second argument
 * (presumably "no partition" -- confirm against {@code buildWriterMetrics}), then checks the resulting
 * {@code metadata.json} in the final publish directory via {@code checkMetadata}.
 *
 * <p>The temporary publish directory is always removed, even when an assertion fails.
 *
 * @throws IOException propagated from publishing, metadata checking, or directory cleanup
 */
@Test
public void testWithFsMetricsNoPartitions() throws IOException {
  File outputDir = Files.createTempDir();
  try {
    State jobState = buildDefaultState(1);
    String metadataJson = new GlobalMetadata().toJson();
    String outputPath = outputDir.getAbsolutePath();

    // Job-level configuration: drop the metadata output dir so only the final dir / output file apply.
    jobState.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
    jobState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
    jobState.setProp(ConfigurationKeys.WRITER_METADATA_KEY, metadataJson);
    jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, outputPath);
    jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
    jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");

    // First work unit: foo1.json
    WorkUnitState firstUnit = new WorkUnitState();
    String firstMetricsJson = buildWriterMetrics("foo1.json", null, 0, 10).toJson();
    firstUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, firstMetricsJson);
    firstUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, metadataJson);
    addStateToWorkunit(jobState, firstUnit);

    // Second work unit: foo3.json
    WorkUnitState secondUnit = new WorkUnitState();
    String secondMetricsJson = buildWriterMetrics("foo3.json", null, 1, 30).toJson();
    secondUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, metadataJson);
    secondUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, secondMetricsJson);
    addStateToWorkunit(jobState, secondUnit);

    // Third work unit: foo4.json
    WorkUnitState thirdUnit = new WorkUnitState();
    String thirdMetricsJson = buildWriterMetrics("foo4.json", null, 2, 55).toJson();
    thirdUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, metadataJson);
    thirdUnit.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, thirdMetricsJson);
    addStateToWorkunit(jobState, thirdUnit);

    new BaseDataPublisher(jobState).publishMetadata(ImmutableList.of(firstUnit, secondUnit, thirdUnit));

    // Expected values passed to checkMetadata: 3 and 95 (95 == 10 + 30 + 55), followed by one FileInfo per file.
    File publishedMetadata = new File(outputPath, "metadata.json");
    checkMetadata(
        publishedMetadata,
        3,
        95,
        new FsWriterMetrics.FileInfo("foo3.json", 30),
        new FsWriterMetrics.FileInfo("foo1.json", 10),
        new FsWriterMetrics.FileInfo("foo4.json", 55));
  } finally {
    FileUtils.deleteDirectory(outputDir);
  }
}
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
The class BaseDataPublisherTest, method buildTaskState.
/**
 * Builds a {@link WorkUnitState} attached to a newly created task-scoped broker and pre-populated with the
 * extract, writer and publisher properties used by the publisher tests.
 *
 * <p>The top-level output and working directories are always set. When {@code numBranches} is greater than one,
 * a per-branch final dir and writer output dir (key suffixed with {@code "." + i}) are added for every branch.
 *
 * @param numBranches value stored under {@code ConfigurationKeys.FORK_BRANCHES_KEY}; also the number of
 *     per-branch directory entries written when greater than one
 * @return the configured work unit state
 */
private WorkUnitState buildTaskState(int numBranches) {
  final String finalDir = "/data/output";
  final String workingDir = "/data/working";

  // Broker hierarchy: global instance -> job scope -> task scope.
  SharedResourcesBroker<GobblinScopeTypes> globalBroker =
      SharedResourcesBrokerFactory.createDefaultTopLevelBroker(
          ConfigFactory.empty(), GobblinScopeTypes.GLOBAL.defaultScopeInstance());
  JobScopeInstance jobScope =
      new JobScopeInstance("LineageEventTest", String.valueOf(System.currentTimeMillis()));
  SharedResourcesBroker<GobblinScopeTypes> jobScopedBroker =
      globalBroker.newSubscopedBuilder(jobScope).build();
  TaskScopeInstance taskScope = new TaskScopeInstance("LineageEventTestTask" + System.currentTimeMillis());
  SharedResourcesBroker<GobblinScopeTypes> taskScopedBroker =
      jobScopedBroker.newSubscopedBuilder(taskScope).build();

  WorkUnitState taskState = new WorkUnitState(WorkUnit.createEmpty(), new State(), taskScopedBroker);
  taskState.setProp(ConfigurationKeys.EXTRACT_NAMESPACE_NAME_KEY, "namespace");
  taskState.setProp(ConfigurationKeys.EXTRACT_TABLE_NAME_KEY, "table");
  taskState.setProp(ConfigurationKeys.WRITER_FILE_PATH_TYPE, "namespace_table");
  taskState.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, numBranches);
  taskState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, finalDir);
  taskState.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, workingDir);

  // A single branch (or fewer) needs no per-branch directories.
  if (numBranches <= 1) {
    return taskState;
  }

  for (int branch = 0; branch < numBranches; branch++) {
    String branchSuffix = "/branch" + branch;
    taskState.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR + "." + branch, finalDir + branchSuffix);
    taskState.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR + "." + branch, workingDir + branchSuffix);
  }
  return taskState;
}
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.
The class BaseDataPublisherTest, method testPublishSingleTask.
/**
 * Publishes a single-branch task state that has a lineage source set, then checks which lineage destination
 * keys are present on the state: the key for branch 0 must exist and the key for branch 1 must not.
 *
 * @throws IOException propagated from the publisher
 */
@Test
public void testPublishSingleTask() throws IOException {
  WorkUnitState taskState = buildTaskState(1);

  // Register a source dataset on the state before publishing.
  LineageInfo lineage = LineageInfo.getLineageInfo(taskState.getTaskBroker()).get();
  DatasetDescriptor kafkaSource = new DatasetDescriptor("kafka", "testTopic");
  lineage.setSource(kafkaSource, taskState);

  new BaseDataPublisher(taskState).publishData(taskState);

  boolean hasBranchZeroDestination = taskState.contains("gobblin.event.lineage.branch.0.destination");
  Assert.assertTrue(hasBranchZeroDestination);
  boolean hasBranchOneDestination = taskState.contains("gobblin.event.lineage.branch.1.destination");
  Assert.assertFalse(hasBranchOneDestination);
}
Aggregations