Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by Apache.
The class BytesToAvroConverterTest, method testCanParseBinary.
/**
 * Verifies that {@link BytesToAvroConverter} converts the schema resource into a schema named
 * {@code testRecord} and converts the binary record resource into exactly one {@link GenericRecord}
 * whose {@code testStr} and {@code testInt} fields hold the expected values.
 *
 * @throws DataConversionException if the converter fails on the record
 * @throws SchemaConversionException if the converter fails on the schema
 * @throws IOException if a test resource cannot be read or closed
 */
@Test
public void testCanParseBinary() throws DataConversionException, SchemaConversionException, IOException {
  ClassLoader loader = getClass().getClassLoader();
  // try-with-resources closes both streams even when an assertion fails. A null resource is skipped on
  // close, so the not-null assertions below still report a missing test resource cleanly.
  try (InputStream schemaIn = loader.getResourceAsStream(RESOURCE_PATH_PREFIX + "test_record_schema.avsc");
      InputStream recordIn = loader.getResourceAsStream(RESOURCE_PATH_PREFIX + "test_record_binary.avro")) {
    Assert.assertNotNull("Could not load test schema from resources", schemaIn);
    Assert.assertNotNull("Could not load test record from resources", recordIn);

    BytesToAvroConverter converter = new BytesToAvroConverter();
    WorkUnitState state = new WorkUnitState();
    converter.init(state);

    Schema schema = converter.convertSchema(IOUtils.toString(schemaIn, StandardCharsets.UTF_8), state);
    // Expected value first, matching the message-first (JUnit-style) Assert overloads used in this test.
    Assert.assertEquals("testRecord", schema.getName());

    Iterator<GenericRecord> records =
        converter.convertRecord(schema, IOUtils.toByteArray(recordIn), state).iterator();
    // Fail with an assertion message instead of a NoSuchElementException when nothing was produced.
    Assert.assertTrue("Expected at least 1 record", records.hasNext());
    GenericRecord record = records.next();
    Assert.assertFalse("Expected only 1 record", records.hasNext());

    Assert.assertEquals("testing123", record.get("testStr").toString());
    Assert.assertEquals(-2, record.get("testInt"));
  }
}
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by Apache.
The class StringSplitterConverterTest, method testConvertRecord.
/**
 * Test that {@link StringSplitterConverter#convertRecord(Class, String, WorkUnitState)} properly splits a String by
 * a specified delimiter.
 *
 * <p>Three inputs are checked with a tab delimiter: a record without the delimiter (one output), two tokens
 * separated by one delimiter (two outputs), and the same two tokens with leading, trailing and repeated
 * delimiters (still two outputs).
 *
 * @throws DataConversionException if the converter fails on any of the inputs
 */
@Test
public void testConvertRecord() throws DataConversionException {
  String delimiter = "\t";
  WorkUnitState workUnitState = new WorkUnitState();
  workUnitState.setProp(ConfigurationKeys.CONVERTER_STRING_SPLITTER_DELIMITER, delimiter);
  StringSplitterConverter converter = new StringSplitterConverter();
  converter.init(workUnitState);

  // Test that the iterator returned by convertRecord is of length 1 when the delimiter is not in the inputRecord
  String test = "HelloWorld";
  Iterator<String> itr = converter.convertRecord(String.class, test, workUnitState).iterator();
  Assert.assertTrue(itr.hasNext());
  Assert.assertEquals(itr.next(), test);
  Assert.assertFalse(itr.hasNext());

  // Test that the iterator returned by convertRecord is of length 2 when the delimiter is in the middle of two strings
  String test1 = "Hello";
  String test2 = "World";
  test = test1 + delimiter + test2;
  itr = converter.convertRecord(String.class, test, workUnitState).iterator();
  Assert.assertTrue(itr.hasNext());
  Assert.assertEquals(itr.next(), test1);
  Assert.assertTrue(itr.hasNext());
  Assert.assertEquals(itr.next(), test2);
  Assert.assertFalse(itr.hasNext());

  // Test that the iterator returned by convertRecord is of length 2 even when the delimiter occurs multiple times in
  // between the same two strings, and if the delimiter occurs at the end and beginning of the inputRecord
  test = delimiter + test1 + delimiter + delimiter + test2 + delimiter;
  itr = converter.convertRecord(String.class, test, workUnitState).iterator();
  Assert.assertTrue(itr.hasNext());
  Assert.assertEquals(itr.next(), test1);
  Assert.assertTrue(itr.hasNext());
  Assert.assertEquals(itr.next(), test2);
  Assert.assertFalse(itr.hasNext());
}
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by Apache.
The class IdentityForkOperatorTest, method testForkMethods.
/**
 * Exercises {@link IdentityForkOperator} with the configured branch count set first to 2 and then to 0,
 * checking the schema flags, the record flags and the reported branch count in each case.
 */
@Test
public void testForkMethods() {
  WorkUnitState state = new WorkUnitState();
  ForkOperator<String, String> identityFork = new IdentityForkOperator<>();

  // Two branches: both the schema and the record are flagged true for every branch.
  state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, 2);
  List<Boolean> schemaFlags = identityFork.forkSchema(state, "");
  Assert.assertEquals(schemaFlags, Arrays.asList(true, true));
  List<Boolean> recordFlags = identityFork.forkDataRecord(state, "");
  Assert.assertEquals(recordFlags, Arrays.asList(true, true));
  Assert.assertEquals(identityFork.getBranches(state), 2);

  // Zero branches: both flag lists come back empty.
  state.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, 0);
  Assert.assertTrue(identityFork.forkSchema(state, "").isEmpty());
  Assert.assertTrue(identityFork.forkDataRecord(state, "").isEmpty());
  Assert.assertEquals(identityFork.getBranches(state), 0);
}
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by Apache.
The class BaseDataPublisherTest, method testBogusMetadataMerger.
/**
 * Checks that an {@link IllegalArgumentException} is raised when the configured metadata merger class
 * ({@code java.lang.String} here) does not actually implement MetadataMerger.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void testBogusMetadataMerger() throws IOException {
  State publisherState = buildDefaultState(1);
  publisherState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
  // Deliberately name a class that is not a metadata merger.
  publisherState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_MERGER_NAME_KEY, "java.lang.String");
  publisherState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");

  WorkUnitState workUnitState = new WorkUnitState();
  addStateToWorkunit(publisherState, workUnitState);

  // The test passes only if the expected exception escapes from here.
  new BaseDataPublisher(publisherState).publishMetadata(Collections.singletonList(workUnitState));
}
Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by Apache.
The class BaseDataPublisherTest, method testWithPartitionKey.
/**
 * Verifies that, with the metadata output directory property removed and a final publish directory set,
 * {@code publishMetadata} leaves a {@code metadata.json} file in the partition directory of each work unit.
 *
 * @throws IOException if publishing metadata or deleting the temporary directory fails
 */
@Test
public void testWithPartitionKey() throws IOException {
  File publishPath = Files.createTempDir();
  try {
    // Create one directory per partition. Fail fast if the fixture cannot be set up, so the cause is not
    // masked by a later "metadata.json does not exist" failure.
    File part1 = new File(publishPath, "1-2-3-4");
    Assert.assertTrue(part1.mkdir());
    File part2 = new File(publishPath, "5-6-7-8");
    Assert.assertTrue(part2.mkdir());

    State s = buildDefaultState(1);
    String md = new GlobalMetadata().toJson();
    s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
    s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
    s.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishPath.getAbsolutePath());
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");

    // One work unit per partition path, each carrying the same writer metadata.
    WorkUnitState wuState1 = new WorkUnitState();
    wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4");
    wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
    addStateToWorkunit(s, wuState1);

    WorkUnitState wuState2 = new WorkUnitState();
    wuState2.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "5-6-7-8");
    wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
    addStateToWorkunit(s, wuState2);

    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(ImmutableList.of(wuState1, wuState2));

    Assert.assertTrue(new File(part1, "metadata.json").exists());
    Assert.assertTrue(new File(part2, "metadata.json").exists());
  } finally {
    // Always remove the temporary publish directory, even when an assertion fails.
    FileUtils.deleteDirectory(publishPath);
  }
}
Aggregations