Example 61 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

In class BytesToAvroConverterTest, method testCanParseBinary:

@Test
public void testCanParseBinary() throws DataConversionException, SchemaConversionException, IOException {
    InputStream schemaIn = getClass().getClassLoader().getResourceAsStream(RESOURCE_PATH_PREFIX + "test_record_schema.avsc");
    InputStream recordIn = getClass().getClassLoader().getResourceAsStream(RESOURCE_PATH_PREFIX + "test_record_binary.avro");
    Assert.assertNotNull(schemaIn, "Could not load test schema from resources");
    Assert.assertNotNull(recordIn, "Could not load test record from resources");
    BytesToAvroConverter converter = new BytesToAvroConverter();
    WorkUnitState state = new WorkUnitState();
    converter.init(state);
    Schema schema = converter.convertSchema(IOUtils.toString(schemaIn, StandardCharsets.UTF_8), state);
    Assert.assertEquals(schema.getName(), "testRecord");
    Iterator<GenericRecord> records = converter.convertRecord(schema, IOUtils.toByteArray(recordIn), state).iterator();
    GenericRecord record = records.next();
    Assert.assertFalse(records.hasNext(), "Expected only 1 record");
    Assert.assertEquals(record.get("testStr").toString(), "testing123");
    Assert.assertEquals(record.get("testInt"), -2);
}
Also used : InputStream(java.io.InputStream) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Schema(org.apache.avro.Schema) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.testng.annotations.Test)
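
The assertions above imply the fixture schema is a record named testRecord with a string field testStr and an int field testInt. As a rough sketch of how a binary fixture like test_record_binary.avro could be generated with plain Avro APIs (the schema below is inferred from the assertions, not copied from the actual test_record_schema.avsc, and the converter is assumed to read raw binary-encoded records rather than an Avro container file):

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

public class TestRecordFixture {

    // Schema inferred from the test's assertions; the real test_record_schema.avsc may differ.
    private static final String SCHEMA_JSON =
        "{\"type\":\"record\",\"name\":\"testRecord\",\"fields\":["
            + "{\"name\":\"testStr\",\"type\":\"string\"},"
            + "{\"name\":\"testInt\",\"type\":\"int\"}]}";

    public static byte[] buildBinaryRecord() throws IOException {
        Schema schema = new Schema.Parser().parse(SCHEMA_JSON);
        GenericRecord record = new GenericData.Record(schema);
        record.put("testStr", "testing123");
        record.put("testInt", -2);
        // Raw binary encoding, no Avro container-file header.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
        new GenericDatumWriter<GenericRecord>(schema).write(record, encoder);
        encoder.flush();
        return out.toByteArray();
    }
}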

Example 62 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

In class StringSplitterConverterTest, method testConvertRecord:

/**
 * Test that {@link StringSplitterConverter#convertRecord(Class, String, WorkUnitState)} properly splits a String by
 * a specified delimiter.
 */
@Test
public void testConvertRecord() throws DataConversionException {
    String delimiter = "\t";
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.setProp(ConfigurationKeys.CONVERTER_STRING_SPLITTER_DELIMITER, delimiter);
    StringSplitterConverter converter = new StringSplitterConverter();
    converter.init(workUnitState);
    // Test that the iterator returned by convertRecord is of length 1 when the delimiter is not in the inputRecord
    String test = "HelloWorld";
    Iterator<String> itr = converter.convertRecord(String.class, test, workUnitState).iterator();
    Assert.assertTrue(itr.hasNext());
    Assert.assertEquals(itr.next(), test);
    Assert.assertFalse(itr.hasNext());
    // Test that the iterator returned by convertRecord is of length 2 when the delimiter is in the middle of two strings
    String test1 = "Hello";
    String test2 = "World";
    test = test1 + delimiter + test2;
    itr = converter.convertRecord(String.class, test, workUnitState).iterator();
    Assert.assertTrue(itr.hasNext());
    Assert.assertEquals(itr.next(), test1);
    Assert.assertTrue(itr.hasNext());
    Assert.assertEquals(itr.next(), test2);
    Assert.assertFalse(itr.hasNext());
    // Test that the iterator returned by convertRecord is of length 2 even when the delimiter occurs multiple times in
    // between the same two strings, and if the delimiter occurs at the end and beginning of the inputRecord
    test1 = "Hello";
    test2 = "World";
    test = delimiter + test1 + delimiter + delimiter + test2 + delimiter;
    itr = converter.convertRecord(String.class, test, workUnitState).iterator();
    Assert.assertTrue(itr.hasNext());
    Assert.assertEquals(itr.next(), test1);
    Assert.assertTrue(itr.hasNext());
    Assert.assertEquals(itr.next(), test2);
    Assert.assertFalse(itr.hasNext());
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Test(org.testng.annotations.Test)
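
The empty-token behavior exercised in the last block (leading, trailing, and repeated delimiters yield no empty strings) matches what Guava's Splitter produces with omitEmptyStrings(). The snippet below only illustrates that contract; it is not StringSplitterConverter's actual implementation:

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import java.util.List;

public class SplitterBehaviorDemo {

    public static void main(String[] args) {
        String delimiter = "\t";
        // Leading, trailing, and repeated delimiters produce no empty tokens.
        List<String> tokens = ImmutableList.copyOf(
            Splitter.on(delimiter).omitEmptyStrings().split("\tHello\t\tWorld\t"));
        // Prints [Hello, World]
        System.out.println(tokens);
    }
}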

Example 63 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

In class IdentityForkOperatorTest, method testForkMethods:

@Test
public void testForkMethods() {
    ForkOperator<String, String> dummyForkOperator = new IdentityForkOperator<String, String>();
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, 2);
    List<Boolean> schemas = dummyForkOperator.forkSchema(workUnitState, "");
    Assert.assertEquals(schemas, Arrays.asList(true, true));
    List<Boolean> records = dummyForkOperator.forkDataRecord(workUnitState, "");
    Assert.assertEquals(records, Arrays.asList(true, true));
    Assert.assertEquals(dummyForkOperator.getBranches(workUnitState), 2);
    workUnitState.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, 0);
    schemas = dummyForkOperator.forkSchema(workUnitState, "");
    Assert.assertTrue(schemas.isEmpty());
    records = dummyForkOperator.forkDataRecord(workUnitState, "");
    Assert.assertTrue(records.isEmpty());
    Assert.assertEquals(dummyForkOperator.getBranches(workUnitState), 0);
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Test(org.testng.annotations.Test)
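
The boolean lists returned by forkSchema and forkDataRecord carry one entry per branch; a true at index i means the schema or record should be routed to branch i. A minimal sketch of how a caller might interpret that result is below; the BranchSink interface is hypothetical and only stands in for whatever Gobblin's runtime does with each branch:

import java.util.List;

public class ForkRouting {

    // Hypothetical sink, used for illustration only.
    interface BranchSink<D> {
        void routeToBranch(int branch, D record);
    }

    static <D> void route(List<Boolean> forkedRecord, D record, BranchSink<D> sink) {
        for (int branch = 0; branch < forkedRecord.size(); branch++) {
            // IdentityForkOperator marks every branch true, so the record fans out to all branches.
            if (Boolean.TRUE.equals(forkedRecord.get(branch))) {
                sink.routeToBranch(branch, record);
            }
        }
    }
}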

Example 64 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

In class BaseDataPublisherTest, method testBogusMetadataMerger:

/**
 * Test that an IllegalArgumentException is thrown when the configured merger class does not actually
 * implement MetadataMerger.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void testBogusMetadataMerger() throws IOException {
    State s = buildDefaultState(1);
    s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
    s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_MERGER_NAME_KEY, "java.lang.String");
    s.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");
    WorkUnitState wuState = new WorkUnitState();
    addStateToWorkunit(s, wuState);
    BaseDataPublisher publisher = new BaseDataPublisher(s);
    publisher.publishMetadata(Collections.singletonList(wuState));
}
Also used : TaskState(org.apache.hadoop.mapreduce.v2.api.records.TaskState) State(org.apache.gobblin.configuration.State) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Test(org.testng.annotations.Test)
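
Configuring java.lang.String as the merger fails because the class exists but does not implement the expected merger contract. The sketch below shows that general kind of check with a hypothetical MetadataMergerLike interface; it is not Gobblin's actual validation code, only the pattern the test exercises:

public final class MergerValidation {

    // Hypothetical stand-in for the merger contract; the real interface lives in Gobblin.
    public interface MetadataMergerLike<T> {
        void update(T metadata);
        T getMergedMetadata();
    }

    public static Class<?> resolveMergerClass(String className) throws ClassNotFoundException {
        Class<?> candidate = Class.forName(className);
        if (!MetadataMergerLike.class.isAssignableFrom(candidate)) {
            // The failure mode the test expects when "java.lang.String" is configured.
            throw new IllegalArgumentException(
                className + " does not implement " + MetadataMergerLike.class.getName());
        }
        return candidate;
    }
}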

Example 65 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

In class BaseDataPublisherTest, method testWithPartitionKey:

@Test
public void testWithPartitionKey() throws IOException {
    File publishPath = Files.createTempDir();
    try {
        File part1 = new File(publishPath, "1-2-3-4");
        part1.mkdir();
        File part2 = new File(publishPath, "5-6-7-8");
        part2.mkdir();
        State s = buildDefaultState(1);
        String md = new GlobalMetadata().toJson();
        s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
        s.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
        s.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
        s.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishPath.getAbsolutePath());
        s.setProp(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR, "false");
        s.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE, "metadata.json");
        WorkUnitState wuState1 = new WorkUnitState();
        wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4");
        wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
        addStateToWorkunit(s, wuState1);
        WorkUnitState wuState2 = new WorkUnitState();
        wuState2.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "5-6-7-8");
        wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md);
        addStateToWorkunit(s, wuState2);
        BaseDataPublisher publisher = new BaseDataPublisher(s);
        publisher.publishMetadata(ImmutableList.of(wuState1, wuState2));
        Assert.assertTrue(new File(part1, "metadata.json").exists());
        Assert.assertTrue(new File(part2, "metadata.json").exists());
    } finally {
        FileUtils.deleteDirectory(publishPath);
    }
}
Also used : GlobalMetadata(org.apache.gobblin.metadata.types.GlobalMetadata) TaskState(org.apache.hadoop.mapreduce.v2.api.records.TaskState) State(org.apache.gobblin.configuration.State) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) File(java.io.File) Test(org.testng.annotations.Test)
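
buildDefaultState and addStateToWorkunit are helpers defined elsewhere in BaseDataPublisherTest and are not shown in these snippets. A plausible sketch of the property-copying step that addStateToWorkunit appears to perform is below; treat it as an assumption about the helper's intent, not the actual test code:

import org.apache.gobblin.configuration.State;
import org.apache.gobblin.configuration.WorkUnitState;

public class StateCopyUtil {

    // Assumed behavior: copy every job-level property onto the work unit's state
    // so the publisher sees the merged configuration.
    public static void copyStateIntoWorkUnit(State jobState, WorkUnitState wuState) {
        for (String key : jobState.getPropertyNames()) {
            wuState.setProp(key, jobState.getProp(key));
        }
    }
}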

Aggregations

WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 222 usages
Test (org.testng.annotations.Test): 143 usages
State (org.apache.gobblin.configuration.State): 48 usages
SourceState (org.apache.gobblin.configuration.SourceState): 39 usages
WorkUnit (org.apache.gobblin.source.workunit.WorkUnit): 39 usages
Schema (org.apache.avro.Schema): 29 usages
Path (org.apache.hadoop.fs.Path): 26 usages
GenericRecord (org.apache.avro.generic.GenericRecord): 19 usages
JsonObject (com.google.gson.JsonObject): 17 usages
ArrayList (java.util.ArrayList): 16 usages
File (java.io.File): 14 usages
TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState): 12 usages
List (java.util.List): 11 usages
Configuration (org.apache.hadoop.conf.Configuration): 11 usages
IOException (java.io.IOException): 10 usages
LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark): 10 usages
Extract (org.apache.gobblin.source.workunit.Extract): 10 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 10 usages
Closer (com.google.common.io.Closer): 8 usages
JsonParser (com.google.gson.JsonParser): 8 usages