Search in sources:

Example 16 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in the Apache project incubator-gobblin.

From class CloseOnFlushWriterWrapperTest, method testWriteAfterFlush:

/**
 * Verifies that a flush control message closes the current underlying writer
 * and that a subsequent write transparently opens a fresh one: after each
 * write+flush cycle the wrapper has created one more writer, and each writer
 * has seen exactly one record, one flush, one close, one commit, and one
 * handled control message.
 */
@Test
public void testWriteAfterFlush() throws IOException {
    WorkUnitState state = new WorkUnitState();
    state.getJobState().setProp(CloseOnFlushWriterWrapper.WRITER_CLOSE_ON_FLUSH_KEY, "true");
    List<DummyWriter> underlyingWriters = new ArrayList<>();
    CloseOnFlushWriterWrapper<byte[]> wrapper = getCloseOnFlushWriter(underlyingWriters, state);
    byte[] payload = new byte[] { 'a', 'b', 'c', 'd' };
    // Two identical write+flush rounds; round N must leave N writers behind.
    for (int round = 1; round <= 2; round++) {
        wrapper.writeEnvelope(new RecordEnvelope(payload));
        wrapper.getMessageHandler().handleMessage(FlushControlMessage.builder().build());
        Assert.assertEquals(underlyingWriters.size(), round);
        DummyWriter current = underlyingWriters.get(round - 1);
        Assert.assertEquals(current.recordsWritten(), 1);
        Assert.assertEquals(current.flushCount, 1);
        Assert.assertEquals(current.closeCount, 1);
        Assert.assertTrue(current.committed);
        Assert.assertEquals(current.handlerCalled, 1);
    }
}
Also used : RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) ArrayList(java.util.ArrayList) Test(org.testng.annotations.Test)

Example 17 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in the Apache project incubator-gobblin.

From class MetadataWriterWrapperTest, method testAppendsMetadataWithNormalRecord:

/**
 * Checks that writing and committing a plain byte-array record through
 * MetadataWriterWrapper publishes writer metadata into the state under
 * ConfigurationKeys.WRITER_METADATA_KEY, and that this metadata contains the
 * "default-encoding" entry.
 */
@Test
public void testAppendsMetadataWithNormalRecord() throws IOException {
    state = new WorkUnitState();
    dummyWriter = new MetadataDummyWriter();
    writer = new MetadataWriterWrapper<>(dummyWriter, byte[].class, 1, 0, state.getJobState());
    writer.write(new byte[] { 'a', 'b', 'c', 'd' });
    writer.commit();
    String metadata = state.getProp(ConfigurationKeys.WRITER_METADATA_KEY);
    Assert.assertNotNull(metadata, "Expected there to be metadata");
    Assert.assertNotEquals(metadata.indexOf("\"default-encoding\""), -1, "Expected to find default metadata in metadata");
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Test(org.testng.annotations.Test)

Example 18 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in the Apache project incubator-gobblin.

From class HiveTask, method executePublishQueries:

/**
 * Runs the publish phase of a Hive task: renames publish directories into
 * place, executes the publish queries over JDBC, marks the work unit
 * COMMITTED, and (by default) advances the Hive source watermark. Cleanup
 * queries and staging-directory deletion always run in the finally block,
 * even when publishing fails.
 *
 * @param publishEntity carries the cleanup/publish queries and directories;
 *                      each of its collections may be null and is then skipped
 */
protected void executePublishQueries(QueryBasedHivePublishEntity publishEntity) {
    Set<String> cleanUpQueries = Sets.newLinkedHashSet();
    Set<String> publishQueries = Sets.newLinkedHashSet();
    List<String> directoriesToDelete = Lists.newArrayList();
    FileSystem fs = null;
    try {
        fs = HiveSource.getSourceFs(workUnitState);
        if (publishEntity.getCleanupQueries() != null) {
            cleanUpQueries.addAll(publishEntity.getCleanupQueries());
        }
        if (publishEntity.getCleanupDirectories() != null) {
            directoriesToDelete.addAll(publishEntity.getCleanupDirectories());
        }
        if (publishEntity.getPublishDirectories() != null) {
            // Publish snapshot / partition directories
            Map<String, String> publishDirectories = publishEntity.getPublishDirectories();
            try {
                // Presumably key = staging path, value = final path (rename
                // overwrites the destination) — TODO confirm with callers.
                for (Map.Entry<String, String> publishDir : publishDirectories.entrySet()) {
                    HadoopUtils.renamePath(fs, new Path(publishDir.getKey()), new Path(publishDir.getValue()), true);
                }
            } catch (Throwable t) {
                // Any rename failure aborts the whole publish.
                throw Throwables.propagate(t);
            }
        }
        if (publishEntity.getPublishQueries() != null) {
            publishQueries.addAll(publishEntity.getPublishQueries());
        }
        WorkUnitState wus = this.workUnitState;
        // Only after all publish statements succeed is the work unit committed.
        this.hiveJdbcConnector.executeStatements(publishQueries.toArray(new String[publishQueries.size()]));
        wus.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
        // Watermarking is on by default; record the actual high watermark.
        if (wus.getPropAsBoolean(USE_WATERMARKER_KEY, true)) {
            HiveSourceWatermarker watermarker = GobblinConstructorUtils.invokeConstructor(HiveSourceWatermarkerFactory.class, wus.getProp(HiveSource.HIVE_SOURCE_WATERMARKER_FACTORY_CLASS_KEY, HiveSource.DEFAULT_HIVE_SOURCE_WATERMARKER_FACTORY_CLASS)).createFromState(wus);
            watermarker.setActualHighWatermark(wus);
        }
    } catch (RuntimeException re) {
        throw re;
    } catch (Exception e) {
        // Checked failures are logged but not rethrown; the work unit is
        // simply left uncommitted.
        log.error("Error in HiveMaterializer generate publish queries", e);
    } finally {
        // Best-effort cleanup of staging queries and directories.
        try {
            this.hiveJdbcConnector.executeStatements(cleanUpQueries.toArray(new String[cleanUpQueries.size()]));
            HadoopUtils.deleteDirectories(fs, directoriesToDelete, true, true);
        } catch (RuntimeException re) {
            throw re;
        } catch (Exception e) {
            log.error("Failed to cleanup staging entities.", e);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) SQLException(java.sql.SQLException) HiveSourceWatermarker(org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarker) FileSystem(org.apache.hadoop.fs.FileSystem) HiveSourceWatermarkerFactory(org.apache.gobblin.data.management.conversion.hive.watermarker.HiveSourceWatermarkerFactory) Map(java.util.Map)

Example 19 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in the Apache project incubator-gobblin.

From class OldApiHadoopFileInputSourceTest, method testGetWorkUnitsAndExtractor:

/**
 * End-to-end check of the old-API Hadoop file input source: exactly one work
 * unit is produced from the source state, and its extractor yields a single
 * text record matching TEXT before signalling end-of-input with null.
 */
@Test
public void testGetWorkUnitsAndExtractor() throws IOException, DataRecordException {
    OldApiHadoopFileInputSource<String, Text, LongWritable, Text> source = new TestHadoopFileInputSource();
    List<WorkUnit> workUnits = source.getWorkunits(this.sourceState);
    Assert.assertEquals(workUnits.size(), 1);
    WorkUnitState wuState = new WorkUnitState(workUnits.get(0));
    Closer resourceCloser = Closer.create();
    try {
        OldApiHadoopFileInputExtractor<String, Text, LongWritable, Text> extractor =
            (OldApiHadoopFileInputExtractor<String, Text, LongWritable, Text>) source.getExtractor(wuState);
        Text firstRecord = extractor.readRecord(null);
        Assert.assertEquals(firstRecord.toString(), TEXT);
        Assert.assertNull(extractor.readRecord(null));
    } catch (Throwable t) {
        // Preserve the original failure while still closing registered resources.
        throw resourceCloser.rethrow(t);
    } finally {
        resourceCloser.close();
    }
}
Also used : Closer(com.google.common.io.Closer) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) Test(org.testng.annotations.Test)

Example 20 with WorkUnitState

Use of org.apache.gobblin.configuration.WorkUnitState in the Apache project incubator-gobblin.

From class ConstructStateTest, method test:

/**
 * Exercises ConstructState.mergeIntoWorkUnitState: a key registered via
 * addOverwriteProperties replaces the existing work-unit value, while a key
 * set only as an ordinary construct property does not override one already
 * present on the work unit.
 */
@Test
public void test() {
    ConstructState constructState = new ConstructState();
    WorkUnitState wuState = new WorkUnitState();
    final String overwritableKey = "overrideMe";
    final String plainKey = "overrideMe.not";
    final String fromWorkUnit = "workUnit";
    final String fromConstruct = "construct";
    wuState.setProp(overwritableKey, fromWorkUnit);
    wuState.setProp(plainKey, fromWorkUnit);
    constructState.addOverwriteProperties(
        ImmutableMap.<String, String>builder().put(overwritableKey, fromConstruct).build());
    constructState.setProp(plainKey, fromConstruct);
    constructState.mergeIntoWorkUnitState(wuState);
    // Overwrite property wins; plain construct property loses to the work unit.
    Assert.assertEquals(wuState.getProp(overwritableKey), fromConstruct);
    Assert.assertEquals(wuState.getProp(plainKey), fromWorkUnit);
    Assert.assertEquals(wuState.getPropertyNames().size(), 3);
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Test(org.testng.annotations.Test)

Aggregations

WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)222 Test (org.testng.annotations.Test)143 State (org.apache.gobblin.configuration.State)48 SourceState (org.apache.gobblin.configuration.SourceState)39 WorkUnit (org.apache.gobblin.source.workunit.WorkUnit)39 Schema (org.apache.avro.Schema)29 Path (org.apache.hadoop.fs.Path)26 GenericRecord (org.apache.avro.generic.GenericRecord)19 JsonObject (com.google.gson.JsonObject)17 ArrayList (java.util.ArrayList)16 File (java.io.File)14 TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState)12 List (java.util.List)11 Configuration (org.apache.hadoop.conf.Configuration)11 IOException (java.io.IOException)10 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)10 Extract (org.apache.gobblin.source.workunit.Extract)10 FileSystem (org.apache.hadoop.fs.FileSystem)10 Closer (com.google.common.io.Closer)8 JsonParser (com.google.gson.JsonParser)8