Search in sources :

Example 1 with RecordEnvelope

use of org.apache.gobblin.stream.RecordEnvelope in project incubator-gobblin by apache.

the class MultiWriterWatermarkManagerTest method testFlakyWatermarkStorage.

/**
 * Exercises the watermark manager against a storage backend whose commits alternately succeed
 * and fail, and checks that watermark retrieval from the registered writer keeps succeeding.
 */
@Test
public void testFlakyWatermarkStorage() throws IOException, InterruptedException {
    final int failEvery = 2;
    // Storage stub: every failEvery-th commit call throws; every other call records what it was handed.
    WatermarkStorage mockWatermarkStorage = new WatermarkStorage() {

        private final List<CheckpointableWatermark> lastCommitted = new ArrayList<>();

        private int commitCalls = 0;

        @Override
        public Map<String, CheckpointableWatermark> getCommittedWatermarks(Class<? extends CheckpointableWatermark> watermarkClass, Iterable<String> sourcePartitions) throws IOException {
            return null;
        }

        @Override
        public void commitWatermarks(Iterable<CheckpointableWatermark> watermarks) throws IOException {
            commitCalls++;
            if (commitCalls % failEvery == 0) {
                throw new IOException("Failed to write");
            }
            // Successful commit: replace the previous snapshot with the new batch.
            lastCommitted.clear();
            for (CheckpointableWatermark committed : watermarks) {
                lastCommitted.add(committed);
            }
        }
    };
    // Writer stub: only the watermark-related calls are usable; the data-path calls are unsupported.
    WatermarkAwareWriter mockWatermarkWriter = new WatermarkAwareWriter() {

        private long nextValue = 0;

        @Override
        public boolean isWatermarkCapable() {
            return true;
        }

        @Override
        public Map<String, CheckpointableWatermark> getCommittableWatermark() {
            // Each poll yields a watermark one higher than the previous one.
            nextValue++;
            CheckpointableWatermark current = new DefaultCheckpointableWatermark("default", new LongWatermark(nextValue));
            return Collections.singletonMap("default", current);
        }

        @Override
        public Map<String, CheckpointableWatermark> getUnacknowledgedWatermark() {
            return null;
        }

        @Override
        public void writeEnvelope(RecordEnvelope recordEnvelope) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public void write(Object record) throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public void commit() throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public void cleanup() throws IOException {
            throw new UnsupportedOperationException();
        }

        @Override
        public long recordsWritten() {
            return 0;
        }

        @Override
        public long bytesWritten() throws IOException {
            return 0;
        }

        @Override
        public void close() throws IOException {
        }
    };
    // NOTE(review): the 1000 argument is presumably the commit interval in milliseconds — confirm
    // against the MultiWriterWatermarkManager constructor.
    MultiWriterWatermarkManager watermarkManager = new MultiWriterWatermarkManager(mockWatermarkStorage, 1000, Optional.<Logger>absent());
    watermarkManager.registerWriter(mockWatermarkWriter);
    try {
        watermarkManager.start();
    } catch (Exception startFailure) {
        Assert.fail("Should not throw exception", startFailure);
    }
    // Let the manager run for two seconds before shutting it down.
    Thread.sleep(2000);
    watermarkManager.close();
    MultiWriterWatermarkManager.CommitStatus commitStatus = watermarkManager.getCommitStatus();
    System.out.println(commitStatus);
    MultiWriterWatermarkManager.RetrievalStatus retrievalStatus = watermarkManager.getRetrievalStatus();
    // Retrieval must have been attempted, must have succeeded, and must never have failed.
    long attemptMillis = retrievalStatus.getLastWatermarkRetrievalAttemptTimestampMillis();
    Assert.assertTrue(attemptMillis > 0);
    long successMillis = retrievalStatus.getLastWatermarkRetrievalSuccessTimestampMillis();
    Assert.assertTrue(successMillis > 0);
    long failureMillis = retrievalStatus.getLastWatermarkRetrievalFailureTimestampMillis();
    Assert.assertTrue(failureMillis == 0);
    System.out.println(retrievalStatus);
}
Also used : RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) IOException(java.io.IOException) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) IOException(java.io.IOException) ArrayList(java.util.ArrayList) List(java.util.List) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark) Test(org.testng.annotations.Test)

Example 2 with RecordEnvelope

use of org.apache.gobblin.stream.RecordEnvelope in project incubator-gobblin by apache.

the class InstrumentedExtractorTest method testBase.

/**
 * Pulls the first record from the extractor's stream and checks the resulting metrics and tags:
 * one record read, zero failures, one extract-timer sample, and a "construct" tag equal to
 * {@code Constructs.EXTRACTOR}.
 *
 * @param extractor the instrumented extractor under test
 */
public void testBase(InstrumentedExtractorBase<String, String> extractor) throws DataRecordException, IOException {
    AtomicBoolean streamFlag = new AtomicBoolean(false);
    RecordStreamWithMetadata<String, String> stream = extractor.recordStream(streamFlag);
    // Blocking on the first element drives exactly one record through the extractor; the value
    // itself is not inspected.
    RecordEnvelope<String> firstRecord = (RecordEnvelope<String>) stream.getRecordStream().firstOrError().blockingGet();

    Long one = Long.valueOf(1);
    Long zero = Long.valueOf(0);
    Map<String, Long> metrics = MetricsHelper.dumpMetrics(extractor.getMetricContext());
    Assert.assertEquals(metrics.get(MetricNames.ExtractorMetrics.RECORDS_READ_METER), one);
    Assert.assertEquals(metrics.get(MetricNames.ExtractorMetrics.RECORDS_FAILED_METER), zero);
    Assert.assertEquals(metrics.get(MetricNames.ExtractorMetrics.EXTRACT_TIMER), one);

    Assert.assertEquals(MetricsHelper.dumpTags(extractor.getMetricContext()).get("construct"), Constructs.EXTRACTOR.toString());
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope)

Example 3 with RecordEnvelope

use of org.apache.gobblin.stream.RecordEnvelope in project incubator-gobblin by apache.

the class CloseOnFlushWriterWrapperTest method testWriteAfterFlush.

/**
 * With close-on-flush enabled, each write followed by a flush control message must leave behind
 * one underlying writer that saw exactly one record, one flush, one close, one handler call and
 * a commit; a subsequent write must go to a fresh underlying writer.
 */
@Test
public void testWriteAfterFlush() throws IOException {
    WorkUnitState state = new WorkUnitState();
    state.getJobState().setProp(CloseOnFlushWriterWrapper.WRITER_CLOSE_ON_FLUSH_KEY, "true");
    List<DummyWriter> dummyWriters = new ArrayList<>();
    CloseOnFlushWriterWrapper<byte[]> writer = getCloseOnFlushWriter(dummyWriters, state);
    byte[] record = new byte[] { 'a', 'b', 'c', 'd' };

    // First write + flush: a single underlying writer has been created and fully cycled.
    writer.writeEnvelope(new RecordEnvelope<>(record));
    writer.getMessageHandler().handleMessage(FlushControlMessage.builder().build());
    Assert.assertEquals(dummyWriters.size(), 1);
    assertWrittenFlushedAndClosedOnce(dummyWriters.get(0));

    // Second write + flush: a second underlying writer appears and goes through the same cycle.
    writer.writeEnvelope(new RecordEnvelope<>(record));
    writer.getMessageHandler().handleMessage(FlushControlMessage.builder().build());
    Assert.assertEquals(dummyWriters.size(), 2);
    assertWrittenFlushedAndClosedOnce(dummyWriters.get(1));
}

/** Asserts that the given writer saw one record, one flush, one close, one handler call and a commit. */
private static void assertWrittenFlushedAndClosedOnce(DummyWriter dummyWriter) {
    Assert.assertEquals(dummyWriter.recordsWritten(), 1);
    Assert.assertEquals(dummyWriter.flushCount, 1);
    Assert.assertEquals(dummyWriter.closeCount, 1);
    Assert.assertTrue(dummyWriter.committed);
    Assert.assertEquals(dummyWriter.handlerCalled, 1);
}
Also used : RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) ArrayList(java.util.ArrayList) Test(org.testng.annotations.Test)

Example 4 with RecordEnvelope

use of org.apache.gobblin.stream.RecordEnvelope in project incubator-gobblin by apache.

the class PartitionedWriterTest method testWatermarkComputation.

/**
 * Writes one record through a {@code PartitionedDataWriter} backed by a mocked partition writer
 * and checks the committable watermark the partitioned writer reports.
 *
 * @param committed value the mocked partition writer reports as its committable watermark
 * @param unacknowledged value the mocked partition writer reports as its unacknowledged watermark
 * @param expected value the partitioned writer should report, or {@code null} when it should
 *     report no watermark at all
 */
public void testWatermarkComputation(Long committed, Long unacknowledged, Long expected) throws IOException {
    State state = new State();
    state.setProp(ConfigurationKeys.WRITER_PARTITIONER_CLASS, TestPartitioner.class.getCanonicalName());
    String defaultSource = "default";
    WatermarkAwareWriter mockDataWriter = mock(WatermarkAwareWriter.class);
    when(mockDataWriter.isWatermarkCapable()).thenReturn(true);
    when(mockDataWriter.getCommittableWatermark()).thenReturn(Collections.singletonMap(defaultSource, new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(committed))));
    when(mockDataWriter.getUnacknowledgedWatermark()).thenReturn(Collections.singletonMap(defaultSource, new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(unacknowledged))));
    // The builder mock returns itself from its fluent setters and always builds the same mocked writer.
    PartitionAwareDataWriterBuilder builder = mock(PartitionAwareDataWriterBuilder.class);
    when(builder.validatePartitionSchema(any(Schema.class))).thenReturn(true);
    when(builder.forPartition(any(GenericRecord.class))).thenReturn(builder);
    when(builder.withWriterId(any(String.class))).thenReturn(builder);
    when(builder.build()).thenReturn(mockDataWriter);
    // Declared with type arguments rather than as a raw type so calls on it are type-checked.
    PartitionedDataWriter<String, String> writer = new PartitionedDataWriter<String, String>(builder, state);
    RecordEnvelope<String> recordEnvelope = new RecordEnvelope<String>("0");
    recordEnvelope.addCallBack(new AcknowledgableWatermark(new DefaultCheckpointableWatermark(defaultSource, new LongWatermark(0))));
    writer.writeEnvelope(recordEnvelope);
    Map<String, CheckpointableWatermark> watermark = writer.getCommittableWatermark();
    System.out.println(watermark.toString());
    if (expected == null) {
        Assert.assertTrue(watermark.isEmpty(), "Expected watermark to be absent");
    } else {
        // TestNG's assertEquals takes (actual, expected); keeping that order makes failure
        // messages report the right value as "expected".
        Assert.assertEquals(watermark.size(), 1);
        Assert.assertEquals(((LongWatermark) watermark.values().iterator().next().getWatermark()).getValue(), (long) expected);
    }
}
Also used : RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) TestPartitioner(org.apache.gobblin.writer.test.TestPartitioner) Schema(org.apache.avro.Schema) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) State(org.apache.gobblin.configuration.State) GenericRecord(org.apache.avro.generic.GenericRecord) CheckpointableWatermark(org.apache.gobblin.source.extractor.CheckpointableWatermark) DefaultCheckpointableWatermark(org.apache.gobblin.source.extractor.DefaultCheckpointableWatermark) LongWatermark(org.apache.gobblin.source.extractor.extract.LongWatermark)

Example 5 with RecordEnvelope

use of org.apache.gobblin.stream.RecordEnvelope in project incubator-gobblin by apache.

the class RetryWriterTest method retryTestNonTransientException.

/**
 * A writer that throws {@code NonTransientException} must cause the wrapped writer's
 * {@code writeEnvelope} to fail, with the underlying writer invoked at most once.
 */
public void retryTestNonTransientException() throws IOException {
    DataWriter<Void> failingWriter = mock(DataWriter.class);
    doThrow(new NonTransientException()).when(failingWriter).writeEnvelope(any(RecordEnvelope.class));

    State emptyState = new State();
    DataWriterWrapperBuilder<Void> wrapperBuilder = new DataWriterWrapperBuilder<>(failingWriter, emptyState);
    DataWriter<Void> retryWriter = wrapperBuilder.build();

    boolean writeFailed = false;
    try {
        retryWriter.writeEnvelope(new RecordEnvelope<>(null));
    } catch (Exception expectedFailure) {
        // The write is supposed to fail; which exception surfaces is not checked here.
        writeFailed = true;
    }
    if (!writeFailed) {
        Assert.fail("Should have failed.");
    }

    verify(failingWriter, atMost(1)).writeEnvelope(any(RecordEnvelope.class));
}
Also used : NonTransientException(org.apache.gobblin.exception.NonTransientException) RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) State(org.apache.gobblin.configuration.State) FinalState(org.apache.gobblin.util.FinalState) NonTransientException(org.apache.gobblin.exception.NonTransientException) IOException(java.io.IOException)

Aggregations

RecordEnvelope (org.apache.gobblin.stream.RecordEnvelope): 23; Test (org.testng.annotations.Test): 13; State (org.apache.gobblin.configuration.State): 7; WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 7; IOException (java.io.IOException): 6; ArrayList (java.util.ArrayList): 5; AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean): 4; RecordStreamWithMetadata (org.apache.gobblin.records.RecordStreamWithMetadata): 4; LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark): 4; FinalState (org.apache.gobblin.util.FinalState): 4; CheckpointableWatermark (org.apache.gobblin.source.extractor.CheckpointableWatermark): 3; List (java.util.List): 2; Properties (java.util.Properties): 2; BasicAckableForTesting (org.apache.gobblin.ack.BasicAckableForTesting): 2; DataConversionException (org.apache.gobblin.converter.DataConversionException): 2; NonTransientException (org.apache.gobblin.exception.NonTransientException): 2; TaskPublisher (org.apache.gobblin.publisher.TaskPublisher): 2; RowLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckResults): 2; RowLevelPolicyChecker (org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker): 2; TaskLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults): 2