Search in sources :

Example 21 with RecordEnvelope

use of org.apache.gobblin.stream.RecordEnvelope in project incubator-gobblin by apache.

From the class AsyncConverter1to1Test, method test1to1:

/**
 * Feeds five records ("0" through "4") into a {@code MyAsyncConverter1to1} configured with a limit of
 * three concurrent async conversions, completes the pending conversions out of order, and checks which
 * records have reached the output after each step.
 *
 * The converted stream is subscribed on a separate thread, so every observation of its progress is done
 * by polling with {@code ExponentialBackoff.awaitCondition()}.
 */
@Test
public void test1to1() throws Exception {
    MyAsyncConverter1to1 converter = new MyAsyncConverter1to1();
    List<Throwable> errors = Lists.newArrayList();
    AtomicBoolean done = new AtomicBoolean(false);
    WorkUnitState workUnitState = new WorkUnitState();
    workUnitState.setProp(AsyncConverter1to1.MAX_CONCURRENT_ASYNC_CONVERSIONS_KEY, 3);
    RecordStreamWithMetadata<String, String> stream = new RecordStreamWithMetadata<>(Flowable.range(0, 5).map(i -> i.toString()).map(RecordEnvelope::new), GlobalMetadata.<String>builder().schema("schema").build());
    Set<String> outputRecords = Sets.newConcurrentHashSet();
    // Collect output records, errors and the completion signal from the subscriber thread.
    converter.processStream(stream, workUnitState).getRecordStream().subscribeOn(Schedulers.newThread()).subscribe(r -> outputRecords.add(((RecordEnvelope<String>) r).getRecord()), errors::add, () -> done.set(true));
    // Release record 0
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> converter.completeFutureIfPresent("0")).await());
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> outputRecords.contains("0")).await());
    Assert.assertEquals(outputRecords.size(), 1);
    // Record 4 should not be in the queue yet (max concurrent conversions is 3).
    Assert.assertFalse(ExponentialBackoff.awaitCondition().maxWait(200L).callable(() -> converter.completeFutureIfPresent("4")).await());
    // Release record 3 (out of order)
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> converter.completeFutureIfPresent("3")).await());
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> outputRecords.contains("3")).await());
    // only two records have been released
    Assert.assertEquals(outputRecords.size(), 2);
    // Release record 4 (now in queue)
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> converter.completeFutureIfPresent("4")).await());
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> outputRecords.contains("4")).await());
    Assert.assertEquals(outputRecords.size(), 3);
    // Release records 1 and 2
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> converter.completeFutureIfPresent("1")).await());
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> converter.completeFutureIfPresent("2")).await());
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(() -> outputRecords.size() == 5).await());
    Assert.assertEquals(outputRecords, Sets.newHashSet("0", "1", "2", "3", "4"));
    Assert.assertTrue(errors.isEmpty());
    // The completion callback runs on the subscriber thread and may fire slightly after the fifth
    // record has been observed, so poll for it instead of reading the flag a single time.
    Assert.assertTrue(ExponentialBackoff.awaitCondition().maxWait(100L).callable(done::get).await());
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) RecordStreamWithMetadata(org.apache.gobblin.records.RecordStreamWithMetadata) Test(org.testng.annotations.Test)

Example 22 with RecordEnvelope

use of org.apache.gobblin.stream.RecordEnvelope in project incubator-gobblin by apache.

From the class TestRecordStream, method testAcks:

/**
 * Runs a real {@code Task} over a stream of two record envelopes and two control messages and asserts
 * that the task finishes successfully with all four entities acked.
 */
@Test
public void testAcks() throws Exception {
    StreamEntity[] streamEntities = new StreamEntity[] { new RecordEnvelope<>("a"), new BasicTestControlMessage("1"), new RecordEnvelope<>("b"), new BasicTestControlMessage("2") };
    // A single ackable is attached to every entity so that all acks are counted in one place.
    BasicAckableForTesting ackCounter = new BasicAckableForTesting();
    for (StreamEntity entity : streamEntities) {
        entity.addCallBack(ackCounter);
    }

    // Constructs under test: extractor -> converter -> writer.
    MyExtractor sourceExtractor = new MyExtractor(streamEntities);
    MyConverter passConverter = new MyConverter();
    MyDataWriter sinkWriter = new MyDataWriter();

    // Task state with the synchronous execution model switched off.
    TaskState state = getEmptyTestTaskState("testRetryTaskId");
    state.setProp(ConfigurationKeys.TASK_SYNCHRONOUS_EXECUTION_MODEL_KEY, false);

    // Publisher mock that always reports a successful publish.
    TaskPublisher publisher = mock(TaskPublisher.class);
    when(publisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);

    // Task context mock that hands out the constructs created above.
    TaskContext context = mock(TaskContext.class);
    when(context.getTaskState()).thenReturn(state);
    when(context.getExtractor()).thenReturn(sourceExtractor);
    when(context.getConverters()).thenReturn(Lists.newArrayList(passConverter));
    when(context.getForkOperator()).thenReturn(new IdentityForkOperator());
    when(context.getDataWriterBuilder(anyInt(), anyInt())).thenReturn(sinkWriter);
    when(context.getRowLevelPolicyChecker()).thenReturn(new RowLevelPolicyChecker(Lists.newArrayList(), "ss", FileSystem.getLocal(new Configuration())));
    when(context.getRowLevelPolicyChecker(anyInt())).thenReturn(new RowLevelPolicyChecker(Lists.newArrayList(), "ss", FileSystem.getLocal(new Configuration())));
    when(context.getTaskLevelPolicyChecker(any(TaskState.class), anyInt())).thenReturn(mock(TaskLevelPolicyChecker.class));
    when(context.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class))).thenReturn(publisher);

    TaskStateTracker stateTracker = mock(TaskStateTracker.class);
    // A real TaskExecutor is required so that the Fork runs in a separate thread.
    TaskExecutor executor = new TaskExecutor(new Properties());

    // Spy on the task so that submitting the committed event can be stubbed out.
    Task task = spy(new Task(context, stateTracker, executor, Optional.<CountDownLatch>absent()));
    doNothing().when(task).submitTaskCommittedEvent();

    task.run();
    task.commit();

    Assert.assertEquals(task.getTaskState().getWorkingState(), WorkUnitState.WorkingState.SUCCESSFUL);
    Assert.assertEquals(ackCounter.acked, 4);
}
Also used : TaskPublisher(org.apache.gobblin.publisher.TaskPublisher) Configuration(org.apache.hadoop.conf.Configuration) RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) TaskLevelPolicyChecker(org.apache.gobblin.qualitychecker.task.TaskLevelPolicyChecker) StreamEntity(org.apache.gobblin.stream.StreamEntity) TaskLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults) Properties(java.util.Properties) IdentityForkOperator(org.apache.gobblin.fork.IdentityForkOperator) RowLevelPolicyChecker(org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker) BasicAckableForTesting(org.apache.gobblin.ack.BasicAckableForTesting) Test(org.testng.annotations.Test)

Example 23 with RecordEnvelope

use of org.apache.gobblin.stream.RecordEnvelope in project incubator-gobblin by apache.

From the class Fork, method consumeRecordStream:

/**
 * Passes the given stream through this fork's converter(s) and row-level policy checker, attaches the
 * fork's life-cycle callbacks, and subscribes so that every entity is handed to the writer.
 *
 * Record envelopes are written through the writer; control messages are passed to the writer's message
 * handler and then acked. The writer, when present, is closed on both terminal signals (completion and
 * error).
 *
 * @param stream the incoming record stream together with its metadata
 * @throws RecordStreamProcessor.StreamProcessingException declared by the signature; presumably raised
 *         by the {@code processStream} calls while the pipeline is assembled (TODO confirm)
 */
@SuppressWarnings(value = "RV_RETURN_VALUE_IGNORED", justification = "We actually don't care about the return value of subscribe.")
public void consumeRecordStream(RecordStreamWithMetadata<D, S> stream) throws RecordStreamProcessor.StreamProcessingException {
    if (this.converter instanceof MultiConverter) {
        // if multiconverter, unpack it
        for (Converter cverter : ((MultiConverter) this.converter).getConverters()) {
            stream = cverter.processStream(stream, this.taskState);
        }
    } else {
        stream = this.converter.processStream(stream, this.taskState);
    }
    stream = this.rowLevelPolicyChecker.processStream(stream, this.taskState);
    // Per-entity hook; the entity itself is passed through unchanged.
    stream = stream.mapStream(s -> s.map(r -> {
        onEachRecord();
        return r;
    }));
    // Fork state transitions: completion and cancellation both mark the fork SUCCEEDED, an error marks it FAILED.
    stream = stream.mapStream(s -> s.doOnSubscribe(subscription -> onStart()));
    stream = stream.mapStream(s -> s.doOnComplete(() -> verifyAndSetForkState(ForkState.RUNNING, ForkState.SUCCEEDED)));
    stream = stream.mapStream(s -> s.doOnCancel(() -> verifyAndSetForkState(ForkState.RUNNING, ForkState.SUCCEEDED)));
    stream = stream.mapStream(s -> s.doOnError(exc -> {
        verifyAndSetForkState(ForkState.RUNNING, ForkState.FAILED);
        this.logger.error(String.format("Fork %d of task %s failed to process data records", this.index, this.taskId), exc);
    }));
    stream = stream.mapStream(s -> s.doFinally(this::cleanup));
    stream.getRecordStream().subscribe(r -> {
        if (r instanceof RecordEnvelope) {
            this.writer.get().writeEnvelope((RecordEnvelope) r);
        } else if (r instanceof ControlMessage) {
            this.writer.get().getMessageHandler().handleMessage((ControlMessage) r);
            r.ack();
        }
    }, e -> {
        logger.error("Failed to process record.", e);
        // The completion callback is not invoked after an error, so release the writer here as well.
        if (this.writer.isPresent()) {
            this.writer.get().close();
        }
    }, () -> {
        if (this.writer.isPresent()) {
            this.writer.get().close();
        }
    });
}
Also used : ForkOperatorUtils(org.apache.gobblin.util.ForkOperatorUtils) Tag(org.apache.gobblin.metrics.Tag) SpeculativeAttemptAwareConstruct(org.apache.gobblin.commit.SpeculativeAttemptAwareConstruct) GobblinMetrics(org.apache.gobblin.metrics.GobblinMetrics) ExecutionModel(org.apache.gobblin.runtime.ExecutionModel) LoggerFactory(org.slf4j.LoggerFactory) ControlMessage(org.apache.gobblin.stream.ControlMessage) BoundedBlockingRecordQueue(org.apache.gobblin.runtime.BoundedBlockingRecordQueue) TaskPublisher(org.apache.gobblin.publisher.TaskPublisher) PartitionedDataWriter(org.apache.gobblin.writer.PartitionedDataWriter) AtomicReference(java.util.concurrent.atomic.AtomicReference) Task(org.apache.gobblin.runtime.Task) TaskState(org.apache.gobblin.runtime.TaskState) ImmutableList(com.google.common.collect.ImmutableList) Closer(com.google.common.io.Closer) DataWriterBuilder(org.apache.gobblin.writer.DataWriterBuilder) Optional(com.google.common.base.Optional) SuppressWarnings(edu.umd.cs.findbugs.annotations.SuppressWarnings) RecordStreamProcessor(org.apache.gobblin.records.RecordStreamProcessor) TaskLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults) Logger(org.slf4j.Logger) TaskContext(org.apache.gobblin.runtime.TaskContext) Converter(org.apache.gobblin.converter.Converter) Instrumented(org.apache.gobblin.instrumented.Instrumented) State(org.apache.gobblin.configuration.State) RowLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckResults) TaskExecutor(org.apache.gobblin.runtime.TaskExecutor) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) FinalState(org.apache.gobblin.util.FinalState) ConfigurationKeys(org.apache.gobblin.configuration.ConfigurationKeys) DataWriter(org.apache.gobblin.writer.DataWriter) List(java.util.List) RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) WatermarkAwareWriter(org.apache.gobblin.writer.WatermarkAwareWriter) 
DataWriterWrapperBuilder(org.apache.gobblin.writer.DataWriterWrapperBuilder) Destination(org.apache.gobblin.writer.Destination) Closeable(java.io.Closeable) RowLevelPolicyChecker(org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker) RecordStreamWithMetadata(org.apache.gobblin.records.RecordStreamWithMetadata) Preconditions(com.google.common.base.Preconditions) RecordStreamConsumer(org.apache.gobblin.records.RecordStreamConsumer) DataConversionException(org.apache.gobblin.converter.DataConversionException) TaskMetrics(org.apache.gobblin.runtime.util.TaskMetrics) Constructs(org.apache.gobblin.Constructs) MultiConverter(org.apache.gobblin.runtime.MultiConverter) ConstructState(org.apache.gobblin.state.ConstructState) MultiConverter(org.apache.gobblin.runtime.MultiConverter) RecordEnvelope(org.apache.gobblin.stream.RecordEnvelope) Converter(org.apache.gobblin.converter.Converter) MultiConverter(org.apache.gobblin.runtime.MultiConverter) ControlMessage(org.apache.gobblin.stream.ControlMessage) SuppressWarnings(edu.umd.cs.findbugs.annotations.SuppressWarnings)

Aggregations

RecordEnvelope (org.apache.gobblin.stream.RecordEnvelope)23 Test (org.testng.annotations.Test)13 State (org.apache.gobblin.configuration.State)7 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)7 IOException (java.io.IOException)6 ArrayList (java.util.ArrayList)5 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)4 RecordStreamWithMetadata (org.apache.gobblin.records.RecordStreamWithMetadata)4 LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark)4 FinalState (org.apache.gobblin.util.FinalState)4 CheckpointableWatermark (org.apache.gobblin.source.extractor.CheckpointableWatermark)3 List (java.util.List)2 Properties (java.util.Properties)2 BasicAckableForTesting (org.apache.gobblin.ack.BasicAckableForTesting)2 DataConversionException (org.apache.gobblin.converter.DataConversionException)2 NonTransientException (org.apache.gobblin.exception.NonTransientException)2 TaskPublisher (org.apache.gobblin.publisher.TaskPublisher)2 RowLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckResults)2 RowLevelPolicyChecker (org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker)2 TaskLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults)2