Search in sources :

Example 1 with RowLevelPolicyChecker

use of org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker in project incubator-gobblin by apache.

In class TestRecordStream, method setupTask.

/**
 * Builds a spied {@link Task} wired to a mocked {@link TaskContext}.
 *
 * <p>The mocked context hands out the supplied extractor, converters, record stream processors
 * and writer builder, an {@link IdentityForkOperator}, empty row-level policy checkers, and a
 * publisher stub that reports {@code SUCCESS}. The returned spy has
 * {@code submitTaskCommittedEvent()} stubbed to do nothing.
 *
 * @param extractor extractor returned by the mocked context
 * @param writer writer builder returned for every (branches, index) pair
 * @param converters converters returned by the mocked context
 * @param recordStreamProcessors record stream processors returned by the mocked context
 * @return a Mockito spy around the real task
 * @throws Exception if any collaborator cannot be created
 */
private Task setupTask(Extractor extractor, DataWriterBuilder writer, List<Converter<?, ?, ?, ?>> converters, List<RecordStreamProcessor<?, ?, ?, ?>> recordStreamProcessors) throws Exception {
    // Task state with the synchronous-execution-model key set to false.
    TaskState state = getEmptyTestTaskState("testRetryTaskId");
    state.setProp(ConfigurationKeys.TASK_SYNCHRONOUS_EXECUTION_MODEL_KEY, false);

    // Publisher stub that always reports SUCCESS.
    TaskPublisher publisher = mock(TaskPublisher.class);
    when(publisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);

    // Mocked context: pipeline pieces supplied by the caller.
    TaskContext context = mock(TaskContext.class);
    when(context.getTaskState()).thenReturn(state);
    when(context.getExtractor()).thenReturn(extractor);
    when(context.getConverters()).thenReturn(converters);
    when(context.getRecordStreamProcessors()).thenReturn(recordStreamProcessors);
    when(context.getForkOperator()).thenReturn(new IdentityForkOperator());
    when(context.getDataWriterBuilder(anyInt(), anyInt())).thenReturn(writer);

    // Mocked context: quality checkers and publisher. The two row-level checkers are
    // deliberately separate instances, one per overload.
    TaskLevelPolicyChecker taskLevelChecker = mock(TaskLevelPolicyChecker.class);
    when(context.getTaskLevelPolicyChecker(any(TaskState.class), anyInt())).thenReturn(taskLevelChecker);
    when(context.getRowLevelPolicyChecker())
        .thenReturn(new RowLevelPolicyChecker(Lists.newArrayList(), "ss", FileSystem.getLocal(new Configuration())));
    when(context.getRowLevelPolicyChecker(anyInt()))
        .thenReturn(new RowLevelPolicyChecker(Lists.newArrayList(), "ss", FileSystem.getLocal(new Configuration())));
    when(context.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class))).thenReturn(publisher);

    TaskStateTracker stateTracker = mock(TaskStateTracker.class);
    // A real TaskExecutor must be created so a Fork is run in a separate thread.
    TaskExecutor executor = new TaskExecutor(new Properties());

    // Spy on the real task so the committed-event submission can be stubbed out.
    Task spiedTask = spy(new Task(context, stateTracker, executor, Optional.<CountDownLatch>absent()));
    doNothing().when(spiedTask).submitTaskCommittedEvent();
    return spiedTask;
}
Also used : IdentityForkOperator(org.apache.gobblin.fork.IdentityForkOperator) TaskPublisher(org.apache.gobblin.publisher.TaskPublisher) Configuration(org.apache.hadoop.conf.Configuration) TaskLevelPolicyChecker(org.apache.gobblin.qualitychecker.task.TaskLevelPolicyChecker) RowLevelPolicyChecker(org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker) TaskLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults) Properties(java.util.Properties)

Example 2 with RowLevelPolicyChecker

use of org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker in project incubator-gobblin by apache.

In class RowLevelQualityCheckerTest, method testRowLevelPolicy.

/**
 * Runs every record from the test input through a checker configured with
 * {@code TestRowLevelPolicy} in {@code FAIL} mode and asserts that each record passes.
 *
 * @throws Exception if the checker cannot be built or the input cannot be read
 */
@Test(groups = { "ignore" })
public void testRowLevelPolicy() throws Exception {
    State state = new State();
    state.setProp(ConfigurationKeys.ROW_LEVEL_POLICY_LIST, "org.apache.gobblin.qualitychecker.TestRowLevelPolicy");
    state.setProp(ConfigurationKeys.ROW_LEVEL_POLICY_LIST_TYPE, "FAIL");
    RowLevelPolicyChecker checker = new RowLevelPolicyCheckerBuilderFactory().newPolicyCheckerBuilder(state, -1).build();
    RowLevelPolicyCheckResults results = new RowLevelPolicyCheckResults();
    // Close the reader even when an assertion fails; it was previously leaked.
    // NOTE(review): assumes FileReader is org.apache.avro.file.FileReader, which is Closeable — confirm.
    try (FileReader<GenericRecord> fileReader = openFile(state)) {
        for (GenericRecord datum : fileReader) {
            Assert.assertTrue(checker.executePolicies(datum, results));
        }
    }
}
Also used : State(org.apache.gobblin.configuration.State) RowLevelPolicyChecker(org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker) RowLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckResults) GenericRecord(org.apache.avro.generic.GenericRecord) RowLevelPolicyCheckerBuilderFactory(org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckerBuilderFactory) Test(org.testng.annotations.Test)

Example 3 with RowLevelPolicyChecker

use of org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker in project incubator-gobblin by apache.

In class RowLevelQualityCheckerTest, method testWriteToErrFile.

/**
 * Runs every record from the test input through a checker configured with
 * {@code TestRowLevelPolicyFail} in {@code ERR_FILE} mode, asserts that each record is rejected,
 * and then asserts that the expected {@code .err} file exists under the error directory.
 *
 * <p>The error directory is removed in a {@code finally} block so a failing assertion does not
 * leave it behind.
 *
 * @throws Exception if the checker cannot be built, the input cannot be read, or the file
 *     system cannot be accessed
 */
@Test(groups = { "ignore" })
public void testWriteToErrFile() throws Exception {
    State state = new State();
    state.setProp(ConfigurationKeys.ROW_LEVEL_POLICY_LIST, "org.apache.gobblin.qualitychecker.TestRowLevelPolicyFail");
    state.setProp(ConfigurationKeys.ROW_LEVEL_POLICY_LIST_TYPE, "ERR_FILE");
    state.setProp(ConfigurationKeys.ROW_LEVEL_ERR_FILE, TestConstants.TEST_ERR_FILE);
    state.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, TestConstants.TEST_FS_URI);
    RowLevelPolicyChecker checker = new RowLevelPolicyCheckerBuilderFactory().newPolicyCheckerBuilder(state, -1).build();
    RowLevelPolicyCheckResults results = new RowLevelPolicyCheckResults();
    FileSystem fs = FileSystem.get(new URI(TestConstants.TEST_FS_URI), new Configuration());
    try {
        // Close the reader even when an assertion fails; it was previously leaked.
        // NOTE(review): assumes FileReader is org.apache.avro.file.FileReader, which is Closeable — confirm.
        try (FileReader<GenericRecord> fileReader = openFile(state)) {
            for (GenericRecord datum : fileReader) {
                Assert.assertFalse(checker.executePolicies(datum, results));
            }
        }
        // The error file name is the policy class name with dots replaced by dashes, plus ".err".
        Path outputPath = new Path(TestConstants.TEST_ERR_FILE, state.getProp(ConfigurationKeys.ROW_LEVEL_POLICY_LIST).replaceAll("\\.", "-") + ".err");
        Assert.assertTrue(fs.exists(outputPath));
    } finally {
        // Always clean up the error directory, including after a failed assertion.
        fs.delete(new Path(TestConstants.TEST_ERR_FILE), true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) State(org.apache.gobblin.configuration.State) FileSystem(org.apache.hadoop.fs.FileSystem) RowLevelPolicyChecker(org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker) RowLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckResults) GenericRecord(org.apache.avro.generic.GenericRecord) URI(java.net.URI) RowLevelPolicyCheckerBuilderFactory(org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckerBuilderFactory) Test(org.testng.annotations.Test)

Example 4 with RowLevelPolicyChecker

use of org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker in project incubator-gobblin by apache.

In class TaskContinuousTest, method getMockTaskContext.

/**
 * Creates a mocked {@link TaskContext} for the streaming task tests.
 *
 * <p>The context returns the given extractor (as both the extractor and the raw source
 * extractor), a {@link MockWatermarkStorage}, an {@link IdentityForkOperator}, an empty
 * row-level policy checker, a publisher stub reporting {@code SUCCESS}, and a
 * {@link TestStreamingDataWriterBuilder} backed by {@code recordCollector}.
 *
 * @param recordCollector list handed to the writer builder
 * @param mockExtractor extractor returned by the mocked context
 * @return the stubbed task context
 * @throws Exception if any collaborator cannot be created
 */
private TaskContext getMockTaskContext(ArrayList<Object> recordCollector, Extractor mockExtractor) throws Exception {
    TaskState streamingState = getStreamingTaskState();

    // Collaborators handed out by the stubbed context.
    RowLevelPolicyChecker rowChecker = new RowLevelPolicyChecker(Lists.newArrayList(), "stateId", FileSystem.getLocal(new Configuration()));
    WatermarkStorage watermarkStorage = new MockWatermarkStorage();
    TaskPublisher publisher = mock(TaskPublisher.class);
    when(publisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);

    TaskContext context = mock(TaskContext.class);

    // State, metrics and source.
    when(context.getTaskState()).thenReturn(streamingState);
    when(context.getTaskMetrics()).thenReturn(TaskMetrics.get(streamingState));
    when(context.getExtractor()).thenReturn(mockExtractor);
    when(context.getRawSourceExtractor()).thenReturn(mockExtractor);
    when(context.getWatermarkStorage()).thenReturn(watermarkStorage);

    // Fork, quality checking, writing and publishing.
    when(context.getForkOperator()).thenReturn(new IdentityForkOperator());
    when(context.getRowLevelPolicyChecker()).thenReturn(rowChecker);
    when(context.getRowLevelPolicyChecker(anyInt())).thenReturn(rowChecker);
    TaskLevelPolicyChecker taskLevelChecker = mock(TaskLevelPolicyChecker.class);
    when(context.getTaskLevelPolicyChecker(any(TaskState.class), anyInt())).thenReturn(taskLevelChecker);
    when(context.getDataWriterBuilder(anyInt(), anyInt())).thenReturn(new TestStreamingDataWriterBuilder(recordCollector));
    when(context.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class))).thenReturn(publisher);

    return context;
}
Also used : IdentityForkOperator(org.apache.gobblin.fork.IdentityForkOperator) WatermarkStorage(org.apache.gobblin.writer.WatermarkStorage) TaskPublisher(org.apache.gobblin.publisher.TaskPublisher) Configuration(org.apache.hadoop.conf.Configuration) TaskLevelPolicyChecker(org.apache.gobblin.qualitychecker.task.TaskLevelPolicyChecker) RowLevelPolicyChecker(org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker) TaskLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults)

Example 5 with RowLevelPolicyChecker

use of org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker in project incubator-gobblin by apache.

In class TaskTest, method getMockTaskContext.

/**
 * Creates a mocked {@link TaskContext} with one record-collecting writer per fork.
 *
 * <p>The row-level policy checker is a spy stubbed to accept every record and to return an
 * empty {@link State} as its final state. The number of forks is taken from the size of
 * {@code writerCollectors}; fork {@code i} writes into {@code writerCollectors.get(i)}.
 *
 * @param taskState task state returned by the mocked context
 * @param mockExtractor extractor returned as both the extractor and the raw source extractor
 * @param writerCollectors one collector list per fork
 * @param mockForkOperator fork operator returned by the mocked context
 * @return the stubbed task context
 * @throws Exception if any collaborator cannot be created
 */
private TaskContext getMockTaskContext(TaskState taskState, Extractor mockExtractor, ArrayList<ArrayList<Object>> writerCollectors, ForkOperator mockForkOperator) throws Exception {
    // Spied row-level checker: every record passes, final state is empty.
    RowLevelPolicyChecker rowChecker = spy(new RowLevelPolicyChecker(Lists.newArrayList(), "ss", FileSystem.getLocal(new Configuration())));
    when(rowChecker.executePolicies(any(Object.class), any(RowLevelPolicyCheckResults.class))).thenReturn(true);
    when(rowChecker.getFinalState()).thenReturn(new State());

    // Publisher stub that always reports SUCCESS.
    TaskPublisher publisher = mock(TaskPublisher.class);
    when(publisher.canPublish()).thenReturn(TaskPublisher.PublisherState.SUCCESS);

    TaskContext context = mock(TaskContext.class);
    when(context.getTaskState()).thenReturn(taskState);
    when(context.getExtractor()).thenReturn(mockExtractor);
    when(context.getRawSourceExtractor()).thenReturn(mockExtractor);
    when(context.getForkOperator()).thenReturn(mockForkOperator);
    when(context.getRowLevelPolicyChecker()).thenReturn(rowChecker);
    when(context.getRowLevelPolicyChecker(anyInt())).thenReturn(rowChecker);
    TaskLevelPolicyChecker taskLevelChecker = mock(TaskLevelPolicyChecker.class);
    when(context.getTaskLevelPolicyChecker(any(TaskState.class), anyInt())).thenReturn(taskLevelChecker);
    when(context.getTaskPublisher(any(TaskState.class), any(TaskLevelPolicyCheckResults.class))).thenReturn(publisher);

    // One writer builder per fork, keyed by (fork count, fork index).
    final int forkCount = writerCollectors.size();
    int forkIndex = 0;
    while (forkIndex < forkCount) {
        when(context.getDataWriterBuilder(forkCount, forkIndex)).thenReturn(new RecordCollectingWriterBuilder(writerCollectors.get(forkIndex)));
        forkIndex++;
    }
    return context;
}
Also used : TaskPublisher(org.apache.gobblin.publisher.TaskPublisher) Configuration(org.apache.hadoop.conf.Configuration) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) State(org.apache.gobblin.configuration.State) TaskLevelPolicyChecker(org.apache.gobblin.qualitychecker.task.TaskLevelPolicyChecker) RowLevelPolicyChecker(org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker) TaskLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults) RowLevelPolicyCheckResults(org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckResults)

Aggregations

RowLevelPolicyChecker (org.apache.gobblin.qualitychecker.row.RowLevelPolicyChecker): 7; Configuration (org.apache.hadoop.conf.Configuration): 6; TaskPublisher (org.apache.gobblin.publisher.TaskPublisher): 5; TaskLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.task.TaskLevelPolicyCheckResults): 5; TaskLevelPolicyChecker (org.apache.gobblin.qualitychecker.task.TaskLevelPolicyChecker): 5; IdentityForkOperator (org.apache.gobblin.fork.IdentityForkOperator): 4; Test (org.testng.annotations.Test): 4; Properties (java.util.Properties): 3; State (org.apache.gobblin.configuration.State): 3; RowLevelPolicyCheckResults (org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckResults): 3; GenericRecord (org.apache.avro.generic.GenericRecord): 2; RowLevelPolicyCheckerBuilderFactory (org.apache.gobblin.qualitychecker.row.RowLevelPolicyCheckerBuilderFactory): 2; URI (java.net.URI): 1; BasicAckableForTesting (org.apache.gobblin.ack.BasicAckableForTesting): 1; WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 1; RecordEnvelope (org.apache.gobblin.stream.RecordEnvelope): 1; StreamEntity (org.apache.gobblin.stream.StreamEntity): 1; WatermarkStorage (org.apache.gobblin.writer.WatermarkStorage): 1; FileSystem (org.apache.hadoop.fs.FileSystem): 1; Path (org.apache.hadoop.fs.Path): 1