Search in sources:

Example 6 with TableDescriptor

use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.

the class TestJobGraphJsonGenerator method testTaskApplication.

/**
 * Checks the JSON that {@link JobGraphJsonGenerator} emits for a mocked {@link JobGraph} that is
 * assembled from the in/out edges and tables of {@code mockJobNode}.
 *
 * The test serializes the mocked graph, parses the JSON back into
 * {@code JobGraphJsonGenerator.JobGraphJson}, and asserts on the application id/name, the
 * source/sink/intermediate stream ids, the table ids and the single job node.
 */
@Test
public void testTaskApplication() throws Exception {
    JobGraphJsonGenerator jsonGenerator = new JobGraphJsonGenerator();
    JobGraph mockJobGraph = mock(JobGraph.class);
    ApplicationConfig mockAppConfig = mock(ApplicationConfig.class);
    when(mockAppConfig.getAppName()).thenReturn("testTaskApp");
    when(mockAppConfig.getAppId()).thenReturn("testTaskAppId");
    when(mockJobGraph.getApplicationConfig()).thenReturn(mockAppConfig);
    // compute the three disjoint sets of the JobGraph: input only, output only, and intermediate streams
    Set<StreamEdge> inEdges = new HashSet<>(mockJobNode.getInEdges().values());
    Set<StreamEdge> outEdges = new HashSet<>(mockJobNode.getOutEdges().values());
    Set<StreamEdge> intermediateEdges = new HashSet<>(inEdges);
    // intermediate streams are the intersection between input and output
    intermediateEdges.retainAll(outEdges);
    // remove all intermediate streams from input
    inEdges.removeAll(intermediateEdges);
    // remove all intermediate streams from output
    outEdges.removeAll(intermediateEdges);
    // set the return values for mockJobGraph
    when(mockJobGraph.getInputStreams()).thenReturn(inEdges);
    when(mockJobGraph.getOutputStreams()).thenReturn(outEdges);
    when(mockJobGraph.getIntermediateStreamEdges()).thenReturn(intermediateEdges);
    Set<TableDescriptor> tables = new HashSet<>(mockJobNode.getTables().values());
    when(mockJobGraph.getTables()).thenReturn(tables);
    when(mockJobGraph.getJobNodes()).thenReturn(Collections.singletonList(mockJobNode));
    String graphJson = jsonGenerator.toJson(mockJobGraph);
    ObjectMapper objectMapper = new ObjectMapper();
    // Parse the JSON text directly. Going through getBytes() would encode it with the platform
    // default charset, which the parser then has to guess when reading the bytes back.
    JobGraphJsonGenerator.JobGraphJson jsonObject = objectMapper.readValue(graphJson, JobGraphJsonGenerator.JobGraphJson.class);
    assertEquals("testTaskAppId", jsonObject.applicationId);
    assertEquals("testTaskApp", jsonObject.applicationName);
    Set<String> inStreamIds = inEdges.stream().map(stream -> stream.getStreamSpec().getId()).collect(Collectors.toSet());
    assertThat(jsonObject.sourceStreams.keySet(), Matchers.containsInAnyOrder(inStreamIds.toArray()));
    Set<String> outStreamIds = outEdges.stream().map(stream -> stream.getStreamSpec().getId()).collect(Collectors.toSet());
    assertThat(jsonObject.sinkStreams.keySet(), Matchers.containsInAnyOrder(outStreamIds.toArray()));
    Set<String> intStreamIds = intermediateEdges.stream().map(stream -> stream.getStreamSpec().getId()).collect(Collectors.toSet());
    assertThat(jsonObject.intermediateStreams.keySet(), Matchers.containsInAnyOrder(intStreamIds.toArray()));
    Set<String> tableIds = tables.stream().map(t -> t.getTableId()).collect(Collectors.toSet());
    assertThat(jsonObject.tables.keySet(), Matchers.containsInAnyOrder(tableIds.toArray()));
    // Exactly one job node is expected, carrying the id and name of mockJobNode.
    assertEquals(1, jsonObject.jobs.size());
    JobGraphJsonGenerator.JobNodeJson actualNodeJson = jsonObject.jobs.get(0);
    assertEquals(mockJobNode.getJobId(), actualNodeJson.jobId);
    assertEquals(mockJobNode.getJobName(), actualNodeJson.jobName);
    // The operator graph lists every in/out edge of the node (intermediate ones included) and no operators.
    assertEquals(3, actualNodeJson.operatorGraph.inputStreams.size());
    assertEquals(2, actualNodeJson.operatorGraph.outputStreams.size());
    assertEquals(0, actualNodeJson.operatorGraph.operators.size());
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) Serde(org.apache.samza.serializers.Serde) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) StringSerde(org.apache.samza.serializers.StringSerde) HashSet(java.util.HashSet) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) TestExecutionPlanner(org.apache.samza.execution.TestExecutionPlanner) Duration(java.time.Duration) Map(java.util.Map) ApplicationConfig(org.apache.samza.config.ApplicationConfig) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) LongSerde(org.apache.samza.serializers.LongSerde) Before(org.junit.Before) Windows(org.apache.samza.operators.windows.Windows) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Set(java.util.Set) Matchers(org.hamcrest.Matchers) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) JoinFunction(org.apache.samza.operators.functions.JoinFunction) Collectors(java.util.stream.Collectors) Mockito(org.mockito.Mockito) OperatorSpecs(org.apache.samza.operators.spec.OperatorSpecs) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) 
SystemAdmin(org.apache.samza.system.SystemAdmin) Config(org.apache.samza.config.Config) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) Assert(org.junit.Assert) Collections(java.util.Collections) OutputStream(org.apache.samza.operators.OutputStream) SystemAdmins(org.apache.samza.system.SystemAdmins) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) ApplicationConfig(org.apache.samza.config.ApplicationConfig) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 7 with TableDescriptor

use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.

the class TestJobGraphJsonGenerator method setUp.

/**
 * Prepares the fixtures shared by the tests: stream specs, system/stream/table descriptors,
 * a spied job config, and a mocked {@code JobNode} stubbed with two input edges, one output
 * edge, one repartition edge (registered as both input and output) and two tables.
 */
@Before
public void setUp() {
    // Stream specs for the two inputs, the output and the repartition stream.
    input1Spec = new StreamSpec("input1", "input1", "input-system");
    input2Spec = new StreamSpec("input2", "input2", "input-system");
    outputSpec = new StreamSpec("output", "output", "output-system");
    repartitionSpec = new StreamSpec("jobName-jobId-partition_by-p1", "partition_by-p1", "intermediate-system");

    // Descriptors, all built on the same default serde.
    defaultSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>());
    inputSystemDescriptor = new GenericSystemDescriptor("input-system", "mockSystemFactoryClassName");
    outputSystemDescriptor = new GenericSystemDescriptor("output-system", "mockSystemFactoryClassName");
    intermediateSystemDescriptor = new GenericSystemDescriptor("intermediate-system", "mockSystemFactoryClassName");
    input1Descriptor = inputSystemDescriptor.getInputDescriptor("input1", defaultSerde);
    input2Descriptor = inputSystemDescriptor.getInputDescriptor("input2", defaultSerde);
    outputDescriptor = outputSystemDescriptor.getOutputDescriptor("output", defaultSerde);
    table1Descriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table1", defaultSerde);
    table2Descriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table2", defaultSerde);

    // Job-level config used by the mocked job node and by every stream edge.
    Map<String, String> jobSettings = new HashMap<>();
    jobSettings.put(JobConfig.JOB_NAME, "jobName");
    jobSettings.put(JobConfig.JOB_ID, "jobId");
    mockConfig = spy(new MapConfig(jobSettings));
    mockJobNode = mock(JobNode.class);

    // Edges keyed by stream id; the repartition edge is the same instance on both sides.
    Map<String, StreamEdge> inEdgesById = new HashMap<>();
    Map<String, StreamEdge> outEdgesById = new HashMap<>();
    inEdgesById.put(input1Descriptor.getStreamId(), new StreamEdge(input1Spec, false, false, mockConfig));
    inEdgesById.put(input2Descriptor.getStreamId(), new StreamEdge(input2Spec, false, false, mockConfig));
    outEdgesById.put(outputDescriptor.getStreamId(), new StreamEdge(outputSpec, false, false, mockConfig));
    StreamEdge repartition = new StreamEdge(repartitionSpec, true, false, mockConfig);
    inEdgesById.put(repartitionSpec.getId(), repartition);
    outEdgesById.put(repartitionSpec.getId(), repartition);

    // Tables keyed by table id.
    Map<String, TableDescriptor> tablesById = new HashMap<>();
    tablesById.put(table1Descriptor.getTableId(), table1Descriptor);
    tablesById.put(table2Descriptor.getTableId(), table2Descriptor);

    // Stub the mocked job node.
    when(mockJobNode.getInEdges()).thenReturn(inEdgesById);
    when(mockJobNode.getOutEdges()).thenReturn(outEdgesById);
    when(mockJobNode.getConfig()).thenReturn(mockConfig);
    when(mockJobNode.getJobName()).thenReturn("jobName");
    when(mockJobNode.getJobId()).thenReturn("jobId");
    when(mockJobNode.getJobNameAndId()).thenReturn(JobNode.createJobNameAndId("jobName", "jobId"));
    when(mockJobNode.getTables()).thenReturn(tablesById);
}
Also used : StreamSpec(org.apache.samza.system.StreamSpec) StringSerde(org.apache.samza.serializers.StringSerde) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) HashMap(java.util.HashMap) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) MapConfig(org.apache.samza.config.MapConfig) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Before(org.junit.Before)

Example 8 with TableDescriptor

use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.

the class TestExecutionPlanner method createStreamGraphWithInvalidStreamTableJoinWithSideInputs.

/**
 * Builds an application descriptor in which {@code input1Descriptor}'s stream is joined with a
 * local table configured with "input2" as its side input, and the join result is sent to
 * {@code output1Descriptor}.
 *
 * @return the application descriptor wrapping that stream-table join
 */
private StreamApplicationDescriptorImpl createStreamGraphWithInvalidStreamTableJoinWithSideInputs() {
    /*
     * Example stream-table join that is invalid due to disagreement in partition count between the
     * stream behind table t and another joined stream. Table t is configured with input2 (16) as
     * side-input stream.
     *
     *                   join-table t -> output1 (8)
     *                         |
     *    input1 (64) —————————
     */
    return new StreamApplicationDescriptorImpl(appDesc -> {
        MessageStream<KV<Object, Object>> messageStream1 = appDesc.getInputStream(input1Descriptor);
        OutputStream<KV<Object, Object>> output1 = appDesc.getOutputStream(output1Descriptor);
        // KVSerde.of(...) instead of the raw "new KVSerde(...)": keeps the serde type-checked and
        // matches how KVSerde instances are created elsewhere.
        TableDescriptor tableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", KVSerde.of(new StringSerde(), new StringSerde()))
            .withSideInputs(Arrays.asList("input2"))
            .withSideInputsProcessor(mock(SideInputsProcessor.class));
        Table table = appDesc.getTable(tableDescriptor);
        messageStream1.join(table, mock(StreamTableJoinFunction.class)).sendTo(output1);
    }, config);
}
Also used : KVSerde(org.apache.samza.serializers.KVSerde) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) KV(org.apache.samza.operators.KV) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) SideInputsProcessor(org.apache.samza.storage.SideInputsProcessor)

Example 9 with TableDescriptor

use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.

the class ExecutionPlanner method createJobGraph.

/**
 * Builds the physical {@link JobGraph} for the given {@link ApplicationDescriptorImpl}.
 *
 * A single job node is created for the whole application; input, output and intermediate
 * streams, tables, and the side-input streams of local tables are attached to it. The graph is
 * validated unless the application class is a {@link LegacyTaskApplication}.
 */
/* package private */
JobGraph createJobGraph(ApplicationDescriptorImpl<? extends ApplicationDescriptor> appDesc) {
    JobGraph graph = new JobGraph(config, appDesc);

    // Sources hold input + intermediate streams; sinks hold output + intermediate streams.
    Set<StreamSpec> sources = getStreamSpecs(appDesc.getInputStreamIds(), streamConfig);
    Set<StreamSpec> sinks = getStreamSpecs(appDesc.getOutputStreamIds(), streamConfig);
    Set<StreamSpec> intermediates = Sets.intersection(sources, sinks);
    Set<StreamSpec> inputsOnly = Sets.difference(sources, intermediates);
    Set<StreamSpec> outputsOnly = Sets.difference(sinks, intermediates);
    Set<TableDescriptor> tableDescriptors = appDesc.getTableDescriptors();

    // job.id and job.name are generated from app.id and app.name if those are defined
    MapConfig singleJobConfig = JobPlanner.generateSingleJobConfig(config);
    String jobName = singleJobConfig.get(JobConfig.JOB_NAME);
    String jobId = singleJobConfig.get(JobConfig.JOB_ID, "1");

    // For this phase, the whole DAG lives in a single job node
    JobNode singleNode = graph.getOrCreateJobNode(jobName, jobId);

    for (StreamSpec input : inputsOnly) {
        graph.addInputStream(input, singleNode);
    }
    for (StreamSpec output : outputsOnly) {
        graph.addOutputStream(output, singleNode);
    }
    for (StreamSpec intermediate : intermediates) {
        graph.addIntermediateStream(intermediate, singleNode, singleNode);
    }

    for (TableDescriptor descriptor : tableDescriptors) {
        graph.addTable(descriptor, singleNode);
        if (!(descriptor instanceof LocalTableDescriptor)) {
            continue;
        }
        // Only local tables can carry side-input streams (if any)
        LocalTableDescriptor localDescriptor = (LocalTableDescriptor) descriptor;
        Iterable<String> sideInputIds = ListUtils.emptyIfNull(localDescriptor.getSideInputs());
        for (String sideInputId : sideInputIds) {
            graph.addSideInputStream(getStreamSpec(sideInputId, streamConfig));
        }
    }

    // Validation is skipped when input streamIds are empty, which is only possible for LegacyTaskApplication
    boolean isLegacyTaskApp = LegacyTaskApplication.class.isAssignableFrom(appDesc.getAppClass());
    if (!isLegacyTaskApp) {
        graph.validate();
    }
    return graph;
}
Also used : StreamUtil.getStreamSpec(org.apache.samza.util.StreamUtil.getStreamSpec) StreamSpec(org.apache.samza.system.StreamSpec) LocalTableDescriptor(org.apache.samza.table.descriptors.LocalTableDescriptor) LegacyTaskApplication(org.apache.samza.application.LegacyTaskApplication) MapConfig(org.apache.samza.config.MapConfig) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LocalTableDescriptor(org.apache.samza.table.descriptors.LocalTableDescriptor)

Example 10 with TableDescriptor

use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.

the class QueryTranslator method sendToOutputStream.

/**
 * Routes the translated output of {@code node} to the sink named by {@code sinkStream}.
 *
 * The node's message stream is mapped through an {@code OutputMapFunction} and then sent either
 * to the table described by the sink config (when it has a table descriptor) or to an output
 * stream on the sink's system. In the stream case, system messages from all input streams are
 * forwarded to the same output stream when {@code sqlConfig.isProcessSystemEvents()} is true.
 *
 * @throws SamzaException if the sink has a table descriptor but no table could be obtained for it
 */
private void sendToOutputStream(String queryLogicalId, String logicalOpId, String sinkStream, StreamApplicationDescriptor appDesc, TranslatorContext translatorContext, RelNode node, int queryId) {
    SqlIOConfig sinkConfig = sqlConfig.getOutputSystemStreamConfigsBySource().get(sinkStream);
    MessageStream<SamzaSqlRelMessage> relStream = translatorContext.getMessageStream(node.getId());
    MessageStream<KV<Object, Object>> outputStream = relStream.map(new OutputMapFunction(queryLogicalId, logicalOpId, sinkStream, queryId));
    Optional<TableDescriptor> tableDescriptor = sinkConfig.getTableDescriptor();

    // Table sink: send to the table and finish.
    if (tableDescriptor.isPresent()) {
        Table sinkTable = appDesc.getTable(tableDescriptor.get());
        if (sinkTable == null) {
            throw new SamzaException("Failed to obtain table descriptor of " + sinkConfig.getSource());
        }
        outputStream.sendTo(sinkTable);
        return;
    }

    // Stream sink: resolve (or lazily create) the output stream on the sink's system.
    KVSerde<Object, Object> noOpKVSerde = KVSerde.of(new NoOpSerde<>(), new NoOpSerde<>());
    String systemName = sinkConfig.getSystemName();
    DelegatingSystemDescriptor systemDescriptor = systemDescriptors.computeIfAbsent(systemName, DelegatingSystemDescriptor::new);
    GenericOutputDescriptor<KV<Object, Object>> outputDescriptor = systemDescriptor.getOutputDescriptor(sinkConfig.getStreamId(), noOpKVSerde);
    OutputStream sink = outputMsgStreams.computeIfAbsent(sinkConfig.getSource(), source -> appDesc.getOutputStream(outputDescriptor));
    outputStream.sendTo(sink);

    // Process system events only if the output is a stream.
    if (!sqlConfig.isProcessSystemEvents()) {
        return;
    }
    for (MessageStream<SamzaSqlInputMessage> inputStream : inputMsgStreams.values()) {
        MessageStream<KV<Object, Object>> systemEvents = inputStream
            .filter(inputMessage -> inputMessage.getMetadata().isSystemMessage())
            .map(SamzaSqlInputMessage::getKeyAndMessageKV);
        systemEvents.sendTo(sink);
    }
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) TableScan(org.apache.calcite.rel.core.TableScan) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) RelShuttleImpl(org.apache.calcite.rel.RelShuttleImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) RelRoot(org.apache.calcite.rel.RelRoot) TaskContext(org.apache.samza.context.TaskContext) MapFunction(org.apache.samza.operators.functions.MapFunction) Counter(org.apache.samza.metrics.Counter) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) QueryPlanner(org.apache.samza.sql.planner.QueryPlanner) ApplicationContainerContext(org.apache.samza.context.ApplicationContainerContext) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) Map(java.util.Map) TableModify(org.apache.calcite.rel.core.TableModify) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) SamzaHistogram(org.apache.samza.metrics.SamzaHistogram) ExternalContext(org.apache.samza.context.ExternalContext) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) Logger(org.slf4j.Logger) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) JobContext(org.apache.samza.context.JobContext) ContainerContext(org.apache.samza.context.ContainerContext) SamzaRelConverter(org.apache.samza.sql.interfaces.SamzaRelConverter) SamzaSqlExecutionContext(org.apache.samza.sql.data.SamzaSqlExecutionContext) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) MetricsRegistry(org.apache.samza.metrics.MetricsRegistry) RelNode(org.apache.calcite.rel.RelNode) SamzaException(org.apache.samza.SamzaException) 
ApplicationTaskContextFactory(org.apache.samza.context.ApplicationTaskContextFactory) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) Context(org.apache.samza.context.Context) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) Validate(org.apache.commons.lang3.Validate) SamzaSqlQueryParser(org.apache.samza.sql.util.SamzaSqlQueryParser) StreamApplicationDescriptor(org.apache.samza.application.descriptors.StreamApplicationDescriptor) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) Optional(java.util.Optional) SamzaSqlApplicationContext(org.apache.samza.sql.runner.SamzaSqlApplicationContext) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) Table(org.apache.samza.table.Table) OutputStream(org.apache.samza.operators.OutputStream) KV(org.apache.samza.operators.KV) SamzaSqlInputMessage(org.apache.samza.sql.SamzaSqlInputMessage) SamzaException(org.apache.samza.SamzaException) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) DelegatingSystemDescriptor(org.apache.samza.system.descriptors.DelegatingSystemDescriptor) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Aggregations

TableDescriptor (org.apache.samza.table.descriptors.TableDescriptor)17 StringSerde (org.apache.samza.serializers.StringSerde)9 MapConfig (org.apache.samza.config.MapConfig)8 KV (org.apache.samza.operators.KV)8 KVSerde (org.apache.samza.serializers.KVSerde)8 HashMap (java.util.HashMap)7 StreamApplicationDescriptorImpl (org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl)7 TestLocalTableDescriptor (org.apache.samza.table.descriptors.TestLocalTableDescriptor)7 Config (org.apache.samza.config.Config)6 Table (org.apache.samza.table.Table)6 LocalTableDescriptor (org.apache.samza.table.descriptors.LocalTableDescriptor)6 HashSet (java.util.HashSet)5 Map (java.util.Map)5 Serde (org.apache.samza.serializers.Serde)5 ArrayList (java.util.ArrayList)4 JavaTableConfig (org.apache.samza.config.JavaTableConfig)4 JobConfig (org.apache.samza.config.JobConfig)4 MessageStream (org.apache.samza.operators.MessageStream)4 StreamTableJoinFunction (org.apache.samza.operators.functions.StreamTableJoinFunction)4 NoOpSerde (org.apache.samza.serializers.NoOpSerde)4