Search in sources:

Example 1 with TableDescriptor

Use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.

In class TestCachingTable, method doTestSerialize.

/**
 * Builds a {@link CachingTableDescriptor} with id "1" that wraps a dummy table with id "2",
 * enables write-around, serializes it through {@code toConfig} and checks the resulting entries.
 *
 * @param cache descriptor of the table used as the cache; when {@code null} the caching descriptor is
 *              instead configured with a 3-minute read TTL, a 4-minute write TTL and a cache size of 1000
 */
private void doTestSerialize(TableDescriptor cache) {
    final String cachingTableId = "1";
    final boolean hasExplicitCache = cache != null;

    TableDescriptor realTable = createDummyTableDescriptor("2");
    CachingTableDescriptor descriptor = hasExplicitCache
        ? new CachingTableDescriptor(cachingTableId, realTable, cache)
        : new CachingTableDescriptor(cachingTableId, realTable)
            .withReadTtl(Duration.ofMinutes(3))
            .withWriteTtl(Duration.ofMinutes(4))
            .withCacheSize(1000);
    descriptor.withWriteAround();

    Map<String, String> serialized = descriptor.toConfig(new MapConfig());

    // The wrapped table is always recorded, whichever cache flavour was chosen.
    assertEquals("2", CachingTableDescriptor.REAL_TABLE_ID, cachingTableId, serialized);
    if (hasExplicitCache) {
        // A supplied cache table is expected to be referenced by its id.
        assertEquals(cache.getTableId(), CachingTableDescriptor.CACHE_TABLE_ID, cachingTableId, serialized);
    } else {
        // TTLs are expected in milliseconds: 3 min = 180000 ms, 4 min = 240000 ms.
        assertEquals("180000", CachingTableDescriptor.READ_TTL_MS, cachingTableId, serialized);
        assertEquals("240000", CachingTableDescriptor.WRITE_TTL_MS, cachingTableId, serialized);
    }
    assertEquals("true", CachingTableDescriptor.WRITE_AROUND, cachingTableId, serialized);
}
Also used : CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) Matchers.anyString(org.mockito.Matchers.anyString) MapConfig(org.apache.samza.config.MapConfig) GuavaCacheTableDescriptor(org.apache.samza.table.descriptors.GuavaCacheTableDescriptor) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) CachingTableDescriptor(org.apache.samza.table.descriptors.CachingTableDescriptor) BaseTableDescriptor(org.apache.samza.table.descriptors.BaseTableDescriptor)

Example 2 with TableDescriptor

Use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.

In class TestTableConfigGenerator, method testWithSerdes.

/**
 * Checks that, for two mock local tables with String/Integer serdes, the generated serde config
 * exposes a key and a message serde per table, and that the generated table config exposes a
 * table provider factory per table.
 */
@Test
public void testWithSerdes() {
    final String[] tableIds = {"t1", "t2"};

    TableDescriptor firstTable = new MockLocalTableDescriptor("t1", KVSerde.of(new StringSerde(), new IntegerSerde()));
    TableDescriptor secondTable = new MockLocalTableDescriptor("t2", KVSerde.of(new StringSerde(), new IntegerSerde()));
    List<TableDescriptor> descriptors = Arrays.asList(firstTable, secondTable);

    // Step 1: serde-only config must carry key and message serdes for every table.
    Config serdeConfig = new MapConfig(TableConfigGenerator.generateSerdeConfig(descriptors));
    JavaTableConfig serdeView = new JavaTableConfig(serdeConfig);
    for (String tableId : tableIds) {
        assertNotNull(serdeView.getKeySerde(tableId));
        assertNotNull(serdeView.getMsgSerde(tableId));
    }

    // Step 2: the table config, generated on top of the serde config, must name a provider factory.
    MapConfig generatedTableConfig = new MapConfig(TableConfigGenerator.generate(serdeConfig, descriptors));
    JavaTableConfig tableView = new JavaTableConfig(generatedTableConfig);
    for (String tableId : tableIds) {
        assertNotNull(tableView.getTableProviderFactory(tableId));
    }
}
Also used : StringSerde(org.apache.samza.serializers.StringSerde) Config(org.apache.samza.config.Config) JavaTableConfig(org.apache.samza.config.JavaTableConfig) MapConfig(org.apache.samza.config.MapConfig) JavaTableConfig(org.apache.samza.config.JavaTableConfig) MapConfig(org.apache.samza.config.MapConfig) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LocalTableDescriptor(org.apache.samza.table.descriptors.LocalTableDescriptor) IntegerSerde(org.apache.samza.serializers.IntegerSerde) Test(org.junit.Test)

Example 3 with TableDescriptor

Use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.

In class JobNodeConfigurationGenerator, method generateJobConfig.

/**
 * Produces the {@link JobConfig} for one job node by layering generated entries over the node's
 * own config.
 *
 * <p>For a legacy task application the node's config is wrapped and returned unchanged. Otherwise
 * the generated entries cover: job name and id, broadcast inputs, window/join intervals, store
 * configs, the execution plan JSON, intermediate input stream configs, serdes, table configs
 * (which may add side inputs to the task inputs), and finally the task input list.
 *
 * @param jobNode           node whose configuration is generated
 * @param executionPlanJson JSON form of the execution plan, stored under
 *                          {@code CONFIG_INTERNAL_EXECUTION_PLAN}
 * @return the node's original config merged with the generated entries
 */
JobConfig generateJobConfig(JobNode jobNode, String executionPlanJson) {
    if (jobNode.isLegacyTaskApplication()) {
        return new JobConfig(jobNode.getConfig());
    }

    Map<String, String> generated = new HashMap<>();

    // Job identity.
    generated.put(JobConfig.JOB_NAME, jobNode.getJobName());
    generated.put(JobConfig.JOB_ID, jobNode.getJobId());

    Map<String, StreamEdge> inputEdges = jobNode.getInEdges();
    Map<String, StreamEdge> outputEdges = jobNode.getOutEdges();
    Collection<OperatorSpec> operators = jobNode.getReachableOperators();
    List<StoreDescriptor> storeDescriptors = getStoreDescriptors(operators);
    Map<String, TableDescriptor> tables = getReachableTables(operators, jobNode);

    // Config handed over by the JobPlanner: user-provided + system-stream descriptor config + misc. other config.
    Config plannerConfig = jobNode.getConfig();

    // Split the node's input edges into regular inputs and broadcast inputs.
    final Set<String> inputs = new HashSet<>();
    final Set<String> broadcastInputs = new HashSet<>();
    for (StreamEdge edge : inputEdges.values()) {
        String streamName = edge.getName();
        if (!edge.isBroadcast()) {
            inputs.add(streamName);
            continue;
        }
        // Broadcast streams carry a partition suffix: a range for several partitions, "#0" otherwise.
        String partitionSuffix = edge.getPartitionCount() > 1
            ? "#[0-" + (edge.getPartitionCount() - 1) + "]"
            : "#0";
        broadcastInputs.add(streamName + partitionSuffix);
    }
    configureBroadcastInputs(generated, plannerConfig, broadcastInputs);

    // Window and join operator intervals for this node.
    configureWindowInterval(generated, plannerConfig, operators);

    // Store configuration for stateful operators.
    for (StoreDescriptor storeDescriptor : storeDescriptors) {
        generated.putAll(storeDescriptor.getStorageConfigs());
    }

    // Execution plan, as JSON.
    generated.put(CONFIG_INTERNAL_EXECUTION_PLAN, executionPlanJson);

    // Intermediate input streams contribute their own stream configs.
    for (StreamEdge edge : inputEdges.values()) {
        if (edge.isIntermediate()) {
            generated.putAll(edge.generateConfig());
        }
    }

    // Serialized serde instances plus stream, store and table serdes.
    // This must run before table configuration: some TableProviders (i.e. local store backed tables)
    // need the serde configuration when their own configuration is generated.
    configureSerdes(generated, inputEdges, outputEdges, storeDescriptors, tables.keySet(), jobNode);

    // Table configuration and potential side input configuration.
    configureTables(generated, plannerConfig, tables, inputs);

    // task.inputs is written last so that it includes anything configureTables added to inputs.
    generated.put(TaskConfig.INPUT_STREAMS, Joiner.on(',').join(inputs));

    LOG.info("Job {} has generated configs {}", jobNode.getJobNameAndId(), generated);
    return new JobConfig(mergeConfig(plannerConfig, generated));
}
Also used : HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) MapConfig(org.apache.samza.config.MapConfig) StorageConfig(org.apache.samza.config.StorageConfig) SerializerConfig(org.apache.samza.config.SerializerConfig) TaskConfig(org.apache.samza.config.TaskConfig) JavaTableConfig(org.apache.samza.config.JavaTableConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LocalTableDescriptor(org.apache.samza.table.descriptors.LocalTableDescriptor) StoreDescriptor(org.apache.samza.operators.spec.StoreDescriptor) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) StatefulOperatorSpec(org.apache.samza.operators.spec.StatefulOperatorSpec) HashSet(java.util.HashSet)

Example 4 with TableDescriptor

Use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.

In class JobNodeConfigurationGenerator, method configureTables.

/**
 * Adds table configuration to {@code generatedConfig} and registers the side inputs of local tables.
 *
 * <p>Each side input of a {@link LocalTableDescriptor} is resolved to a system stream against
 * {@code originalConfig}, added to {@code inputs}, and marked as a bootstrap stream in
 * {@code generatedConfig}.
 *
 * @param generatedConfig config being built; read as the base for table config generation and
 *                        updated in place
 * @param originalConfig  config used to resolve side input names or ids to system streams
 * @param tables          table descriptors to configure, keyed by string
 * @param inputs          set of input stream names; side inputs are added to it
 */
private void configureTables(Map<String, String> generatedConfig, Config originalConfig, Map<String, TableDescriptor> tables, Set<String> inputs) {
    MapConfig currentConfig = new MapConfig(generatedConfig);
    List<TableDescriptor> descriptors = new ArrayList<>(tables.values());
    generatedConfig.putAll(TableConfigGenerator.generate(currentConfig, descriptors));

    // Add side inputs to the inputs and mark the stream as bootstrap
    final String bootstrapKeyFormat = StreamConfig.STREAM_PREFIX + StreamConfig.BOOTSTRAP;
    for (TableDescriptor tableDescriptor : tables.values()) {
        if (!(tableDescriptor instanceof LocalTableDescriptor)) {
            continue;
        }
        List<String> sideInputs = ((LocalTableDescriptor) tableDescriptor).getSideInputs();
        if (sideInputs == null || sideInputs.isEmpty()) {
            continue;
        }
        sideInputs.stream()
            .map(sideInput -> StreamUtil.getSystemStreamFromNameOrId(originalConfig, sideInput))
            .forEach(systemStream -> {
                inputs.add(StreamUtil.getNameFromSystemStream(systemStream));
                String bootstrapKey = String.format(bootstrapKeyFormat, systemStream.getSystem(), systemStream.getStream());
                generatedConfig.put(bootstrapKey, "true");
            });
    }
}
Also used : ConfigUtil(org.apache.samza.util.ConfigUtil) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) LoggerFactory(org.slf4j.LoggerFactory) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) JoinOperatorSpec(org.apache.samza.operators.spec.JoinOperatorSpec) Serde(org.apache.samza.serializers.Serde) LocalTableDescriptor(org.apache.samza.table.descriptors.LocalTableDescriptor) StringUtils(org.apache.commons.lang3.StringUtils) StreamConfig(org.apache.samza.config.StreamConfig) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) Map(java.util.Map) ApplicationConfig(org.apache.samza.config.ApplicationConfig) StreamUtil(org.apache.samza.util.StreamUtil) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) SerializableSerde(org.apache.samza.serializers.SerializableSerde) StorageConfig(org.apache.samza.config.StorageConfig) Logger(org.slf4j.Logger) SerializerConfig(org.apache.samza.config.SerializerConfig) TaskConfig(org.apache.samza.config.TaskConfig) Collection(java.util.Collection) Set(java.util.Set) WindowOperatorSpec(org.apache.samza.operators.spec.WindowOperatorSpec) UUID(java.util.UUID) JavaTableConfig(org.apache.samza.config.JavaTableConfig) Collectors(java.util.stream.Collectors) SamzaException(org.apache.samza.SamzaException) StoreDescriptor(org.apache.samza.operators.spec.StoreDescriptor) Objects(java.util.Objects) Base64(java.util.Base64) List(java.util.List) StatefulOperatorSpec(org.apache.samza.operators.spec.StatefulOperatorSpec) Config(org.apache.samza.config.Config) MathUtil(org.apache.samza.util.MathUtil) TableConfigGenerator(org.apache.samza.table.TableConfigGenerator) Joiner(com.google.common.base.Joiner) LocalTableDescriptor(org.apache.samza.table.descriptors.LocalTableDescriptor) ArrayList(java.util.ArrayList) MapConfig(org.apache.samza.config.MapConfig)

Example 5 with TableDescriptor

Use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.

In class TestExecutionPlanner, method createStreamGraphWithStreamTableJoinWithSideInputs.

/**
 * Creates an application descriptor for a stream-table join where table t is configured with
 * input1 (64) as a side-input stream.
 *
 * <pre>
 *                                   join-table t -> output1 (8)
 *                                        |
 *    input2 (16) -> partitionBy ("64") __|
 * </pre>
 *
 * @return the application descriptor built from the topology above and the shared {@code config}
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinWithSideInputs() {
    return new StreamApplicationDescriptorImpl(app -> {
        MessageStream<KV<Object, Object>> input2Stream = app.getInputStream(input2Descriptor);
        OutputStream<KV<Object, Object>> output1Stream = app.getOutputStream(output1Descriptor);

        // Local table "table-id" fed by side input "input1" through a mocked processor.
        KVSerde tableSerde = new KVSerde(new StringSerde(), new StringSerde());
        TableDescriptor sideInputTableDescriptor =
            new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", tableSerde)
                .withSideInputs(Arrays.asList("input1"))
                .withSideInputsProcessor(mock(SideInputsProcessor.class));
        Table joinTable = app.getTable(sideInputTableDescriptor);

        // input2 -> partitionBy -> join with the table -> output1
        input2Stream
            .partitionBy(message -> message.key, message -> message.value, mock(KVSerde.class), "p1")
            .join(joinTable, mock(StreamTableJoinFunction.class))
            .sendTo(output1Stream);
    }, config);
}
Also used : KVSerde(org.apache.samza.serializers.KVSerde) StringSerde(org.apache.samza.serializers.StringSerde) Table(org.apache.samza.table.Table) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) KV(org.apache.samza.operators.KV) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) SideInputsProcessor(org.apache.samza.storage.SideInputsProcessor)

Aggregations

TableDescriptor (org.apache.samza.table.descriptors.TableDescriptor)17 StringSerde (org.apache.samza.serializers.StringSerde)9 MapConfig (org.apache.samza.config.MapConfig)8 KV (org.apache.samza.operators.KV)8 KVSerde (org.apache.samza.serializers.KVSerde)8 HashMap (java.util.HashMap)7 StreamApplicationDescriptorImpl (org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl)7 TestLocalTableDescriptor (org.apache.samza.table.descriptors.TestLocalTableDescriptor)7 Config (org.apache.samza.config.Config)6 Table (org.apache.samza.table.Table)6 LocalTableDescriptor (org.apache.samza.table.descriptors.LocalTableDescriptor)6 HashSet (java.util.HashSet)5 Map (java.util.Map)5 Serde (org.apache.samza.serializers.Serde)5 ArrayList (java.util.ArrayList)4 JavaTableConfig (org.apache.samza.config.JavaTableConfig)4 JobConfig (org.apache.samza.config.JobConfig)4 MessageStream (org.apache.samza.operators.MessageStream)4 StreamTableJoinFunction (org.apache.samza.operators.functions.StreamTableJoinFunction)4 NoOpSerde (org.apache.samza.serializers.NoOpSerde)4