Use of org.apache.samza.table.descriptors.TableDescriptor in project samza by Apache.
The class TestCachingTable, method doTestSerialize.
/**
 * Builds a {@link CachingTableDescriptor} with id "1" around a dummy table descriptor created
 * with "2", serializes it through {@code toConfig}, and checks the resulting config entries.
 *
 * <p>The four-argument {@code assertEquals(expected, key, tableId, config)} used below is a helper
 * defined outside this block; presumably it looks up the per-table config key - confirm against
 * the enclosing test class.
 *
 * @param cache descriptor of the table to use as the cache, or {@code null} to configure the
 *              cache through read TTL, write TTL and cache size instead
 */
private void doTestSerialize(TableDescriptor cache) {
  TableDescriptor realTable = createDummyTableDescriptor("2");

  // Either describe the cache inline (TTLs + size) or delegate to the supplied cache table.
  CachingTableDescriptor cachingDescriptor = cache == null
      ? new CachingTableDescriptor("1", realTable)
          .withReadTtl(Duration.ofMinutes(3))
          .withWriteTtl(Duration.ofMinutes(4))
          .withCacheSize(1000)
      : new CachingTableDescriptor("1", realTable, cache);
  cachingDescriptor.withWriteAround();

  Map<String, String> serialized = cachingDescriptor.toConfig(new MapConfig());

  assertEquals("2", CachingTableDescriptor.REAL_TABLE_ID, "1", serialized);
  if (cache != null) {
    // An explicit cache table is referenced by its id.
    assertEquals(cache.getTableId(), CachingTableDescriptor.CACHE_TABLE_ID, "1", serialized);
  } else {
    // 3 and 4 minutes are expected to be serialized as milliseconds.
    assertEquals("180000", CachingTableDescriptor.READ_TTL_MS, "1", serialized);
    assertEquals("240000", CachingTableDescriptor.WRITE_TTL_MS, "1", serialized);
  }
  assertEquals("true", CachingTableDescriptor.WRITE_AROUND, "1", serialized);
}
Use of org.apache.samza.table.descriptors.TableDescriptor in project samza by Apache.
The class TestTableConfigGenerator, method testWithSerdes.
/**
 * Checks that, for two mock local tables "t1" and "t2" declared with String/Integer serdes,
 * the generated serde config exposes a key and message serde per table, and the generated
 * table config exposes a table provider factory per table.
 */
@Test
public void testWithSerdes() {
  TableDescriptor firstTable =
      new MockLocalTableDescriptor("t1", KVSerde.of(new StringSerde(), new IntegerSerde()));
  TableDescriptor secondTable =
      new MockLocalTableDescriptor("t2", KVSerde.of(new StringSerde(), new IntegerSerde()));
  List<TableDescriptor> descriptors = Arrays.asList(firstTable, secondTable);
  String[] tableIds = {"t1", "t2"};

  // Step 1: serde configuration only.
  Config jobConfig = new MapConfig(TableConfigGenerator.generateSerdeConfig(descriptors));
  JavaTableConfig serdeView = new JavaTableConfig(jobConfig);
  for (String tableId : tableIds) {
    assertNotNull(serdeView.getKeySerde(tableId));
    assertNotNull(serdeView.getMsgSerde(tableId));
  }

  // Step 2: table configuration generated on top of the serde configuration.
  MapConfig tableConfig = new MapConfig(TableConfigGenerator.generate(jobConfig, descriptors));
  JavaTableConfig tableView = new JavaTableConfig(tableConfig);
  for (String tableId : tableIds) {
    assertNotNull(tableView.getTableProviderFactory(tableId));
  }
}
Use of org.apache.samza.table.descriptors.TableDescriptor in project samza by Apache.
The class JobNodeConfigurationGenerator, method generateJobConfig.
/**
 * Produces the {@link JobConfig} for a single job node.
 *
 * <p>For a legacy task application the node's own config is wrapped and returned unchanged.
 * Otherwise a set of generated entries (job name/id, broadcast inputs, window interval, store
 * configs, execution plan, intermediate stream configs, serdes, tables and task inputs) is
 * built and merged with the node's original config via {@code mergeConfig}.
 *
 * @param jobNode           the node to generate configuration for
 * @param executionPlanJson the execution plan, stored under {@code CONFIG_INTERNAL_EXECUTION_PLAN}
 * @return the merged job configuration
 */
JobConfig generateJobConfig(JobNode jobNode, String executionPlanJson) {
  if (jobNode.isLegacyTaskApplication()) {
    return new JobConfig(jobNode.getConfig());
  }

  Map<String, String> generatedConfig = new HashMap<>();

  // Job identity.
  generatedConfig.put(JobConfig.JOB_NAME, jobNode.getJobName());
  generatedConfig.put(JobConfig.JOB_ID, jobNode.getJobId());

  Map<String, StreamEdge> inEdges = jobNode.getInEdges();
  Map<String, StreamEdge> outEdges = jobNode.getOutEdges();
  Collection<OperatorSpec> reachableOperators = jobNode.getReachableOperators();
  List<StoreDescriptor> stores = getStoreDescriptors(reachableOperators);
  Map<String, TableDescriptor> reachableTables = getReachableTables(reachableOperators, jobNode);

  // Config handed over by the JobPlanner: user-provided + system-stream descriptor config
  // + miscellaneous other config.
  Config originalConfig = jobNode.getConfig();

  // Split the node's inputs into regular inputs and broadcast inputs.
  final Set<String> inputs = new HashSet<>();
  final Set<String> broadcastInputs = new HashSet<>();
  for (StreamEdge edge : inEdges.values()) {
    String edgeName = edge.getName();
    if (!edge.isBroadcast()) {
      inputs.add(edgeName);
      continue;
    }
    // Broadcast inputs carry a partition suffix: a range for several partitions, "#0" otherwise.
    String partitionSuffix = edge.getPartitionCount() > 1
        ? "#[0-" + (edge.getPartitionCount() - 1) + "]"
        : "#0";
    broadcastInputs.add(edgeName + partitionSuffix);
  }
  configureBroadcastInputs(generatedConfig, originalConfig, broadcastInputs);

  // Window and join operator intervals for this node.
  configureWindowInterval(generatedConfig, originalConfig, reachableOperators);

  // Store configuration for stateful operators.
  for (StoreDescriptor store : stores) {
    generatedConfig.putAll(store.getStorageConfigs());
  }

  // The execution plan, as JSON.
  generatedConfig.put(CONFIG_INTERNAL_EXECUTION_PLAN, executionPlanJson);

  // Configs of intermediate input streams.
  for (StreamEdge edge : inEdges.values()) {
    if (edge.isIntermediate()) {
      generatedConfig.putAll(edge.generateConfig());
    }
  }

  // Serialized serde instances plus stream, store and table serdes. This must run before the
  // table configuration below: some TableProviders (i.e. local store backed tables) need the
  // serde configuration when their own configuration is generated.
  configureSerdes(generatedConfig, inEdges, outEdges, stores, reachableTables.keySet(), jobNode);

  // Table configuration and potential side input configuration.
  configureTables(generatedConfig, originalConfig, reachableTables, inputs);

  // task.inputs is written last, after configureTables has been given the inputs set.
  generatedConfig.put(TaskConfig.INPUT_STREAMS, Joiner.on(',').join(inputs));

  LOG.info("Job {} has generated configs {}", jobNode.getJobNameAndId(), generatedConfig);
  return new JobConfig(mergeConfig(originalConfig, generatedConfig));
}
Use of org.apache.samza.table.descriptors.TableDescriptor in project samza by Apache.
The class JobNodeConfigurationGenerator, method configureTables.
/**
 * Adds table configuration to {@code generatedConfig} and registers the side inputs of local
 * tables.
 *
 * <p>For every {@link LocalTableDescriptor} with a non-empty side-input list, each side input is
 * resolved to a system stream, its name is added to {@code inputs}, and the stream's bootstrap
 * entry is set to "true" in {@code generatedConfig}.
 *
 * @param generatedConfig config generated so far; read as input for table config generation and
 *                        updated in place
 * @param originalConfig  config used to resolve side-input names or ids to system streams
 * @param tables          table descriptors keyed by string (the caller passes reachable tables)
 * @param inputs          set of input stream names, extended in place with side inputs
 */
private void configureTables(Map<String, String> generatedConfig, Config originalConfig, Map<String, TableDescriptor> tables, Set<String> inputs) {
  List<TableDescriptor> descriptors = new ArrayList<>(tables.values());
  generatedConfig.putAll(TableConfigGenerator.generate(new MapConfig(generatedConfig), descriptors));

  // Add side inputs to the inputs and mark each such stream as bootstrap.
  for (TableDescriptor descriptor : tables.values()) {
    if (!(descriptor instanceof LocalTableDescriptor)) {
      continue;
    }
    List<String> sideInputs = ((LocalTableDescriptor) descriptor).getSideInputs();
    if (sideInputs == null || sideInputs.isEmpty()) {
      continue;
    }
    sideInputs.stream()
        .map(sideInput -> StreamUtil.getSystemStreamFromNameOrId(originalConfig, sideInput))
        .forEach(systemStream -> {
          inputs.add(StreamUtil.getNameFromSystemStream(systemStream));
          String bootstrapKey = String.format(StreamConfig.STREAM_PREFIX + StreamConfig.BOOTSTRAP,
              systemStream.getSystem(), systemStream.getStream());
          generatedConfig.put(bootstrapKey, "true");
        });
  }
}
Use of org.apache.samza.table.descriptors.TableDescriptor in project samza by Apache.
The class TestExecutionPlanner, method createStreamGraphWithStreamTableJoinWithSideInputs.
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoinWithSideInputs() {
  /*
   * Stream-table join in which table t uses input1 (64) as its side-input stream:
   *
   *                                   join-table t -> output1 (8)
   *                                        |
   *    input2 (16) -> partitionBy ("64") __|
   *
   * The application reads input2, repartitions it, joins it against the table and sends the
   * result to output1. The join function, the repartition serde and the side-inputs processor
   * are Mockito mocks.
   */
  return new StreamApplicationDescriptorImpl(appDesc -> {
    MessageStream<KV<Object, Object>> repartitionSource = appDesc.getInputStream(input2Descriptor);
    OutputStream<KV<Object, Object>> joinSink = appDesc.getOutputStream(output1Descriptor);

    // Local table "table-id" with String key/value serdes, fed by side input "input1".
    KVSerde tableSerde = new KVSerde(new StringSerde(), new StringSerde());
    TableDescriptor sideInputTableDescriptor =
        new TestLocalTableDescriptor.MockLocalTableDescriptor("table-id", tableSerde)
            .withSideInputs(Arrays.asList("input1"))
            .withSideInputsProcessor(mock(SideInputsProcessor.class));
    Table joinTable = appDesc.getTable(sideInputTableDescriptor);

    repartitionSource
        .partitionBy(m -> m.key, m -> m.value, mock(KVSerde.class), "p1")
        .join(joinTable, mock(StreamTableJoinFunction.class))
        .sendTo(joinSink);
  }, config);
}
Aggregations