Search in sources :

Example 1 with LongSerde

use of org.apache.samza.serializers.LongSerde in project samza by apache.

the class LogicalAggregateTranslator method translate.

void translate(final LogicalAggregate aggregate, final TranslatorContext context) {
    validateAggregateFunctions(aggregate);
    MessageStream<SamzaSqlRelMessage> inputStream = context.getMessageStream(aggregate.getInput().getId());
    // At this point, the assumption is that only count function is supported.
    SupplierFunction<Long> initialValue = () -> (long) 0;
    FoldLeftFunction<SamzaSqlRelMessage, Long> foldCountFn = (m, c) -> c + 1;
    final ArrayList<String> aggFieldNames = getAggFieldNames(aggregate);
    MessageStream<SamzaSqlRelMessage> outputStream = inputStream.map(new TranslatorInputMetricsMapFunction(logicalOpId)).window(Windows.keyedTumblingWindow(m -> m, Duration.ofMillis(context.getExecutionContext().getSamzaSqlApplicationConfig().getWindowDurationMs()), initialValue, foldCountFn, new SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde(), new LongSerde()).setAccumulationMode(AccumulationMode.DISCARDING), changeLogStorePrefix + "_tumblingWindow_" + logicalOpId).map(windowPane -> {
        List<String> fieldNames = windowPane.getKey().getKey().getSamzaSqlRelRecord().getFieldNames();
        List<Object> fieldValues = windowPane.getKey().getKey().getSamzaSqlRelRecord().getFieldValues();
        fieldNames.add(aggFieldNames.get(0));
        fieldValues.add(windowPane.getMessage());
        return new SamzaSqlRelMessage(fieldNames, fieldValues, new SamzaSqlRelMsgMetadata(0L, 0L));
    });
    context.registerMessageStream(aggregate.getId(), outputStream);
    outputStream.map(new TranslatorOutputMetricsMapFunction(logicalOpId));
}
Also used : SqlKind(org.apache.calcite.sql.SqlKind) Windows(org.apache.samza.operators.windows.Windows) Logger(org.slf4j.Logger) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) LoggerFactory(org.slf4j.LoggerFactory) SamzaException(org.apache.samza.SamzaException) ArrayList(java.util.ArrayList) List(java.util.List) LogicalAggregate(org.apache.calcite.rel.logical.LogicalAggregate) AccumulationMode(org.apache.samza.operators.windows.AccumulationMode) Duration(java.time.Duration) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) SamzaSqlRelMessageSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory) FoldLeftFunction(org.apache.samza.operators.functions.FoldLeftFunction) SupplierFunction(org.apache.samza.operators.functions.SupplierFunction) MessageStream(org.apache.samza.operators.MessageStream) LongSerde(org.apache.samza.serializers.LongSerde) LongSerde(org.apache.samza.serializers.LongSerde) SamzaSqlRelMsgMetadata(org.apache.samza.sql.data.SamzaSqlRelMsgMetadata) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 2 with LongSerde

use of org.apache.samza.serializers.LongSerde in project samza by apache.

the class TestTimeSeriesKeySerde method testLongTimeSeriesKey.

@Test
public void testLongTimeSeriesKey() {
    TimeSeriesKey<Long> storeKey = new TimeSeriesKey<>(30L, 1, 23);
    TimeSeriesKeySerde<Long> serde = new TimeSeriesKeySerde<>(new LongSerde());
    byte[] serializedBytes = serde.toBytes(storeKey);
    TimeSeriesKey<Long> deserializedTimeSeriesKey = serde.fromBytes(serializedBytes);
    assertEquals(storeKey.getKey(), deserializedTimeSeriesKey.getKey());
    assertEquals(storeKey.getSeqNum(), deserializedTimeSeriesKey.getSeqNum());
    assertEquals(storeKey.getTimestamp(), deserializedTimeSeriesKey.getTimestamp());
    assertEquals(storeKey, deserializedTimeSeriesKey);
}
Also used : LongSerde(org.apache.samza.serializers.LongSerde) Test(org.junit.Test)

Example 3 with LongSerde

use of org.apache.samza.serializers.LongSerde in project samza by apache.

the class TestJobGraphJsonGenerator method testRepartitionedWindowStreamApplication.

@Test
public void testRepartitionedWindowStreamApplication() throws Exception {
    Map<String, String> configMap = new HashMap<>();
    configMap.put(JobConfig.JOB_NAME, "test-app");
    configMap.put(JobConfig.JOB_DEFAULT_SYSTEM, "test-system");
    StreamTestUtils.addStreamConfigs(configMap, "PageView", "hdfs", "hdfs:/user/dummy/PageViewEvent");
    StreamTestUtils.addStreamConfigs(configMap, "PageViewCount", "kafka", "PageViewCount");
    Config config = new MapConfig(configMap);
    // set up external partition count
    Map<String, Integer> system1Map = new HashMap<>();
    system1Map.put("hdfs:/user/dummy/PageViewEvent", 512);
    Map<String, Integer> system2Map = new HashMap<>();
    system2Map.put("PageViewCount", 16);
    SystemAdmin systemAdmin1 = createSystemAdmin(system1Map);
    SystemAdmin systemAdmin2 = createSystemAdmin(system2Map);
    SystemAdmins systemAdmins = mock(SystemAdmins.class);
    when(systemAdmins.getSystemAdmin("hdfs")).thenReturn(systemAdmin1);
    when(systemAdmins.getSystemAdmin("kafka")).thenReturn(systemAdmin2);
    StreamManager streamManager = new StreamManager(systemAdmins);
    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        KVSerde<String, PageViewEvent> pvSerde = KVSerde.of(new StringSerde(), new JsonSerdeV2<>(PageViewEvent.class));
        GenericSystemDescriptor isd = new GenericSystemDescriptor("hdfs", "mockSystemFactoryClass");
        GenericInputDescriptor<KV<String, PageViewEvent>> pageView = isd.getInputDescriptor("PageView", pvSerde);
        KVSerde<String, Long> pvcSerde = KVSerde.of(new StringSerde(), new LongSerde());
        GenericSystemDescriptor osd = new GenericSystemDescriptor("kafka", "mockSystemFactoryClass");
        GenericOutputDescriptor<KV<String, Long>> pageViewCount = osd.getOutputDescriptor("PageViewCount", pvcSerde);
        MessageStream<KV<String, PageViewEvent>> inputStream = appDesc.getInputStream(pageView);
        OutputStream<KV<String, Long>> outputStream = appDesc.getOutputStream(pageViewCount);
        inputStream.partitionBy(kv -> kv.getValue().getCountry(), kv -> kv.getValue(), pvSerde, "keyed-by-country").window(Windows.keyedTumblingWindow(kv -> kv.getValue().getCountry(), Duration.ofSeconds(10L), () -> 0L, (m, c) -> c + 1L, new StringSerde(), new LongSerde()), "count-by-country").map(pane -> new KV<>(pane.getKey().getKey(), pane.getMessage())).sendTo(outputStream);
    }, config);
    ExecutionPlanner planner = new ExecutionPlanner(config, streamManager);
    ExecutionPlan plan = planner.plan(graphSpec);
    String json = plan.getPlanAsJson();
    System.out.println(json);
    // deserialize
    ObjectMapper mapper = new ObjectMapper();
    JobGraphJsonGenerator.JobGraphJson nodes = mapper.readValue(json, JobGraphJsonGenerator.JobGraphJson.class);
    JobGraphJsonGenerator.OperatorGraphJson operatorGraphJson = nodes.jobs.get(0).operatorGraph;
    assertEquals(2, operatorGraphJson.inputStreams.size());
    assertEquals(4, operatorGraphJson.operators.size());
    assertEquals(1, nodes.sourceStreams.size());
    assertEquals(1, nodes.sinkStreams.size());
    assertEquals(1, nodes.intermediateStreams.size());
    // verify partitionBy op output to the intermdiate stream of the same id
    assertEquals(operatorGraphJson.operators.get("test-app-1-partition_by-keyed-by-country").get("outputStreamId"), "test-app-1-partition_by-keyed-by-country");
    assertEquals(operatorGraphJson.operators.get("test-app-1-send_to-5").get("outputStreamId"), "PageViewCount");
}
Also used : LongSerde(org.apache.samza.serializers.LongSerde) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) HashMap(java.util.HashMap) Serde(org.apache.samza.serializers.Serde) TestLocalTableDescriptor(org.apache.samza.table.descriptors.TestLocalTableDescriptor) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) SendToTableOperatorSpec(org.apache.samza.operators.spec.SendToTableOperatorSpec) StringSerde(org.apache.samza.serializers.StringSerde) HashSet(java.util.HashSet) OperatorSpec(org.apache.samza.operators.spec.OperatorSpec) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) TestExecutionPlanner(org.apache.samza.execution.TestExecutionPlanner) Duration(java.time.Duration) Map(java.util.Map) ApplicationConfig(org.apache.samza.config.ApplicationConfig) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) NoOpSerde(org.apache.samza.serializers.NoOpSerde) MessageStream(org.apache.samza.operators.MessageStream) LongSerde(org.apache.samza.serializers.LongSerde) Before(org.junit.Before) Windows(org.apache.samza.operators.windows.Windows) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Set(java.util.Set) Matchers(org.hamcrest.Matchers) StreamSpec(org.apache.samza.system.StreamSpec) Test(org.junit.Test) JoinFunction(org.apache.samza.operators.functions.JoinFunction) Collectors(java.util.stream.Collectors) Mockito(org.mockito.Mockito) OperatorSpecs(org.apache.samza.operators.spec.OperatorSpecs) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) SystemAdmin(org.apache.samza.system.SystemAdmin) Config(org.apache.samza.config.Config) JsonSerdeV2(org.apache.samza.serializers.JsonSerdeV2) KVSerde(org.apache.samza.serializers.KVSerde) Assert(org.junit.Assert) Collections(java.util.Collections) OutputStream(org.apache.samza.operators.OutputStream) SystemAdmins(org.apache.samza.system.SystemAdmins) StringSerde(org.apache.samza.serializers.StringSerde) HashMap(java.util.HashMap) JobConfig(org.apache.samza.config.JobConfig) ApplicationConfig(org.apache.samza.config.ApplicationConfig) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) TestExecutionPlanner(org.apache.samza.execution.TestExecutionPlanner) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MapConfig(org.apache.samza.config.MapConfig) SystemAdmins(org.apache.samza.system.SystemAdmins) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) KV(org.apache.samza.operators.KV) SystemAdmin(org.apache.samza.system.SystemAdmin) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) Test(org.junit.Test)

Aggregations

LongSerde (org.apache.samza.serializers.LongSerde)3 Duration (java.time.Duration)2 MessageStream (org.apache.samza.operators.MessageStream)2 Windows (org.apache.samza.operators.windows.Windows)2 Test (org.junit.Test)2 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Collectors (java.util.stream.Collectors)1 LogicalAggregate (org.apache.calcite.rel.logical.LogicalAggregate)1 SqlKind (org.apache.calcite.sql.SqlKind)1 SamzaException (org.apache.samza.SamzaException)1 StreamApplicationDescriptorImpl (org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl)1 ApplicationConfig (org.apache.samza.config.ApplicationConfig)1 Config (org.apache.samza.config.Config)1