Search in sources :

Example 11 with SamzaSqlValidator

use of org.apache.samza.sql.planner.SamzaSqlValidator in project samza by apache.

the class TestSamzaSqlEndToEnd method testEndToEndMultiSqlStmts.

/**
 * Validates and runs an application configured with two insert statements, then checks the
 * messages captured in {@code TestAvroSystemFactory.messages}.
 *
 * <p>The test asserts that {@code 2 * numMessages} messages were captured and that their distinct
 * {@code id} values are exactly {@code 0 .. numMessages - 1}.
 *
 * @throws SamzaSqlValidatorException if validation of the SQL statements fails
 */
@Test
public void testEndToEndMultiSqlStmts() throws SamzaSqlValidatorException {
    int numMessages = 20;
    TestAvroSystemFactory.messages.clear();
    Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);
    String sql1 = "Insert into testavro.simpleOutputTopic select * from testavro.SIMPLE1";
    String sql2 = "Insert into testavro.SIMPLE3 select * from testavro.SIMPLE2";
    List<String> sqlStmts = Arrays.asList(sql1, sql2);
    staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));
    Config config = new MapConfig(staticConfigs);
    new SamzaSqlValidator(config).validate(sqlStmts);
    runApplication(config);
    List<Integer> outMessages = TestAvroSystemFactory.messages.stream()
        .map(x -> Integer.valueOf(((GenericRecord) x.getMessage()).get("id").toString()))
        .sorted()
        .collect(Collectors.toList());
    Assert.assertEquals(numMessages * 2, outMessages.size());
    Set<Integer> outMessagesSet = new HashSet<>(outMessages);
    Assert.assertEquals(numMessages, outMessagesSet.size());
    // Compare as sets: HashSet iteration order is unspecified, so copying the set into a list and
    // comparing it with an ordered list would make the assertion depend on an implementation detail.
    Set<Integer> expectedIds = IntStream.range(0, numMessages).boxed().collect(Collectors.toSet());
    Assert.assertEquals(expectedIds, outMessagesSet);
}
Also used : SamzaSqlValidator(org.apache.samza.sql.planner.SamzaSqlValidator) MapConfig(org.apache.samza.config.MapConfig) SamzaSqlTestConfig(org.apache.samza.sql.util.SamzaSqlTestConfig) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) Config(org.apache.samza.config.Config) ArrayList(java.util.ArrayList) MapConfig(org.apache.samza.config.MapConfig) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 12 with SamzaSqlValidator

use of org.apache.samza.sql.planner.SamzaSqlValidator in project samza by apache.

the class TestSamzaSqlEndToEnd method testEndToEndFanIn.

/**
 * Validates and runs an application in which two insert statements write to the same output
 * topic ({@code testavro.simpleOutputTopic}), then checks the messages captured in
 * {@code TestAvroSystemFactory.messages}.
 *
 * <p>The test asserts that {@code 2 * numMessages} messages were captured and that their distinct
 * {@code id} values are exactly {@code 0 .. numMessages - 1}.
 *
 * @throws SamzaSqlValidatorException if validation of the SQL statements fails
 */
@Test
public void testEndToEndFanIn() throws SamzaSqlValidatorException {
    int numMessages = 20;
    TestAvroSystemFactory.messages.clear();
    Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);
    String sql1 = "Insert into testavro.simpleOutputTopic select * from testavro.SIMPLE2";
    String sql2 = "Insert into testavro.simpleOutputTopic select * from testavro.SIMPLE1";
    List<String> sqlStmts = Arrays.asList(sql1, sql2);
    staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));
    Config config = new MapConfig(staticConfigs);
    new SamzaSqlValidator(config).validate(sqlStmts);
    runApplication(config);
    List<Integer> outMessages = TestAvroSystemFactory.messages.stream()
        .map(x -> Integer.valueOf(((GenericRecord) x.getMessage()).get("id").toString()))
        .sorted()
        .collect(Collectors.toList());
    Assert.assertEquals(numMessages * 2, outMessages.size());
    Set<Integer> outMessagesSet = new HashSet<>(outMessages);
    Assert.assertEquals(numMessages, outMessagesSet.size());
    // Compare as sets: HashSet iteration order is unspecified, so copying the set into a list and
    // comparing it with an ordered list would make the assertion depend on an implementation detail.
    Set<Integer> expectedIds = IntStream.range(0, numMessages).boxed().collect(Collectors.toSet());
    Assert.assertEquals(expectedIds, outMessagesSet);
}
Also used : SamzaSqlValidator(org.apache.samza.sql.planner.SamzaSqlValidator) MapConfig(org.apache.samza.config.MapConfig) SamzaSqlTestConfig(org.apache.samza.sql.util.SamzaSqlTestConfig) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) Config(org.apache.samza.config.Config) ArrayList(java.util.ArrayList) MapConfig(org.apache.samza.config.MapConfig) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 13 with SamzaSqlValidator

use of org.apache.samza.sql.planner.SamzaSqlValidator in project samza by apache.

the class TestSamzaSqlEndToEnd method testEndToEndSubQuery.

/**
 * Validates and runs a single insert statement whose source is a sub-query, then checks the
 * number of captured output messages and that none of them has a null {@code id}.
 */
@Test
public void testEndToEndSubQuery() {
    int numMessages = 20;
    TestAvroSystemFactory.messages.clear();
    Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);

    String sql1 = "Insert into testavro.outputTopic(id, bool_value) select Flatten(a) as id, true as bool_value"
        + " from (select MyTestArray(id) a from testavro.SIMPLE1)";
    List<String> sqlStmts = Collections.singletonList(sql1);
    staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));

    Config config = new MapConfig(staticConfigs);
    new SamzaSqlValidator(config).validate(sqlStmts);
    runApplication(config);

    List<OutgoingMessageEnvelope> outMessages = new ArrayList<>(TestAvroSystemFactory.messages);

    // Invariant: the input row with rank i yields an array of i elements, and Flatten de-normalizes
    // it into one record per array entry. The expected total is therefore
    // 0 + 1 + ... + (numMessages - 1) = numMessages * (numMessages - 1) / 2.
    int expectedMessages = (numMessages * (numMessages - 1)) / 2;
    Assert.assertEquals(expectedMessages, outMessages.size());

    // Every flattened record must carry a non-null id; report the first offender if there is one.
    Optional<GenericRecord> nullValueRecord = outMessages.stream()
        .map(OutgoingMessageEnvelope::getMessage)
        .map(GenericRecord.class::cast)
        .filter(record -> record.get("id") == null)
        .findFirst();
    Assert.assertFalse("Null value " + nullValueRecord.orElse(null), nullValueRecord.isPresent());

    // TODO this is failing for now and that is because of udf weak type system, fixing it will be beyond this work.
    /* // The String value column is result of dot product thus must be present in the Array column
    Optional<GenericRecord> missingValue = outMessages.stream().map(x -> (GenericRecord) x.getMessage()).filter(x -> {
      String value = (String) x.get("string_value");
      List<Object> arrayValues = (List<Object>) x.get("array_values");
      if (arrayValues == null) {
        return true;
      }
      Optional<Object> notThere = arrayValues.stream().filter(v -> v.toString().equalsIgnoreCase(value)).findAny();
      return !notThere.isPresent();
    }).findFirst();
    Assert.assertFalse("Absent Value " + missingValue.orElse(null), missingValue.isPresent());
    */
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) TestAvroSystemFactory(org.apache.samza.sql.system.TestAvroSystemFactory) SamzaSqlValidatorException(org.apache.samza.sql.planner.SamzaSqlValidatorException) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MyTestUdf(org.apache.samza.sql.util.MyTestUdf) HashSet(java.util.HashSet) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) GenericRecord(org.apache.avro.generic.GenericRecord) Logger(org.slf4j.Logger) SampleRelConverterFactory(org.apache.samza.sql.util.SampleRelConverterFactory) SamzaSqlTestConfig(org.apache.samza.sql.util.SamzaSqlTestConfig) Set(java.util.Set) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) SamzaSqlValidator(org.apache.samza.sql.planner.SamzaSqlValidator) Test(org.junit.Test) JsonUtil(org.apache.samza.sql.util.JsonUtil) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) Ignore(org.junit.Ignore) Optional(java.util.Optional) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Config(org.apache.samza.config.Config) Assert(org.junit.Assert) Collections(java.util.Collections) SamzaSqlValidator(org.apache.samza.sql.planner.SamzaSqlValidator) MapConfig(org.apache.samza.config.MapConfig) SamzaSqlTestConfig(org.apache.samza.sql.util.SamzaSqlTestConfig) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) Config(org.apache.samza.config.Config) ArrayList(java.util.ArrayList) MapConfig(org.apache.samza.config.MapConfig) GenericRecord(org.apache.avro.generic.GenericRecord) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Test(org.junit.Test)

Example 14 with SamzaSqlValidator

use of org.apache.samza.sql.planner.SamzaSqlValidator in project samza by apache.

the class TestSamzaSqlEndToEnd method testEndToEndStreamTableInnerJoinWithPrimaryKey.

/**
 * Validates and runs a stream-table inner join keyed on the table's {@code __key__} column, then
 * compares the captured "pageKey,profileName" pairs with
 * {@code TestAvroSystemFactory.getPageKeyProfileNameJoin(numMessages)}.
 *
 * @throws Exception if validation or application execution fails
 */
@Test
public void testEndToEndStreamTableInnerJoinWithPrimaryKey() throws Exception {
    int numMessages = 20;
    TestAvroSystemFactory.messages.clear();
    Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);

    String sql = "Insert into testavro.enrichedPageViewTopic "
        + "select pv.pageKey as __key__, pv.pageKey as pageKey, coalesce(null, 'N/A') as companyName,"
        + "       p.name as profileName, p.address as profileAddress "
        + "from testavro.PROFILE.`$table` as p "
        + "join testavro.PAGEVIEW as pv "
        + " on p.__key__ = pv.profileId";
    List<String> sqlStmts = Arrays.asList(sql);
    staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));

    Config config = new MapConfig(staticConfigs);
    new SamzaSqlValidator(config).validate(sqlStmts);
    runApplication(config);

    // Render each output record as "pageKey,profileName"; a missing profile name becomes "null".
    List<String> outMessages = TestAvroSystemFactory.messages.stream().map(envelope -> {
        GenericRecord record = (GenericRecord) envelope.getMessage();
        String pageKey = record.get("pageKey").toString();
        Object profileName = record.get("profileName");
        return pageKey + "," + String.valueOf(profileName);
    }).collect(Collectors.toList());

    Assert.assertEquals(numMessages, outMessages.size());
    List<String> expectedOutMessages = TestAvroSystemFactory.getPageKeyProfileNameJoin(numMessages);
    Assert.assertEquals(expectedOutMessages, outMessages);
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) TestAvroSystemFactory(org.apache.samza.sql.system.TestAvroSystemFactory) SamzaSqlValidatorException(org.apache.samza.sql.planner.SamzaSqlValidatorException) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MyTestUdf(org.apache.samza.sql.util.MyTestUdf) HashSet(java.util.HashSet) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) GenericRecord(org.apache.avro.generic.GenericRecord) Logger(org.slf4j.Logger) SampleRelConverterFactory(org.apache.samza.sql.util.SampleRelConverterFactory) SamzaSqlTestConfig(org.apache.samza.sql.util.SamzaSqlTestConfig) Set(java.util.Set) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) SamzaSqlValidator(org.apache.samza.sql.planner.SamzaSqlValidator) Test(org.junit.Test) JsonUtil(org.apache.samza.sql.util.JsonUtil) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) Ignore(org.junit.Ignore) Optional(java.util.Optional) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Config(org.apache.samza.config.Config) Assert(org.junit.Assert) Collections(java.util.Collections) SamzaSqlValidator(org.apache.samza.sql.planner.SamzaSqlValidator) MapConfig(org.apache.samza.config.MapConfig) SamzaSqlTestConfig(org.apache.samza.sql.util.SamzaSqlTestConfig) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) Config(org.apache.samza.config.Config) MapConfig(org.apache.samza.config.MapConfig) GenericRecord(org.apache.avro.generic.GenericRecord) Test(org.junit.Test)

Example 15 with SamzaSqlValidator

use of org.apache.samza.sql.planner.SamzaSqlValidator in project samza by apache.

the class TestSamzaSqlEndToEnd method testEndToEndGroupBy.

/**
 * Validates and runs a windowed group-by/count query over {@code testavro.PAGEVIEW}, restricted to
 * the page keys {@code 'job'} and {@code 'inbox'}, then checks the per-key counts summed across
 * all emitted windows against
 * {@code TestAvroSystemFactory.getPageKeyGroupByResult(numMessages, pageKeys)}.
 *
 * @throws Exception if validation or application execution fails
 */
// Disabling the test until SAMZA-1652 and SAMZA-1661 are fixed.
@Ignore
@Test
public void testEndToEndGroupBy() throws Exception {
    int numMessages = 200;
    long windowDurationMs = 200;
    TestAvroSystemFactory.messages.clear();
    Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(Collections.emptyMap(), numMessages, false, false, windowDurationMs);
    String sql = "Insert into testavro.pageViewCountTopic" + " select 'SampleJob' as jobName, pv.pageKey, count(*) as `count`" + " from testavro.PAGEVIEW as pv" + " where pv.pageKey = 'job' or pv.pageKey = 'inbox'" + " group by (pv.pageKey)";
    List<String> sqlStmts = Arrays.asList(sql);
    staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));
    Config config = new MapConfig(staticConfigs);
    new SamzaSqlValidator(config).validate(sqlStmts);
    runApplication(config);
    // Let's capture the list of windows/counts per key.
    // forEach is required here: a stream with only an intermediate map() and no terminal operation
    // is never evaluated, which would leave this map empty.
    Map<String, List<String>> pageKeyCountListMap = new HashMap<>();
    TestAvroSystemFactory.messages.forEach(x -> {
        GenericRecord record = (GenericRecord) x.getMessage();
        String pageKey = record.get("pageKey").toString();
        String count = record.get("count").toString();
        pageKeyCountListMap.computeIfAbsent(pageKey, k -> new ArrayList<>()).add(count);
    });
    Map<String, Integer> pageKeyCountMap = new HashMap<>();
    pageKeyCountListMap.forEach((key, list) -> {
        // Check that the number of windows per key is greater than one but less than the number of input messages per key.
        Assert.assertTrue(list.size() > 1 && list.size() < numMessages / TestAvroSystemFactory.PAGE_KEYS.length);
        // Collapse the count of messages per key
        pageKeyCountMap.put(key, list.stream().mapToInt(Integer::parseInt).sum());
    });
    Set<String> pageKeys = new HashSet<>(Arrays.asList("job", "inbox"));
    Map<String, Integer> expectedPageKeyCountMap = TestAvroSystemFactory.getPageKeyGroupByResult(numMessages, pageKeys);
    Assert.assertEquals(expectedPageKeyCountMap, pageKeyCountMap);
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) TestAvroSystemFactory(org.apache.samza.sql.system.TestAvroSystemFactory) SamzaSqlValidatorException(org.apache.samza.sql.planner.SamzaSqlValidatorException) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MyTestUdf(org.apache.samza.sql.util.MyTestUdf) HashSet(java.util.HashSet) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) GenericRecord(org.apache.avro.generic.GenericRecord) Logger(org.slf4j.Logger) SampleRelConverterFactory(org.apache.samza.sql.util.SampleRelConverterFactory) SamzaSqlTestConfig(org.apache.samza.sql.util.SamzaSqlTestConfig) Set(java.util.Set) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) SamzaSqlValidator(org.apache.samza.sql.planner.SamzaSqlValidator) Test(org.junit.Test) JsonUtil(org.apache.samza.sql.util.JsonUtil) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) Ignore(org.junit.Ignore) Optional(java.util.Optional) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) Config(org.apache.samza.config.Config) Assert(org.junit.Assert) Collections(java.util.Collections) SamzaSqlValidator(org.apache.samza.sql.planner.SamzaSqlValidator) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) SamzaSqlTestConfig(org.apache.samza.sql.util.SamzaSqlTestConfig) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) Config(org.apache.samza.config.Config) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) MapConfig(org.apache.samza.config.MapConfig) HashSet(java.util.HashSet) Ignore(org.junit.Ignore) Test(org.junit.Test)

Aggregations

Config (org.apache.samza.config.Config)55 MapConfig (org.apache.samza.config.MapConfig)55 SamzaSqlValidator (org.apache.samza.sql.planner.SamzaSqlValidator)55 SamzaSqlApplicationConfig (org.apache.samza.sql.runner.SamzaSqlApplicationConfig)55 SamzaSqlTestConfig (org.apache.samza.sql.util.SamzaSqlTestConfig)55 Test (org.junit.Test)55 Ignore (org.junit.Ignore)28 ArrayList (java.util.ArrayList)27 HashMap (java.util.HashMap)25 Arrays (java.util.Arrays)24 List (java.util.List)24 Map (java.util.Map)24 Collectors (java.util.stream.Collectors)24 GenericRecord (org.apache.avro.generic.GenericRecord)24 SamzaSqlValidatorException (org.apache.samza.sql.planner.SamzaSqlValidatorException)24 TestAvroSystemFactory (org.apache.samza.sql.system.TestAvroSystemFactory)24 JsonUtil (org.apache.samza.sql.util.JsonUtil)24 Assert (org.junit.Assert)24 OutgoingMessageEnvelope (org.apache.samza.system.OutgoingMessageEnvelope)23 HashSet (java.util.HashSet)22