use of org.apache.samza.sql.planner.SamzaSqlValidator in project samza by apache.
the class TestSamzaSqlEndToEnd method testEndToEndMultiSqlStmts.
/**
 * Validates and runs two insert statements in a single application, then checks that the
 * captured output holds {@code 2 * numMessages} records whose distinct {@code id} values are
 * exactly {@code 0..numMessages-1}.
 *
 * @throws SamzaSqlValidatorException if validation of the statements fails
 */
@Test
public void testEndToEndMultiSqlStmts() throws SamzaSqlValidatorException {
  int numMessages = 20;
  // Start from an empty capture buffer so only this test's output is counted.
  TestAvroSystemFactory.messages.clear();
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);
  String sql1 = "Insert into testavro.simpleOutputTopic select * from testavro.SIMPLE1";
  String sql2 = "Insert into testavro.SIMPLE3 select * from testavro.SIMPLE2";
  List<String> sqlStmts = Arrays.asList(sql1, sql2);
  staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));
  Config config = new MapConfig(staticConfigs);
  new SamzaSqlValidator(config).validate(sqlStmts);
  runApplication(config);

  List<Integer> outMessages = TestAvroSystemFactory.messages.stream()
      .map(x -> Integer.valueOf(((GenericRecord) x.getMessage()).get("id").toString()))
      .sorted()
      .collect(Collectors.toList());
  Assert.assertEquals(numMessages * 2, outMessages.size());

  Set<Integer> outMessagesSet = new HashSet<>(outMessages);
  Assert.assertEquals(numMessages, outMessagesSet.size());
  // Compare set to set: HashSet iteration order is unspecified, so copying it into a list
  // and comparing against an ordered list only passes by accident of the hash layout.
  Set<Integer> expectedIds = IntStream.range(0, numMessages).boxed().collect(Collectors.toSet());
  Assert.assertEquals(expectedIds, outMessagesSet);
}
use of org.apache.samza.sql.planner.SamzaSqlValidator in project samza by apache.
the class TestSamzaSqlEndToEnd method testEndToEndFanIn.
/**
 * Validates and runs two insert statements that both write to
 * {@code testavro.simpleOutputTopic}, then checks that the captured output holds
 * {@code 2 * numMessages} records whose distinct {@code id} values are exactly
 * {@code 0..numMessages-1}.
 *
 * @throws SamzaSqlValidatorException if validation of the statements fails
 */
@Test
public void testEndToEndFanIn() throws SamzaSqlValidatorException {
  int numMessages = 20;
  // Start from an empty capture buffer so only this test's output is counted.
  TestAvroSystemFactory.messages.clear();
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);
  String sql1 = "Insert into testavro.simpleOutputTopic select * from testavro.SIMPLE2";
  String sql2 = "Insert into testavro.simpleOutputTopic select * from testavro.SIMPLE1";
  List<String> sqlStmts = Arrays.asList(sql1, sql2);
  staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));
  Config config = new MapConfig(staticConfigs);
  new SamzaSqlValidator(config).validate(sqlStmts);
  runApplication(config);

  List<Integer> outMessages = TestAvroSystemFactory.messages.stream()
      .map(x -> Integer.valueOf(((GenericRecord) x.getMessage()).get("id").toString()))
      .sorted()
      .collect(Collectors.toList());
  Assert.assertEquals(numMessages * 2, outMessages.size());

  Set<Integer> outMessagesSet = new HashSet<>(outMessages);
  Assert.assertEquals(numMessages, outMessagesSet.size());
  // Compare set to set: HashSet iteration order is unspecified, so copying it into a list
  // and comparing against an ordered list only passes by accident of the hash layout.
  Set<Integer> expectedIds = IntStream.range(0, numMessages).boxed().collect(Collectors.toSet());
  Assert.assertEquals(expectedIds, outMessagesSet);
}
use of org.apache.samza.sql.planner.SamzaSqlValidator in project samza by apache.
the class TestSamzaSqlEndToEnd method testEndToEndSubQuery.
/**
 * Validates and runs a statement whose sub-query builds an array with {@code MyTestArray(id)}
 * and whose outer query applies {@code Flatten} to it, then checks the number of captured
 * records and that none of them has a null {@code id}.
 */
@Test
public void testEndToEndSubQuery() {
  final int numMessages = 20;
  TestAvroSystemFactory.messages.clear();
  Map<String, String> configMap = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);

  String flattenSql = "Insert into testavro.outputTopic(id, bool_value) select Flatten(a) as id, true as bool_value"
      + " from (select MyTestArray(id) a from testavro.SIMPLE1)";
  List<String> statements = Collections.singletonList(flattenSql);
  configMap.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(statements));

  Config config = new MapConfig(configMap);
  new SamzaSqlValidator(config).validate(statements);
  runApplication(config);

  // Snapshot the captured output before inspecting it.
  List<OutgoingMessageEnvelope> envelopes = new ArrayList<>(TestAvroSystemFactory.messages);

  // Test invariant: the input row with rank i carries an array of i elements, and Flatten
  // de-normalizes the data into one record per array entry. The expected total is therefore
  // computed as n * (n - 1) / 2 (presumably ranks run 0..n-1 -- confirm against MyTestArray).
  int expectedCount = (numMessages * (numMessages - 1)) / 2;
  Assert.assertEquals(expectedCount, envelopes.size());

  // Every flattened record must carry a non-null id.
  Optional<GenericRecord> firstNullId = envelopes.stream()
      .map(envelope -> (GenericRecord) envelope.getMessage())
      .filter(rec -> rec.get("id") == null)
      .findFirst();
  Assert.assertFalse("Null value " + firstNullId.orElse(null), firstNullId.isPresent());

  // TODO this is failing for now and that is because of udf weak type system, fixing it will be beyond this work.
  /* // The String value column is result of dot product thus must be present in the Array column
  Optional<GenericRecord> missingValue = outMessages.stream().map(x -> (GenericRecord) x.getMessage()).filter(x -> {
    String value = (String) x.get("string_value");
    List<Object> arrayValues = (List<Object>) x.get("array_values");
    if (arrayValues == null) {
      return true;
    }
    Optional<Object> notThere = arrayValues.stream().filter(v -> v.toString().equalsIgnoreCase(value)).findAny();
    return !notThere.isPresent();
  }).findFirst();
  Assert.assertFalse("Absent Value " + missingValue.orElse(null), missingValue.isPresent());
  */
}
use of org.apache.samza.sql.planner.SamzaSqlValidator in project samza by apache.
the class TestSamzaSqlEndToEnd method testEndToEndStreamTableInnerJoinWithPrimaryKey.
/**
 * Validates and runs a join between the {@code testavro.PROFILE} table and the
 * {@code testavro.PAGEVIEW} stream on the profile key, then compares the captured
 * "pageKey,profileName" pairs against the expected join result.
 *
 * @throws Exception if validation or the application run fails
 */
@Test
public void testEndToEndStreamTableInnerJoinWithPrimaryKey() throws Exception {
  final int numMessages = 20;
  TestAvroSystemFactory.messages.clear();
  Map<String, String> configMap = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(numMessages);

  String joinSql = "Insert into testavro.enrichedPageViewTopic "
      + "select pv.pageKey as __key__, pv.pageKey as pageKey, coalesce(null, 'N/A') as companyName,"
      + " p.name as profileName, p.address as profileAddress "
      + "from testavro.PROFILE.`$table` as p "
      + "join testavro.PAGEVIEW as pv "
      + " on p.__key__ = pv.profileId";
  List<String> statements = Arrays.asList(joinSql);
  configMap.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(statements));

  Config config = new MapConfig(configMap);
  new SamzaSqlValidator(config).validate(statements);
  runApplication(config);

  // Render each captured record as "pageKey,profileName"; a missing profile name is
  // rendered as the literal text "null".
  List<String> actualRows = TestAvroSystemFactory.messages.stream()
      .map(envelope -> {
        GenericRecord row = (GenericRecord) envelope.getMessage();
        Object profileName = row.get("profileName");
        return row.get("pageKey").toString() + "," + String.valueOf(profileName);
      })
      .collect(Collectors.toList());
  Assert.assertEquals(numMessages, actualRows.size());

  List<String> expectedRows = TestAvroSystemFactory.getPageKeyProfileNameJoin(numMessages);
  Assert.assertEquals(expectedRows, actualRows);
}
use of org.apache.samza.sql.planner.SamzaSqlValidator in project samza by apache.
the class TestSamzaSqlEndToEnd method testEndToEndGroupBy.
/**
 * Validates and runs a windowed group-by count over {@code testavro.PAGEVIEW} restricted to
 * the page keys 'job' and 'inbox', collects the per-window counts emitted for each key, and
 * checks that their per-key sums match the expected group-by result.
 *
 * @throws Exception if validation or the application run fails
 */
// Disabling the test until SAMZA-1652 and SAMZA-1661 are fixed.
@Ignore
@Test
public void testEndToEndGroupBy() throws Exception {
  int numMessages = 200;
  long windowDurationMs = 200;
  TestAvroSystemFactory.messages.clear();
  Map<String, String> staticConfigs = SamzaSqlTestConfig.fetchStaticConfigsWithFactories(Collections.emptyMap(), numMessages, false, false, windowDurationMs);
  String sql = "Insert into testavro.pageViewCountTopic"
      + " select 'SampleJob' as jobName, pv.pageKey, count(*) as `count`"
      + " from testavro.PAGEVIEW as pv"
      + " where pv.pageKey = 'job' or pv.pageKey = 'inbox'"
      + " group by (pv.pageKey)";
  List<String> sqlStmts = Arrays.asList(sql);
  staticConfigs.put(SamzaSqlApplicationConfig.CFG_SQL_STMTS_JSON, JsonUtil.toJson(sqlStmts));
  Config config = new MapConfig(staticConfigs);
  new SamzaSqlValidator(config).validate(sqlStmts);
  runApplication(config);

  // Let's capture the list of windows/counts per key.
  // forEach is used deliberately: a stream().map(...) without a terminal operation is never
  // evaluated, which would leave this map empty and make the checks below vacuous.
  Map<String, List<String>> pageKeyCountListMap = new HashMap<>();
  TestAvroSystemFactory.messages.forEach(x -> {
    GenericRecord record = (GenericRecord) x.getMessage();
    String pageKey = record.get("pageKey").toString();
    String count = record.get("count").toString();
    pageKeyCountListMap.computeIfAbsent(pageKey, k -> new ArrayList<>()).add(count);
  });

  Map<String, Integer> pageKeyCountMap = new HashMap<>();
  pageKeyCountListMap.forEach((key, list) -> {
    // Check that each key produced more than one window but fewer windows than the number of
    // input messages per key.
    Assert.assertTrue(list.size() > 1 && list.size() < numMessages / TestAvroSystemFactory.PAGE_KEYS.length);
    // Collapse the count of messages per key
    pageKeyCountMap.put(key, list.stream().mapToInt(Integer::parseInt).sum());
  });

  Set<String> pageKeys = new HashSet<>(Arrays.asList("job", "inbox"));
  Map<String, Integer> expectedPageKeyCountMap = TestAvroSystemFactory.getPageKeyGroupByResult(numMessages, pageKeys);
  Assert.assertEquals(expectedPageKeyCountMap, pageKeyCountMap);
}
Aggregations