Search in sources :

Example 1 with TimeDataWindow

Use of org.apache.storm.st.topology.window.data.TimeDataWindow in project storm by apache.

The class SlidingWindowTest, method runAndVerifyTime.

/**
 * Submits the topology, waits for the spout and bolt to make progress, and then checks that the
 * time based windows logged by the bolt match the windows recomputed from the spout log.
 *
 * <p>The i-th window is expected to end {@code i * slideSec} seconds after the value returned by
 * {@code TimeUtil.ceil} for the first spout timestamp, and to start {@code windowSec} seconds
 * before its end. The last {@code windowSec / slideSec} bolt log entries are not compared.
 *
 * @param windowSec window length in seconds
 * @param slideSec  sliding interval in seconds
 * @param testable  supplies the names of the spout and bolt under test
 * @param topo      wrapper used to submit the topology and to read back its log data
 */
static void runAndVerifyTime(int windowSec, int slideSec, TestableTopology testable, TopoWrap topo) throws TException, java.net.MalformedURLException {
    topo.submitSuccessfully();

    final int minSpoutEmits = 1000 + windowSec;
    final int minBoltEmits = 5;
    final String boltName = testable.getBoltName();
    final String spoutName = testable.getSpoutName();

    // Spout and bolt are given the same time budget to reach their minimum emit counts.
    final int progressTimeout = 60 + 10 * (windowSec + slideSec);
    topo.waitForProgress(minSpoutEmits, spoutName, progressTimeout);
    topo.waitForProgress(minBoltEmits, boltName, progressTimeout);

    final List<TimeData> spoutEmits = topo.getLogData(spoutName, TimeData.CLS);
    final List<LogData> boltLogs = topo.getLogData(boltName);
    final List<TimeDataWindow> boltWindows = topo.getLogData(boltName, TimeDataWindow.CLS);

    final boolean enoughBoltEmits = boltLogs.size() >= minBoltEmits;
    Assert.assertTrue(enoughBoltEmits, "Expecting min " + minBoltEmits + " bolt emits, found: " + boltLogs.size() + " \n\t" + boltLogs);

    // End of the first window: the first spout timestamp (in UTC) passed through TimeUtil.ceil with the slide interval.
    final DateTime firstSpoutTime = new DateTime(spoutEmits.get(0).getDate()).withZone(DateTimeZone.UTC);
    final DateTime firstEndTime = TimeUtil.ceil(firstSpoutTime, slideSec);

    // The trailing windowSec / slideSec bolt log entries are left out of the comparison.
    final int windowCount = boltLogs.size() - windowSec / slideSec;
    for (int idx = 0; idx < windowCount; idx++) {
        final DateTime windowEnd = firstEndTime.plusSeconds(idx * slideSec);
        final DateTime windowStart = windowEnd.minusSeconds(windowSec);
        log.info("Comparing window: " + windowStart + " to " + windowEnd + " iter " + (idx + 1) + "/" + windowCount);

        final TimeDataWindow recomputed = TimeDataWindow.newInstance(spoutEmits, windowStart, windowEnd);
        final LogData boltLogEntry = boltLogs.get(idx);
        final TimeDataWindow logged = boltWindows.get(idx);
        log.info("Actual window: " + logged.getDescription());
        log.info("Computed window: " + recomputed.getDescription());

        // Everything in the recomputed window must have been seen by the bolt ...
        for (TimeData expectedEmit : recomputed) {
            final boolean present = logged.contains(expectedEmit);
            final String failure = String.format("Missing: '%s' \n\tActual: '%s' \n\tComputed window: '%s'", expectedEmit, boltLogEntry, recomputed);
            Assert.assertTrue(present, failure);
        }
        // ... and the bolt must not have seen anything outside the recomputed window.
        for (TimeData loggedEmit : logged) {
            final boolean present = recomputed.contains(loggedEmit);
            final String failure = String.format("Extra: '%s' \n\tActual: '%s' \n\tComputed window: '%s'", loggedEmit, boltLogEntry, recomputed);
            Assert.assertTrue(present, failure);
        }
    }
}
Also used : TimeDataWindow(org.apache.storm.st.topology.window.data.TimeDataWindow) LogData(org.apache.storm.st.wrapper.LogData) TimeData(org.apache.storm.st.topology.window.data.TimeData) DateTime(org.joda.time.DateTime)

Example 2 with TimeDataWindow

Use of org.apache.storm.st.topology.window.data.TimeDataWindow in project storm by apache.

The class WindowVerifier, method runAndVerifyTime.

/**
 * Runs the topology and verifies that the number and contents of time based windows are as expected
 * once the spout and bolt have emitted sufficient tuples.
 * The spout and bolt are required to log exactly one log line per emit/window using {@link StringDecorator}.
 *
 * <p>For each window logged by the bolt, the expected contents are recomputed from the spout log as
 * the emits whose timestamp lies in {@code (windowEnd - windowSec, windowEnd]}. The i-th window is
 * expected to end {@code i * slideSec} seconds after the value {@code TimeUtil.ceil} returns for the
 * first spout timestamp.
 *
 * @param windowSec window length in seconds
 * @param slideSec  sliding interval in seconds
 * @param testable  supplies the spout/bolt names and executor counts of the topology under test
 * @param topo      wrapper used to submit the topology and to read back its decorated log lines
 * @throws IOException  declared by this method; presumably propagated from the {@code topo} calls
 * @throws TException   declared by this method; presumably propagated from the {@code topo} calls
 * @throws java.net.MalformedURLException declared by this method; presumably propagated from the {@code topo} calls
 */
public void runAndVerifyTime(int windowSec, int slideSec, TestableTopology testable, TopoWrap topo) throws IOException, TException, java.net.MalformedURLException {
    topo.submitSuccessfully();
    final int minSpoutEmits = 100;
    final int minBoltEmits = 5;
    String boltName = testable.getBoltName();
    String spoutName = testable.getSpoutName();
    // Waiting for spout tuples isn't strictly necessary since we also wait for bolt emits, but do it anyway.
    // The budget is a fixed allowance of 180 for topology startup plus the time it should take to
    // produce at most 10 windows (presumably seconds, i.e. three minutes of startup - TODO confirm the unit).
    final int progressTimeout = 180 + 10 * slideSec;
    topo.assertProgress(minSpoutEmits, testable.getSpoutExecutors(), spoutName, progressTimeout);
    topo.assertProgress(minBoltEmits, testable.getBoltExecutors(), boltName, progressTimeout);
    final List<TimeData> allSpoutLogLines = topo.getDeserializedDecoratedLogLines(spoutName, TimeData::fromJson);
    final List<TimeDataWindow> allBoltLogLines = topo.getDeserializedDecoratedLogLines(boltName, TimeDataWindow::fromJson);
    // NOTE(review): this check uses TestNG's Assert while the checks in the loop use JUnit's Assertions;
    // consider settling on one framework.
    Assert.assertTrue(allBoltLogLines.size() >= minBoltEmits, "Expecting min " + minBoltEmits + " bolt emits, found: " + allBoltLogLines.size() + " \n\t" + allBoltLogLines);
    final DateTime firstWindowEndTime = TimeUtil.ceil(new DateTime(allSpoutLogLines.get(0).getDate()).withZone(DateTimeZone.UTC), slideSec);
    final int numberOfWindows = allBoltLogLines.size();
    /*
     * Windows should be aligned to the slide size, starting at firstWindowEndTime - windowSec.
     * Because all windows are aligned to the slide size, we can partition the spout emitted timestamps by which window they should fall in.
     * This checks that the partitioned spout emits fall in the expected windows, based on the logs from the spout and bolt.
     */
    for (int i = 0; i < numberOfWindows; ++i) {
        final DateTime windowEnd = firstWindowEndTime.plusSeconds(i * slideSec);
        final DateTime windowStart = windowEnd.minusSeconds(windowSec);
        // Computed once per window instead of once per spout log line inside the filter below.
        final DateTime windowEndExclusive = windowEnd.plusMillis(1);
        LOG.info("Comparing window: {} to {} iter {}/{}", windowStart, windowEnd, i + 1, numberOfWindows);
        final List<TimeData> expectedSpoutEmitsInWindow = allSpoutLogLines.stream().filter(spoutLog -> {
            final DateTime spoutLogTime = new DateTime(spoutLog.getDate());
            // The window boundaries are (windowStart, windowEnd]: the start is exclusive, the end is inclusive.
            return spoutLogTime.isAfter(windowStart) && spoutLogTime.isBefore(windowEndExclusive);
        }).collect(Collectors.toList());
        final TimeDataWindow expectedWindow = new TimeDataWindow(expectedSpoutEmitsInWindow);
        final TimeDataWindow actualWindow = allBoltLogLines.get(i);
        LOG.info("Actual window: {}", actualWindow.getDescription());
        LOG.info("Expected window: {}", expectedWindow.getDescription());
        // Every expected emit must be present in the window the bolt logged ...
        for (TimeData oneLog : expectedWindow.getTimeData()) {
            Assertions.assertTrue(actualWindow.getTimeData().contains(oneLog), () -> String.format("Missing: '%s' \n\tActual: '%s' \n\tComputed window: '%s'", oneLog, actualWindow, expectedWindow));
        }
        // ... and the bolt's window must not contain anything beyond the expected emits.
        for (TimeData oneLog : actualWindow.getTimeData()) {
            Assertions.assertTrue(expectedWindow.getTimeData().contains(oneLog), () -> String.format("Extra: '%s' \n\tActual: '%s' \n\tComputed window: '%s'", oneLog, actualWindow, expectedWindow));
        }
    }
}
Also used : TimeDataWindow(org.apache.storm.st.topology.window.data.TimeDataWindow) DateTimeZone(org.joda.time.DateTimeZone) Logger(org.slf4j.Logger) MalformedURLException(java.net.MalformedURLException) StringDecorator(org.apache.storm.st.utils.StringDecorator) DecoratedLogLine(org.apache.storm.st.wrapper.DecoratedLogLine) DateTime(org.joda.time.DateTime) LoggerFactory(org.slf4j.LoggerFactory) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) TException(org.apache.storm.thrift.TException) TimeData(org.apache.storm.st.topology.window.data.TimeData) List(java.util.List) TopoWrap(org.apache.storm.st.wrapper.TopoWrap) Assert(org.testng.Assert) Assertions(org.junit.jupiter.api.Assertions) TimeUtil(org.apache.storm.st.utils.TimeUtil) TimeDataWindow(org.apache.storm.st.topology.window.data.TimeDataWindow) TestableTopology(org.apache.storm.st.topology.TestableTopology) TimeData(org.apache.storm.st.topology.window.data.TimeData) DateTime(org.joda.time.DateTime)

Aggregations

TimeData (org.apache.storm.st.topology.window.data.TimeData)2 TimeDataWindow (org.apache.storm.st.topology.window.data.TimeDataWindow)2 DateTime (org.joda.time.DateTime)2 IOException (java.io.IOException)1 MalformedURLException (java.net.MalformedURLException)1 List (java.util.List)1 Collectors (java.util.stream.Collectors)1 TestableTopology (org.apache.storm.st.topology.TestableTopology)1 StringDecorator (org.apache.storm.st.utils.StringDecorator)1 TimeUtil (org.apache.storm.st.utils.TimeUtil)1 DecoratedLogLine (org.apache.storm.st.wrapper.DecoratedLogLine)1 LogData (org.apache.storm.st.wrapper.LogData)1 TopoWrap (org.apache.storm.st.wrapper.TopoWrap)1 TException (org.apache.storm.thrift.TException)1 DateTimeZone (org.joda.time.DateTimeZone)1 Assertions (org.junit.jupiter.api.Assertions)1 Logger (org.slf4j.Logger)1 LoggerFactory (org.slf4j.LoggerFactory)1 Assert (org.testng.Assert)1