Use of org.apache.druid.data.input.Row in project druid by druid-io.
From class PrefetchableTextFilesFirehoseFactoryTest, method testReconnectWithCacheAndPrefetch:
@Test
public void testReconnectWithCacheAndPrefetch() throws IOException
{
  final TestPrefetchableTextFilesFirehoseFactory factory = TestPrefetchableTextFilesFirehoseFactory.of(TEST_DIR);
  final File firehoseTmpDir = createFirehoseTmpDir("testReconnectWithCacheAndPrefetch");
  for (int i = 0; i < 5; i++) {
    final List<Row> rows = new ArrayList<>();
    try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) {
      // From the second connection onward, two files should already be cached.
      if (i > 0) {
        Assert.assertEquals(FILE_SIZE * 2, factory.getCacheManager().getTotalCachedBytes());
      }
      while (firehose.hasMore()) {
        rows.add(firehose.nextRow());
      }
    }
    assertResult(rows);
    assertNumRemainingCacheFiles(firehoseTmpDir, 2);
  }
}
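
The hasMore()/nextRow() drain loop above is the standard way to pull every row out of a Firehose. As a minimal standalone sketch, the same pattern can be factored into a helper; readAllRows is a hypothetical name, not part of the Druid test class:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.druid.data.input.Firehose;
import org.apache.druid.data.input.Row;

// Hypothetical helper: drains an already-connected Firehose into memory.
// The caller still owns closing the firehose, e.g. via try-with-resources
// as in the test above.
static List<Row> readAllRows(Firehose firehose) throws IOException
{
  final List<Row> rows = new ArrayList<>();
  while (firehose.hasMore()) {
    rows.add(firehose.nextRow());
  }
  return rows;
}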
Use of org.apache.druid.data.input.Row in project druid by druid-io.
From class PrefetchableTextFilesFirehoseFactoryTest, method testWithoutCacheAndFetch:
@Test
public void testWithoutCacheAndFetch() throws IOException
{
  final TestPrefetchableTextFilesFirehoseFactory factory = TestPrefetchableTextFilesFirehoseFactory.with(TEST_DIR, 0, 0);
  final List<Row> rows = new ArrayList<>();
  final File firehoseTmpDir = createFirehoseTmpDir("testWithoutCacheAndFetch");
  try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) {
    while (firehose.hasMore()) {
      rows.add(firehose.nextRow());
    }
  }
  Assert.assertEquals(0, factory.getCacheManager().getTotalCachedBytes());
  assertResult(rows);
  assertNumRemainingCacheFiles(firehoseTmpDir, 0);
}
Use of org.apache.druid.data.input.Row in project druid by druid-io.
From class HashBasedNumberedShardSpecTest, method testIsInChunkWithMorePartitionsBeyondNumBucketsReturningTrue:
@Test
public void testIsInChunkWithMorePartitionsBeyondNumBucketsReturningTrue()
{
  final int numBuckets = 3;
  final List<HashBasedNumberedShardSpec> specs = IntStream.range(0, 10)
      .mapToObj(i -> newShardSpecForTesting(i, numBuckets))
      .collect(Collectors.toList());
  final HashPartitioner hashPartitioner = createHashPartitionerForHashInputRow(numBuckets);
  for (int i = 0; i < 10; i++) {
    // numBuckets * 10000 + i is congruent to i modulo numBuckets, so each row
    // should land in the bucket covered by partition i.
    final InputRow row = new HashInputRow(numBuckets * 10000 + i);
    Assert.assertTrue(isInChunk(specs.get(i), hashPartitioner, row.getTimestampFromEpoch(), row));
  }
}
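
The test depends on a partition id larger than numBuckets still mapping onto a bucket. Assuming the spec assigns bucketId = partitionId % numBuckets (an assumption about newShardSpecForTesting, not confirmed by the snippet), the arithmetic works out as in this illustrative sketch:

// Illustration only, not the production HashBasedNumberedShardSpec code path.
// With numBuckets = 3 and ten partitions, partition i covers bucket (i % 3),
// and a row whose hash is (numBuckets * 10000 + i) lands in that same bucket,
// because (numBuckets * 10000 + i) % numBuckets == i % numBuckets.
static void demonstrateBucketWraparound()
{
  final int numBuckets = 3;
  for (int partitionId = 0; partitionId < 10; partitionId++) {
    final int bucketId = partitionId % numBuckets;
    final int rowHash = numBuckets * 10000 + partitionId; // mirrors HashInputRow above
    assert rowHash % numBuckets == bucketId;
  }
}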
Use of org.apache.druid.data.input.Row in project druid by druid-io.
From class MovingAverageQueryRunner, method run:
@Override
public Sequence<Row> run(QueryPlus<Row> query, ResponseContext responseContext)
{
  MovingAverageQuery maq = (MovingAverageQuery) query.getQuery();
  List<Interval> intervals;
  final Period period;

  // Get the largest bucket count from the list of averagers
  Optional<Integer> opt =
      maq.getAveragerSpecs().stream().map(AveragerFactory::getNumBuckets).max(Integer::compare);
  int buckets = opt.orElse(0);

  // Extend the interval beginning by (buckets - 1) periods so the first
  // reported bucket has a full window of history behind it
  if (maq.getGranularity() instanceof PeriodGranularity) {
    period = ((PeriodGranularity) maq.getGranularity()).getPeriod();
    int offset = buckets <= 0 ? 0 : (1 - buckets);
    intervals = maq.getIntervals()
                   .stream()
                   .map(i -> new Interval(i.getStart().withPeriodAdded(period, offset), i.getEnd()))
                   .collect(Collectors.toList());
  } else {
    throw new ISE("Only PeriodGranularity is supported for movingAverage queries");
  }

  Sequence<Row> resultsSeq;
  DataSource dataSource = maq.getDataSource();
  if (maq.getDimensions() != null
      && !maq.getDimensions().isEmpty()
      && (dataSource instanceof TableDataSource
          || dataSource instanceof UnionDataSource
          || dataSource instanceof QueryDataSource)) {
    // Build a groupBy query from the movingAverage query
    GroupByQuery.Builder builder = GroupByQuery.builder()
                                               .setDataSource(dataSource)
                                               .setInterval(intervals)
                                               .setDimFilter(maq.getFilter())
                                               .setGranularity(maq.getGranularity())
                                               .setDimensions(maq.getDimensions())
                                               .setAggregatorSpecs(maq.getAggregatorSpecs())
                                               .setPostAggregatorSpecs(maq.getPostAggregatorSpecs())
                                               .setContext(maq.getContext());
    GroupByQuery gbq = builder.build();

    ResponseContext gbqResponseContext = ResponseContext.createEmpty();
    gbqResponseContext.merge(responseContext);
    gbqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(gbq));

    Sequence<ResultRow> results = gbq.getRunner(walker).run(QueryPlus.wrap(gbq), gbqResponseContext);
    try {
      // Use localhost for the remote address
      requestLogger.logNativeQuery(RequestLogLine.forNative(
          gbq,
          DateTimes.nowUtc(),
          "127.0.0.1",
          new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))
      ));
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }
    resultsSeq = results.map(row -> row.toMapBasedRow(gbq));
  } else {
    // No dimensions, so optimize this as a timeseries query
    TimeseriesQuery tsq = new TimeseriesQuery(
        dataSource,
        new MultipleIntervalSegmentSpec(intervals),
        false,
        null,
        maq.getFilter(),
        maq.getGranularity(),
        maq.getAggregatorSpecs(),
        maq.getPostAggregatorSpecs(),
        0,
        maq.getContext()
    );
    ResponseContext tsqResponseContext = ResponseContext.createEmpty();
    tsqResponseContext.merge(responseContext);
    tsqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(tsq));

    Sequence<Result<TimeseriesResultValue>> results = tsq.getRunner(walker).run(QueryPlus.wrap(tsq), tsqResponseContext);
    try {
      // Use localhost for the remote address
      requestLogger.logNativeQuery(RequestLogLine.forNative(
          tsq,
          DateTimes.nowUtc(),
          "127.0.0.1",
          new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))
      ));
    } catch (Exception e) {
      throw Throwables.propagate(e);
    }
    resultsSeq = Sequences.map(results, new TimeseriesResultToRow());
  }

  // Process the results into period buckets
  Sequence<RowBucket> bucketedMovingAvgResults =
      Sequences.simple(new RowBucketIterable(resultsSeq, intervals, period));

  // Apply the window analysis functions
  Sequence<Row> movingAvgResults = Sequences.simple(new MovingAverageIterable(
      bucketedMovingAvgResults,
      maq.getDimensions(),
      maq.getAveragerSpecs(),
      maq.getPostAggregatorSpecs(),
      maq.getAggregatorSpecs()
  ));

  // Apply any postAveragers
  Sequence<Row> movingAvgResultsWithPostAveragers =
      Sequences.map(movingAvgResults, new PostAveragerAggregatorCalculator(maq));

  // Remove rows outside the reporting window
  List<Interval> reportingIntervals = maq.getIntervals();
  movingAvgResults = Sequences.filter(
      movingAvgResultsWithPostAveragers,
      row -> reportingIntervals.stream().anyMatch(i -> i.contains(row.getTimestamp()))
  );

  // Apply any having, sorting, and limits
  movingAvgResults = maq.applyLimit(movingAvgResults);
  return movingAvgResults;
}
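
The interval extension at the top of run() is the subtle step: each query interval's start is pulled back by buckets - 1 periods so the first reported bucket has a full window of history. A minimal sketch of that arithmetic with hypothetical values (a 7-bucket averager over day granularity):

import org.apache.druid.java.util.common.Intervals;
import org.joda.time.Interval;
import org.joda.time.Period;

// Hypothetical values: a 7-bucket averager needs 6 extra days of history,
// so the start is shifted by offset = 1 - buckets = -6 periods.
final Period period = new Period("P1D");
final int buckets = 7;
final int offset = buckets <= 0 ? 0 : (1 - buckets); // -6
final Interval original = Intervals.of("2020-01-10/2020-01-20");
final Interval extended = new Interval(original.getStart().withPeriodAdded(period, offset), original.getEnd());
// extended is 2020-01-04/2020-01-20; the warm-up days are later dropped by
// the reportingIntervals filter near the end of run().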
Use of org.apache.druid.data.input.Row in project druid by druid-io.
From class MovingAverageIterableTest, method testCompleteData:
@Test
public void testCompleteData()
{
  Map<String, Object> event1 = new HashMap<>();
  Map<String, Object> event2 = new HashMap<>();
  Map<String, Object> event3 = new HashMap<>();
  event1.put("gender", "m");
  event1.put("pageViews", 10L);
  event2.put("gender", "f");
  event2.put("pageViews", 20L);
  event3.put("gender", "u");
  event3.put("pageViews", 30L);
  List<DimensionSpec> ds = new ArrayList<>();
  ds.add(new DefaultDimensionSpec("gender", "gender"));
  Row jan1Row1 = new MapBasedRow(JAN_1, event1);
  Row jan1Row2 = new MapBasedRow(JAN_1, event2);
  Row jan1Row3 = new MapBasedRow(JAN_1, event3);
  Row jan2Row1 = new MapBasedRow(JAN_2, event1);
  Row jan2Row2 = new MapBasedRow(JAN_2, event2);
  Row jan2Row3 = new MapBasedRow(JAN_2, event3);
  Sequence<RowBucket> seq = Sequences.simple(Arrays.asList(
      new RowBucket(JAN_1, Arrays.asList(jan1Row1, jan1Row2, jan1Row3)),
      new RowBucket(JAN_2, Arrays.asList(jan2Row1, jan2Row2, jan2Row3))
  ));
  Iterator<Row> iter = new MovingAverageIterable(
      seq,
      ds,
      Collections.singletonList(new LongMeanAveragerFactory("movingAvgPageViews", 2, 1, "pageViews")),
      Collections.emptyList(),
      Collections.singletonList(new LongSumAggregatorFactory("pageViews", "pageViews"))
  ).iterator();

  // Expect one row per gender per day, in bucket order: m, f, u for JAN_1, then JAN_2.
  Assert.assertTrue(iter.hasNext());
  Row result = iter.next();
  Assert.assertEquals("m", result.getDimension("gender").get(0));
  Assert.assertEquals(JAN_1, result.getTimestamp());

  Assert.assertTrue(iter.hasNext());
  result = iter.next();
  Assert.assertEquals("f", result.getDimension("gender").get(0));
  Assert.assertEquals(JAN_1, result.getTimestamp());

  Assert.assertTrue(iter.hasNext());
  result = iter.next();
  Assert.assertEquals("u", result.getDimension("gender").get(0));
  Assert.assertEquals(JAN_1, result.getTimestamp());

  Assert.assertTrue(iter.hasNext());
  result = iter.next();
  Assert.assertEquals("m", result.getDimension("gender").get(0));
  Assert.assertEquals(JAN_2, result.getTimestamp());

  Assert.assertTrue(iter.hasNext());
  result = iter.next();
  Assert.assertEquals("f", result.getDimension("gender").get(0));
  Assert.assertEquals(JAN_2, result.getTimestamp());

  Assert.assertTrue(iter.hasNext());
  result = iter.next();
  Assert.assertEquals("u", result.getDimension("gender").get(0));
  Assert.assertEquals(JAN_2, result.getTimestamp());

  Assert.assertFalse(iter.hasNext());
}
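
The assertions above rely on how MapBasedRow exposes the underlying event map: getDimension() returns the value as a List<String> (hence the get(0)) and getTimestamp() returns the DateTime given to the constructor. A minimal sketch of that surface, built from one of the same events:

import java.util.HashMap;
import java.util.Map;
import org.apache.druid.data.input.MapBasedRow;
import org.apache.druid.data.input.Row;
import org.apache.druid.java.util.common.DateTimes;

final Map<String, Object> event = new HashMap<>();
event.put("gender", "m");
event.put("pageViews", 10L);
final Row row = new MapBasedRow(DateTimes.of("2020-01-01"), event);
row.getDimension("gender").get(0); // "m" -- dimension values come back as List<String>
row.getMetric("pageViews");        // 10 as a Number
row.getTimestamp();                // the DateTime passed to the constructor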