
Example 16 with Row

Use of org.apache.druid.data.input.Row in project druid by druid-io.

Class PrefetchableTextFilesFirehoseFactoryTest, method testReconnectWithCacheAndPrefetch:

@Test
public void testReconnectWithCacheAndPrefetch() throws IOException {
    final TestPrefetchableTextFilesFirehoseFactory factory = TestPrefetchableTextFilesFirehoseFactory.of(TEST_DIR);
    final File firehoseTmpDir = createFirehoseTmpDir("testReconnectWithCacheAndPrefetch");
    for (int i = 0; i < 5; i++) {
        final List<Row> rows = new ArrayList<>();
        try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) {
            if (i > 0) {
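                // on every reconnect after the first, both cached test files (FILE_SIZE * 2 bytes,
                // matching the assertNumRemainingCacheFiles check below) should still be present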
                Assert.assertEquals(FILE_SIZE * 2, factory.getCacheManager().getTotalCachedBytes());
            }
            while (firehose.hasMore()) {
                rows.add(firehose.nextRow());
            }
        }
        assertResult(rows);
        assertNumRemainingCacheFiles(firehoseTmpDir, 2);
    }
}
Also used: Firehose (org.apache.druid.data.input.Firehose), ArrayList (java.util.ArrayList), Row (org.apache.druid.data.input.Row), File (java.io.File), Test (org.junit.Test)

Example 17 with Row

Use of org.apache.druid.data.input.Row in project druid by druid-io.

Class PrefetchableTextFilesFirehoseFactoryTest, method testWithoutCacheAndFetch:

@Test
public void testWithoutCacheAndFetch() throws IOException {
    final TestPrefetchableTextFilesFirehoseFactory factory = TestPrefetchableTextFilesFirehoseFactory.with(TEST_DIR, 0, 0);
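    // both capacities are set to 0 here, so the asserts below expect no cached bytes and no cache files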
    final List<Row> rows = new ArrayList<>();
    final File firehoseTmpDir = createFirehoseTmpDir("testWithoutCacheAndFetch");
    try (Firehose firehose = factory.connect(PARSER, firehoseTmpDir)) {
        while (firehose.hasMore()) {
            rows.add(firehose.nextRow());
        }
    }
    Assert.assertEquals(0, factory.getCacheManager().getTotalCachedBytes());
    assertResult(rows);
    assertNumRemainingCacheFiles(firehoseTmpDir, 0);
}
Also used: Firehose (org.apache.druid.data.input.Firehose), ArrayList (java.util.ArrayList), Row (org.apache.druid.data.input.Row), File (java.io.File), Test (org.junit.Test)
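
Both firehose tests above share the same try-with-resources read loop: connect, drain every row, close. As a minimal sketch, that loop could be factored into a standalone helper (the FirehoseDrainSketch class and drainRows name are hypothetical, not part of the tests; Firehose.nextRow() returns an InputRow, which is a Row):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.druid.data.input.Firehose;
import org.apache.druid.data.input.Row;

public class FirehoseDrainSketch {
    // Read every remaining row from a connected Firehose, closing it when done.
    // Mirrors the loop used in the two tests above.
    static List<Row> drainRows(Firehose firehose) throws IOException {
        final List<Row> rows = new ArrayList<>();
        try (Firehose f = firehose) {
            while (f.hasMore()) {
                rows.add(f.nextRow());
            }
        }
        return rows;
    }
}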

Example 18 with Row

Use of org.apache.druid.data.input.Row in project druid by druid-io.

Class HashBasedNumberedShardSpecTest, method testIsInChunkWithMorePartitionsBeyondNumBucketsReturningTrue:

@Test
public void testIsInChunkWithMorePartitionsBeyondNumBucketsReturningTrue() {
    final int numBuckets = 3;
    final List<HashBasedNumberedShardSpec> specs = IntStream.range(0, 10).mapToObj(i -> newShardSpecForTesting(i, numBuckets)).collect(Collectors.toList());
    final HashPartitioner hashPartitioner = createHashPartitionerForHashInputRow(numBuckets);
    for (int i = 0; i < 10; i++) {
        final InputRow row = new HashInputRow(numBuckets * 10000 + i);
        Assert.assertTrue(isInChunk(specs.get(i), hashPartitioner, row.getTimestampFromEpoch(), row));
    }
}
Also used: IntStream (java.util.stream.IntStream), DateTimes (org.apache.druid.java.util.common.DateTimes), Function (com.google.common.base.Function), RangeSet (com.google.common.collect.RangeSet), ImmutableMap (com.google.common.collect.ImmutableMap), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper), Range (com.google.common.collect.Range), DateTime (org.joda.time.DateTime), MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow), Test (org.junit.Test), TreeRangeSet (com.google.common.collect.TreeRangeSet), EqualsVerifier (nl.jqno.equalsverifier.EqualsVerifier), Collectors (java.util.stream.Collectors), ArrayList (java.util.ArrayList), Row (org.apache.druid.data.input.Row), InputRow (org.apache.druid.data.input.InputRow), List (java.util.List), Lists (com.google.common.collect.Lists), ImmutableList (com.google.common.collect.ImmutableList), Map (java.util.Map), Assert (org.junit.Assert), Collections (java.util.Collections)
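
The point of this test is that partition numbers can exceed numBuckets. A minimal arithmetic sketch of why each assertion holds, under the assumption (suggested by the test name and the shard spec's wrap-around behavior) that partition i maps to bucket i % numBuckets and that the test's HashInputRow hashes to the integer it is constructed with:

public class BucketWrapAroundSketch {
    public static void main(String[] args) {
        final int numBuckets = 3;
        for (int partitionNum = 0; partitionNum < 10; partitionNum++) {
            // Assumed mapping: partitions beyond numBuckets wrap around the buckets.
            final int partitionBucket = partitionNum % numBuckets;
            // The test builds row i so that its hash is numBuckets * 10000 + i,
            // which reduces to the same bucket as partition i.
            final int rowHash = numBuckets * 10000 + partitionNum;
            final int rowBucket = rowHash % numBuckets;
            System.out.printf("partition %d -> bucket %d, row bucket %d%n",
                              partitionNum, partitionBucket, rowBucket);
            assert partitionBucket == rowBucket; // why isInChunk returns true for every i
        }
    }
}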

Example 19 with Row

Use of org.apache.druid.data.input.Row in project druid by druid-io.

Class MovingAverageQueryRunner, method run:

@Override
public Sequence<Row> run(QueryPlus<Row> query, ResponseContext responseContext) {
    MovingAverageQuery maq = (MovingAverageQuery) query.getQuery();
    List<Interval> intervals;
    final Period period;
    // Get the largest bucket from the list of averagers
    Optional<Integer> opt = maq.getAveragerSpecs().stream().map(AveragerFactory::getNumBuckets).max(Integer::compare);
    int buckets = opt.orElse(0);
    // Extend the beginning of each interval by (buckets - 1) periods
    if (maq.getGranularity() instanceof PeriodGranularity) {
        period = ((PeriodGranularity) maq.getGranularity()).getPeriod();
        int offset = buckets <= 0 ? 0 : (1 - buckets);
        intervals = maq.getIntervals().stream().map(i -> new Interval(i.getStart().withPeriodAdded(period, offset), i.getEnd())).collect(Collectors.toList());
    } else {
        throw new ISE("Only PeriodGranulaity is supported for movingAverage queries");
    }
    Sequence<Row> resultsSeq;
    DataSource dataSource = maq.getDataSource();
    if (maq.getDimensions() != null && !maq.getDimensions().isEmpty() && (dataSource instanceof TableDataSource || dataSource instanceof UnionDataSource || dataSource instanceof QueryDataSource)) {
        // build groupBy query from movingAverage query
        GroupByQuery.Builder builder = GroupByQuery.builder()
                                                   .setDataSource(dataSource)
                                                   .setInterval(intervals)
                                                   .setDimFilter(maq.getFilter())
                                                   .setGranularity(maq.getGranularity())
                                                   .setDimensions(maq.getDimensions())
                                                   .setAggregatorSpecs(maq.getAggregatorSpecs())
                                                   .setPostAggregatorSpecs(maq.getPostAggregatorSpecs())
                                                   .setContext(maq.getContext());
        GroupByQuery gbq = builder.build();
        ResponseContext gbqResponseContext = ResponseContext.createEmpty();
        gbqResponseContext.merge(responseContext);
        gbqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(gbq));
        Sequence<ResultRow> results = gbq.getRunner(walker).run(QueryPlus.wrap(gbq), gbqResponseContext);
        try {
            // use localhost for remote address
            requestLogger.logNativeQuery(RequestLogLine.forNative(gbq, DateTimes.nowUtc(), "127.0.0.1", new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))));
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
        resultsSeq = results.map(row -> row.toMapBasedRow(gbq));
    } else {
        // no dimensions, so optimize this as a TimeSeries
        TimeseriesQuery tsq = new TimeseriesQuery(
            dataSource,
            new MultipleIntervalSegmentSpec(intervals),
            false,
            null,
            maq.getFilter(),
            maq.getGranularity(),
            maq.getAggregatorSpecs(),
            maq.getPostAggregatorSpecs(),
            0,
            maq.getContext()
        );
        ResponseContext tsqResponseContext = ResponseContext.createEmpty();
        tsqResponseContext.merge(responseContext);
        tsqResponseContext.putQueryFailDeadlineMs(System.currentTimeMillis() + QueryContexts.getTimeout(tsq));
        Sequence<Result<TimeseriesResultValue>> results = tsq.getRunner(walker).run(QueryPlus.wrap(tsq), tsqResponseContext);
        try {
            // use localhost for remote address
            requestLogger.logNativeQuery(RequestLogLine.forNative(tsq, DateTimes.nowUtc(), "127.0.0.1", new QueryStats(ImmutableMap.of("query/time", 0, "query/bytes", 0, "success", true))));
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
        resultsSeq = Sequences.map(results, new TimeseriesResultToRow());
    }
    // Process into period buckets
    Sequence<RowBucket> bucketedMovingAvgResults = Sequences.simple(new RowBucketIterable(resultsSeq, intervals, period));
    // Apply the window analysis functions
    Sequence<Row> movingAvgResults = Sequences.simple(new MovingAverageIterable(bucketedMovingAvgResults, maq.getDimensions(), maq.getAveragerSpecs(), maq.getPostAggregatorSpecs(), maq.getAggregatorSpecs()));
    // Apply any postAveragers
    Sequence<Row> movingAvgResultsWithPostAveragers = Sequences.map(movingAvgResults, new PostAveragerAggregatorCalculator(maq));
    // remove rows outside the reporting window
    List<Interval> reportingIntervals = maq.getIntervals();
    movingAvgResults = Sequences.filter(movingAvgResultsWithPostAveragers, row -> reportingIntervals.stream().anyMatch(i -> i.contains(row.getTimestamp())));
    // Apply any having, sorting, and limits
    movingAvgResults = maq.applyLimit(movingAvgResults);
    return movingAvgResults;
}
Also used: QueryPlus (org.apache.druid.query.QueryPlus), MapBasedRow (org.apache.druid.data.input.MapBasedRow), AveragerFactory (org.apache.druid.query.movingaverage.averagers.AveragerFactory), TimeseriesResultValue (org.apache.druid.query.timeseries.TimeseriesResultValue), Row (org.apache.druid.data.input.Row), QueryStats (org.apache.druid.server.QueryStats), Interval (org.joda.time.Interval), PeriodGranularity (org.apache.druid.java.util.common.granularity.PeriodGranularity), Map (java.util.Map), QueryRunner (org.apache.druid.query.QueryRunner), MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec), GroupByQuery (org.apache.druid.query.groupby.GroupByQuery), QuerySegmentWalker (org.apache.druid.query.QuerySegmentWalker), Sequences (org.apache.druid.java.util.common.guava.Sequences), Nullable (javax.annotation.Nullable), DateTimes (org.apache.druid.java.util.common.DateTimes), Sequence (org.apache.druid.java.util.common.guava.Sequence), Period (org.joda.time.Period), Function (com.google.common.base.Function), ImmutableMap (com.google.common.collect.ImmutableMap), ResponseContext (org.apache.druid.query.context.ResponseContext), ResultRow (org.apache.druid.query.groupby.ResultRow), DataSource (org.apache.druid.query.DataSource), Throwables (com.google.common.base.Throwables), TimeseriesQuery (org.apache.druid.query.timeseries.TimeseriesQuery), RequestLogger (org.apache.druid.server.log.RequestLogger), ISE (org.apache.druid.java.util.common.ISE), Collectors (java.util.stream.Collectors), QueryContexts (org.apache.druid.query.QueryContexts), TableDataSource (org.apache.druid.query.TableDataSource), QueryDataSource (org.apache.druid.query.QueryDataSource), Result (org.apache.druid.query.Result), List (java.util.List), UnionDataSource (org.apache.druid.query.UnionDataSource), RequestLogLine (org.apache.druid.server.RequestLogLine), Optional (java.util.Optional)
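
The offset arithmetic above is the key step: with n averaging buckets, each query interval's start is pulled back by n - 1 granularity periods, so the first reported bucket already has a full window of data behind it. A minimal Joda-Time sketch with a hypothetical daily granularity and a 7-bucket averager (the dates are made up for illustration):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;
import org.joda.time.Period;

public class WindowExtensionSketch {
    public static void main(String[] args) {
        final Period period = Period.days(1); // e.g. a "P1D" PeriodGranularity
        final int buckets = 7;                // hypothetical 7-day averager
        final int offset = buckets <= 0 ? 0 : (1 - buckets); // -6

        final Interval reporting = new Interval(
            new DateTime("2020-01-10", DateTimeZone.UTC),
            new DateTime("2020-01-20", DateTimeZone.UTC));
        // Same extension as the runner: the start moves back 6 days, the end is unchanged.
        final Interval extended = new Interval(
            reporting.getStart().withPeriodAdded(period, offset),
            reporting.getEnd());
        System.out.println(extended); // 2020-01-04T00:00:00.000Z/2020-01-20T00:00:00.000Z
    }
}

The rows produced for the extended head of the interval are later dropped by the reporting-window filter near the end of run, so callers only see results for the intervals they asked for.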

Example 20 with Row

Use of org.apache.druid.data.input.Row in project druid by druid-io.

Class MovingAverageIterableTest, method testCompleteData:

@Test
public void testCompleteData() {
    Map<String, Object> event1 = new HashMap<>();
    Map<String, Object> event2 = new HashMap<>();
    Map<String, Object> event3 = new HashMap<>();
    event1.put("gender", "m");
    event1.put("pageViews", 10L);
    event2.put("gender", "f");
    event2.put("pageViews", 20L);
    event3.put("gender", "u");
    event3.put("pageViews", 30L);
    List<DimensionSpec> ds = new ArrayList<>();
    ds.add(new DefaultDimensionSpec("gender", "gender"));
    Row jan1Row1 = new MapBasedRow(JAN_1, event1);
    Row jan1Row2 = new MapBasedRow(JAN_1, event2);
    Row jan1Row3 = new MapBasedRow(JAN_1, event3);
    Row jan2Row1 = new MapBasedRow(JAN_2, event1);
    Row jan2Row2 = new MapBasedRow(JAN_2, event2);
    Row jan2Row3 = new MapBasedRow(JAN_2, event3);
    Sequence<RowBucket> seq = Sequences.simple(Arrays.asList(new RowBucket(JAN_1, Arrays.asList(jan1Row1, jan1Row2, jan1Row3)), new RowBucket(JAN_2, Arrays.asList(jan2Row1, jan2Row2, jan2Row3))));
    Iterator<Row> iter = new MovingAverageIterable(seq, ds, Collections.singletonList(new LongMeanAveragerFactory("movingAvgPageViews", 2, 1, "pageViews")), Collections.emptyList(), Collections.singletonList(new LongSumAggregatorFactory("pageViews", "pageViews"))).iterator();
    Assert.assertTrue(iter.hasNext());
    Row result = iter.next();
    Assert.assertEquals("m", (result.getDimension("gender")).get(0));
    Assert.assertEquals(JAN_1, (result.getTimestamp()));
    Assert.assertTrue(iter.hasNext());
    result = iter.next();
    Assert.assertEquals("f", (result.getDimension("gender")).get(0));
    Assert.assertEquals(JAN_1, (result.getTimestamp()));
    Assert.assertTrue(iter.hasNext());
    result = iter.next();
    Assert.assertEquals("u", (result.getDimension("gender")).get(0));
    Assert.assertEquals(JAN_1, (result.getTimestamp()));
    Assert.assertTrue(iter.hasNext());
    result = iter.next();
    Assert.assertEquals("m", (result.getDimension("gender")).get(0));
    Assert.assertEquals(JAN_2, (result.getTimestamp()));
    Assert.assertTrue(iter.hasNext());
    result = iter.next();
    Assert.assertEquals("f", (result.getDimension("gender")).get(0));
    Assert.assertEquals(JAN_2, (result.getTimestamp()));
    Assert.assertTrue(iter.hasNext());
    result = iter.next();
    Assert.assertEquals("u", (result.getDimension("gender")).get(0));
    Assert.assertEquals(JAN_2, (result.getTimestamp()));
    Assert.assertFalse(iter.hasNext());
}
Also used: DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec), DimensionSpec (org.apache.druid.query.dimension.DimensionSpec), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), MapBasedRow (org.apache.druid.data.input.MapBasedRow), LongMeanAveragerFactory (org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory), Row (org.apache.druid.data.input.Row), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
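
The averager here, LongMeanAveragerFactory("movingAvgPageViews", 2, 1, "pageViews"), maintains a mean over the last two period buckets for each dimension combination. A minimal sketch of that sliding two-bucket mean (not Druid's implementation; it ignores missing-bucket handling and the factory's other parameters):

import java.util.ArrayDeque;
import java.util.Deque;

public class TwoBucketMeanSketch {
    public static void main(String[] args) {
        final int window = 2; // same as the averager's bucket count
        final Deque<Long> lastBuckets = new ArrayDeque<>();
        // One pageViews value per period bucket, e.g. gender "m" above: 10 on JAN_1, 10 on JAN_2.
        for (long pageViews : new long[]{10L, 10L}) {
            if (lastBuckets.size() == window) {
                lastBuckets.removeFirst(); // slide the window forward
            }
            lastBuckets.addLast(pageViews);
            final double mean = lastBuckets.stream()
                                           .mapToLong(Long::longValue)
                                           .average()
                                           .orElse(0);
            System.out.println("movingAvgPageViews = " + mean);
        }
    }
}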

Aggregations

Row (org.apache.druid.data.input.Row): 54
Test (org.junit.Test): 44
ArrayList (java.util.ArrayList): 32
MapBasedRow (org.apache.druid.data.input.MapBasedRow): 21
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 21
File (java.io.File): 18
Firehose (org.apache.druid.data.input.Firehose): 15
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 15
HashMap (java.util.HashMap): 13
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 13
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 12
DimensionSpec (org.apache.druid.query.dimension.DimensionSpec): 11
List (java.util.List): 10
LongMeanAveragerFactory (org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory): 9
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 8
InputRow (org.apache.druid.data.input.InputRow): 8
GroupByQuery (org.apache.druid.query.groupby.GroupByQuery): 7
IOException (java.io.IOException): 6
GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig): 6
Function (com.google.common.base.Function): 5