Use of io.druid.java.util.common.guava.Sequence in project druid by druid-io.
From the class OldApiSketchAggregationTest, method testSketchDataIngestAndQuery.
@Test
public void testSketchDataIngestAndQuery() throws Exception {
    // Ingest the TSV sketch test data and run the group-by query defined in the classpath resources.
    Sequence seq = helper.createIndexAndRunQueryOnSegment(
        new File(OldApiSketchAggregationTest.class.getClassLoader().getResource("sketch_test_data.tsv").getFile()),
        readFileFromClasspathAsString("sketch_test_data_record_parser.json"),
        readFileFromClasspathAsString("oldapi/old_sketch_test_data_aggregators.json"),
        0,
        Granularities.NONE,
        5,
        readFileFromClasspathAsString("oldapi/old_sketch_test_data_group_by_query.json")
    );
    List results = Sequences.toList(seq, Lists.newArrayList());
    // Expect exactly one result row with the expected sketch count and post-aggregator estimates.
    Assert.assertEquals(1, results.size());
    Assert.assertEquals(
        new MapBasedRow(
            DateTime.parse("2014-10-19T00:00:00.000Z"),
            ImmutableMap.<String, Object>builder()
                .put("sids_sketch_count", 50.0)
                .put("sketchEstimatePostAgg", 50.0)
                .put("sketchUnionPostAggEstimate", 50.0)
                .put("sketchIntersectionPostAggEstimate", 50.0)
                .put("sketchAnotBPostAggEstimate", 0.0)
                .put("non_existing_col_validation", 0.0)
                .build()
        ),
        results.get(0)
    );
}
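For orientation (not part of the snippet above): a Sequence is Druid's lazy, resource-aware replacement for Iterable. Tests usually materialize it with Sequences.toList, as above, but the underlying primitive is accumulate. A minimal, hypothetical sketch of that pattern, assuming a Sequence<Row> named seq already exists:

// Hypothetical sketch: fold over a Sequence<Row> without materializing a List.
// Assumes io.druid.java.util.common.guava.Sequence and Accumulator; "seq" is a placeholder.
int rowCount = seq.accumulate(0, new Accumulator<Integer, Row>() {
    @Override
    public Integer accumulate(Integer accumulated, Row in) {
        return accumulated + 1;
    }
});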
Use of io.druid.java.util.common.guava.Sequence in project druid by druid-io.
From the class ApproximateHistogramAggregationTest, method ingestAndQuery.
private MapBasedRow ingestAndQuery(boolean ignoreNulls) throws Exception {
    // Choose the ingestion-time aggregator: the folding variant is used when nulls are to be ignored.
    String ingestionAgg = ignoreNulls ? "approxHistogramFold" : "approxHistogram";
    String metricSpec = "[{"
        + "\"type\": \"" + ingestionAgg + "\","
        + "\"name\": \"index_ah\","
        + "\"fieldName\": \"index\""
        + "}]";
    String parseSpec = "{"
        + "\"type\" : \"string\","
        + "\"parseSpec\" : {"
        + " \"format\" : \"tsv\","
        + " \"timestampSpec\" : {"
        + " \"column\" : \"timestamp\","
        + " \"format\" : \"auto\""
        + "},"
        + " \"dimensionsSpec\" : {"
        + " \"dimensions\": [],"
        + " \"dimensionExclusions\" : [],"
        + " \"spatialDimensions\" : []"
        + " },"
        + " \"columns\": [\"timestamp\", \"market\", \"quality\", \"placement\", \"placementish\", \"index\"]"
        + " }"
        + "}";
    String query = "{"
        + "\"queryType\": \"groupBy\","
        + "\"dataSource\": \"test_datasource\","
        + "\"granularity\": \"ALL\","
        + "\"dimensions\": [],"
        + "\"aggregations\": ["
        + " { \"type\": \"approxHistogramFold\", \"name\": \"index_ah\", \"fieldName\": \"index_ah\" }"
        + "],"
        + "\"postAggregations\": ["
        + " { \"type\": \"min\", \"name\": \"index_min\", \"fieldName\": \"index_ah\"},"
        + " { \"type\": \"max\", \"name\": \"index_max\", \"fieldName\": \"index_ah\"},"
        + " { \"type\": \"quantile\", \"name\": \"index_quantile\", \"fieldName\": \"index_ah\", \"probability\" : 0.99 }"
        + "],"
        + "\"intervals\": [ \"1970/2050\" ]"
        + "}";
    // Run the group-by query over the ingested segment and return the single result row.
    Sequence seq = helper.createIndexAndRunQueryOnSegment(
        this.getClass().getClassLoader().getResourceAsStream("sample.data.tsv"),
        parseSpec,
        metricSpec,
        0,
        Granularities.NONE,
        50000,
        query
    );
    return (MapBasedRow) Sequences.toList(seq, Lists.newArrayList()).get(0);
}
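The test methods that call this helper (not shown in this snippet) assert on the aggregated metrics of the returned row. A hypothetical caller sketch, using only the metric names defined in the query above:

// Hypothetical caller sketch (illustrative only; not one of the original test methods).
MapBasedRow row = ingestAndQuery(true);
double min = row.getFloatMetric("index_min");
double max = row.getFloatMetric("index_max");
double p99 = row.getFloatMetric("index_quantile");
// The 0.99 quantile must lie between the min and max post-aggregators.
Assert.assertTrue(min <= p99 && p99 <= max);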
Use of io.druid.java.util.common.guava.Sequence in project druid by druid-io.
From the class QueryMaker, method executeSelect.
private Sequence<Object[]> executeSelect(final DruidQueryBuilder queryBuilder, final SelectQuery baseQuery) {
    Preconditions.checkState(queryBuilder.getGrouping() == null, "grouping must be null");
    final List<RelDataTypeField> fieldList = queryBuilder.getRowType().getFieldList();
    final Integer limit = queryBuilder.getLimitSpec() != null ? queryBuilder.getLimitSpec().getLimit() : null;
    // Select is paginated; we need to make multiple queries.
    final Sequence<Sequence<Object[]>> sequenceOfSequences = Sequences.simple(new Iterable<Sequence<Object[]>>() {
        @Override
        public Iterator<Sequence<Object[]>> iterator() {
            final AtomicBoolean morePages = new AtomicBoolean(true);
            final AtomicReference<Map<String, Integer>> pagingIdentifiers = new AtomicReference<>();
            final AtomicLong rowsRead = new AtomicLong();
            // Each Sequence<Object[]> is one page.
            return new Iterator<Sequence<Object[]>>() {
                @Override
                public boolean hasNext() {
                    return morePages.get();
                }

                @Override
                public Sequence<Object[]> next() {
                    // Resume from the paging identifiers returned by the previous page, if any.
                    final SelectQuery queryWithPagination = baseQuery.withPagingSpec(
                        new PagingSpec(
                            pagingIdentifiers.get(),
                            plannerContext.getPlannerConfig().getSelectThreshold(),
                            true
                        )
                    );
                    Hook.QUERY_PLAN.run(queryWithPagination);
                    morePages.set(false);
                    final AtomicBoolean gotResult = new AtomicBoolean();
                    return Sequences.concat(
                        Sequences.map(
                            queryWithPagination.run(walker, Maps.<String, Object>newHashMap()),
                            new Function<Result<SelectResultValue>, Sequence<Object[]>>() {
                                @Override
                                public Sequence<Object[]> apply(final Result<SelectResultValue> result) {
                                    if (!gotResult.compareAndSet(false, true)) {
                                        throw new ISE("WTF?! Expected single result from Select query but got multiple!");
                                    }
                                    pagingIdentifiers.set(result.getValue().getPagingIdentifiers());
                                    final List<Object[]> retVals = new ArrayList<>();
                                    for (EventHolder holder : result.getValue().getEvents()) {
                                        morePages.set(true);
                                        final Map<String, Object> map = holder.getEvent();
                                        final Object[] retVal = new Object[fieldList.size()];
                                        for (RelDataTypeField field : fieldList) {
                                            final String outputName = queryBuilder.getRowOrder().get(field.getIndex());
                                            if (outputName.equals(Column.TIME_COLUMN_NAME)) {
                                                retVal[field.getIndex()] = coerce(holder.getTimestamp().getMillis(), field.getType().getSqlTypeName());
                                            } else {
                                                retVal[field.getIndex()] = coerce(map.get(outputName), field.getType().getSqlTypeName());
                                            }
                                        }
                                        if (limit == null || rowsRead.incrementAndGet() <= limit) {
                                            retVals.add(retVal);
                                        } else {
                                            // The row limit has been reached: stop paginating and return what we have.
                                            morePages.set(false);
                                            return Sequences.simple(retVals);
                                        }
                                    }
                                    return Sequences.simple(retVals);
                                }
                            }
                        )
                    );
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    });
    return Sequences.concat(sequenceOfSequences);
}
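The pagination above works because Sequences.simple wraps a lazy Iterable, so each page's Select query is issued only when the concatenated Sequence is consumed. A self-contained, hypothetical sketch of the same flattening pattern with plain integers in place of Select pages:

// Hypothetical sketch: lazily concatenate "pages" of integers, mirroring the structure above.
// Assumes Druid's Sequences plus Guava's Iterables, Lists and Function.
final List<List<Integer>> pages = Arrays.asList(Arrays.asList(1, 2), Arrays.asList(3, 4));
final Sequence<Sequence<Integer>> pageSequences = Sequences.simple(
    Iterables.transform(pages, new Function<List<Integer>, Sequence<Integer>>() {
        @Override
        public Sequence<Integer> apply(List<Integer> page) {
            return Sequences.simple(page);    // one Sequence per page
        }
    })
);
final List<Integer> flattened = Sequences.toList(Sequences.concat(pageSequences), Lists.<Integer>newArrayList());
// flattened is now [1, 2, 3, 4]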
Use of io.druid.java.util.common.guava.Sequence in project druid by druid-io.
From the class GroupByQueryEngine, method process.
public Sequence<Row> process(final GroupByQuery query, final StorageAdapter storageAdapter) {
    if (storageAdapter == null) {
        throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
    }
    final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
    if (intervals.size() != 1) {
        throw new IAE("Should only have one interval, got[%s]", intervals);
    }
    Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimFilter()));
    final Sequence<Cursor> cursors = storageAdapter.makeCursors(
        filter,
        intervals.get(0),
        query.getVirtualColumns(),
        query.getGranularity(),
        false
    );
    // Borrow an intermediate results buffer from the pool; it is released when the Sequence is closed.
    final ResourceHolder<ByteBuffer> bufferHolder = intermediateResultsBufferPool.take();
    return Sequences.concat(
        Sequences.withBaggage(
            Sequences.map(
                cursors,
                new Function<Cursor, Sequence<Row>>() {
                    @Override
                    public Sequence<Row> apply(final Cursor cursor) {
                        return new BaseSequence<>(
                            new BaseSequence.IteratorMaker<Row, RowIterator>() {
                                @Override
                                public RowIterator make() {
                                    return new RowIterator(query, cursor, bufferHolder.get(), config.get());
                                }

                                @Override
                                public void cleanup(RowIterator iterFromMake) {
                                    CloseQuietly.close(iterFromMake);
                                }
                            }
                        );
                    }
                }
            ),
            new Closeable() {
                @Override
                public void close() throws IOException {
                    CloseQuietly.close(bufferHolder);
                }
            }
        )
    );
}
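BaseSequence is the standard way to expose an Iterator with a guaranteed cleanup step as a Sequence, and Sequences.withBaggage attaches an extra Closeable that runs when consumption ends. A minimal, hypothetical sketch of the same lifecycle outside the group-by engine:

// Hypothetical sketch of the BaseSequence / withBaggage lifecycle (names are illustrative only).
Sequence<String> letters = Sequences.withBaggage(
    new BaseSequence<>(
        new BaseSequence.IteratorMaker<String, Iterator<String>>() {
            @Override
            public Iterator<String> make() {
                return Arrays.asList("a", "b", "c").iterator();
            }

            @Override
            public void cleanup(Iterator<String> iterFromMake) {
                // Release per-iterator resources here; a plain List iterator needs nothing.
            }
        }
    ),
    new Closeable() {
        @Override
        public void close() throws IOException {
            // Runs once, after the sequence has been consumed (or consumption fails).
        }
    }
);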
Use of io.druid.java.util.common.guava.Sequence in project druid by druid-io.
From the class IntervalChunkingQueryRunner, method run.
@Override
public Sequence<T> run(final Query<T> query, final Map<String, Object> responseContext) {
    final Period chunkPeriod = getChunkPeriod(query);
    // If the chunk period is empty, skip chunking; avoid toStandardDuration(), which cannot handle periods like P1M.
    if (EPOCH.plus(chunkPeriod).getMillis() == EPOCH.getMillis()) {
        return baseRunner.run(query, responseContext);
    }
    List<Interval> chunkIntervals = Lists.newArrayList(
        FunctionalIterable.create(query.getIntervals()).transformCat(
            new Function<Interval, Iterable<Interval>>() {
                @Override
                public Iterable<Interval> apply(Interval input) {
                    return splitInterval(input, chunkPeriod);
                }
            }
        )
    );
    if (chunkIntervals.size() <= 1) {
        return baseRunner.run(query, responseContext);
    }
    return Sequences.concat(
        Lists.newArrayList(
            FunctionalIterable.create(chunkIntervals).transform(
                new Function<Interval, Sequence<T>>() {
                    @Override
                    public Sequence<T> apply(Interval singleInterval) {
                        // AsyncQueryRunner is not lazy, i.e. it does most of its work on the call to run().
                        return new AsyncQueryRunner<T>(
                            toolChest.mergeResults(
                                new MetricsEmittingQueryRunner<T>(
                                    emitter,
                                    new Function<Query<T>, ServiceMetricEvent.Builder>() {
                                        @Override
                                        public ServiceMetricEvent.Builder apply(Query<T> input) {
                                            return toolChest.makeMetricBuilder(input);
                                        }
                                    },
                                    baseRunner,
                                    "query/intervalChunk/time",
                                    ImmutableMap.of("chunkInterval", singleInterval.toString())
                                ).withWaitMeasuredFromNow()
                            ),
                            executor,
                            queryWatcher
                        ).run(
                            query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Arrays.asList(singleInterval))),
                            responseContext
                        );
                    }
                }
            )
        )
    );
}
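The splitInterval helper used above is not part of this snippet; conceptually it slices each query interval into consecutive, chunk-period-sized pieces. A hypothetical sketch of such a helper, assuming Joda-Time's Interval, Period and DateTime (an illustration, not the actual Druid implementation):

// Hypothetical sketch only: split an interval into period-sized chunks.
private static List<Interval> splitIntervalSketch(Interval interval, Period period) {
    final List<Interval> chunks = new ArrayList<>();
    DateTime chunkStart = interval.getStart();
    while (chunkStart.isBefore(interval.getEnd())) {
        DateTime chunkEnd = chunkStart.plus(period);
        if (chunkEnd.isAfter(interval.getEnd())) {
            chunkEnd = interval.getEnd();
        }
        chunks.add(new Interval(chunkStart, chunkEnd));
        chunkStart = chunkEnd;
    }
    return chunks;
}

The run() method above only reaches splitInterval after verifying that the chunk period is non-empty, so a loop like this always makes progress.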