Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io,
from the class DataSchema, method getParser().
@JsonIgnore
public InputRowParser getParser() {
  if (parser == null) {
    log.warn("No parser has been specified");
    return null;
  }
  final InputRowParser inputRowParser = jsonMapper.convertValue(this.parser, InputRowParser.class);
  final Set<String> dimensionExclusions = Sets.newHashSet();
  for (AggregatorFactory aggregator : aggregators) {
    dimensionExclusions.addAll(aggregator.requiredFields());
    dimensionExclusions.add(aggregator.getName());
  }
  if (inputRowParser.getParseSpec() != null) {
    final DimensionsSpec dimensionsSpec = inputRowParser.getParseSpec().getDimensionsSpec();
    final TimestampSpec timestampSpec = inputRowParser.getParseSpec().getTimestampSpec();
    // exclude timestamp from dimensions by default, unless explicitly included in the list of dimensions
    if (timestampSpec != null) {
      final String timestampColumn = timestampSpec.getTimestampColumn();
      if (!(dimensionsSpec.hasCustomDimensions() && dimensionsSpec.getDimensionNames().contains(timestampColumn))) {
        dimensionExclusions.add(timestampColumn);
      }
    }
    if (dimensionsSpec != null) {
      final Set<String> metSet = Sets.newHashSet();
      for (AggregatorFactory aggregator : aggregators) {
        metSet.add(aggregator.getName());
      }
      final Set<String> dimSet = Sets.newHashSet(dimensionsSpec.getDimensionNames());
      final Set<String> overlap = Sets.intersection(metSet, dimSet);
      if (!overlap.isEmpty()) {
        throw new IAE(
            "Cannot have overlapping dimensions and metrics of the same name. Please change the name of the metric. Overlap: %s",
            overlap
        );
      }
      return inputRowParser.withParseSpec(
          inputRowParser.getParseSpec().withDimensionsSpec(
              dimensionsSpec.withDimensionExclusions(Sets.difference(dimensionExclusions, dimSet))
          )
      );
    } else {
      return inputRowParser;
    }
  } else {
    log.warn("No parseSpec in parser has been specified.");
    return inputRowParser;
  }
}
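The exclusion set built from the aggregators is what keeps metric names and metric input columns from also being ingested as dimensions. Below is a minimal standalone sketch of that rule using hypothetical metrics (a row count plus a long sum over a "clicks" column); the class and variable names are illustrative, not part of the Druid codebase.

import com.google.common.collect.Sets;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.query.aggregation.LongSumAggregatorFactory;
import java.util.Arrays;
import java.util.List;
import java.util.Set;

public class DimensionExclusionSketch {
  public static void main(String[] args) {
    // Hypothetical metrics: a row count plus a sum over the "clicks" column.
    List<AggregatorFactory> aggregators = Arrays.<AggregatorFactory>asList(
        new CountAggregatorFactory("rows"),
        new LongSumAggregatorFactory("clickSum", "clicks")
    );
    // Same rule as DataSchema.getParser(): exclude each metric's name and the
    // columns it reads, so they are not auto-discovered as dimensions.
    Set<String> dimensionExclusions = Sets.newHashSet();
    for (AggregatorFactory aggregator : aggregators) {
      dimensionExclusions.addAll(aggregator.requiredFields());
      dimensionExclusions.add(aggregator.getName());
    }
    // Prints something like [rows, clickSum, clicks] (set order not guaranteed).
    System.out.println(dimensionExclusions);
  }
}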
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io,
from the class CachingClusteredClientFunctionalityTest, method testUncoveredInterval().
@Test
public void testUncoveredInterval() throws Exception {
  addToTimeline(new Interval("2015-01-02/2015-01-03"), "1");
  addToTimeline(new Interval("2015-01-04/2015-01-05"), "1");
  addToTimeline(new Interval("2015-02-04/2015-02-05"), "1");
  final Druids.TimeseriesQueryBuilder builder = Druids.newTimeseriesQueryBuilder()
      .dataSource("test")
      .intervals("2015-01-02/2015-01-03")
      .granularity("day")
      .aggregators(Arrays.<AggregatorFactory>asList(new CountAggregatorFactory("rows")))
      .context(ImmutableMap.<String, Object>of("uncoveredIntervalsLimit", 3));
  Map<String, Object> responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  Assert.assertNull(responseContext.get("uncoveredIntervals"));
  builder.intervals("2015-01-01/2015-01-03");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-01/2015-01-02");
  builder.intervals("2015-01-01/2015-01-04");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-01/2015-01-02", "2015-01-03/2015-01-04");
  builder.intervals("2015-01-02/2015-01-04");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-03/2015-01-04");
  builder.intervals("2015-01-01/2015-01-30");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-01/2015-01-02", "2015-01-03/2015-01-04", "2015-01-05/2015-01-30");
  builder.intervals("2015-01-02/2015-01-30");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-03/2015-01-04", "2015-01-05/2015-01-30");
  builder.intervals("2015-01-04/2015-01-30");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-05/2015-01-30");
  builder.intervals("2015-01-10/2015-01-30");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-10/2015-01-30");
  builder.intervals("2015-01-01/2015-02-25");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, true, "2015-01-01/2015-01-02", "2015-01-03/2015-01-04", "2015-01-05/2015-02-04");
}
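The query context key uncoveredIntervalsLimit caps how many uncovered intervals the broker reports back through the "uncoveredIntervals" entry of the response context; the boolean passed to assertUncovered appears to flag whether that limit was exceeded. Below is a minimal sketch of building such a query on its own, reusing only builder calls that appear in the test; the wrapping class, data source name, and interval are placeholders.

import com.google.common.collect.ImmutableMap;
import io.druid.query.Druids;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.query.timeseries.TimeseriesQuery;
import java.util.Arrays;
import java.util.Map;

public class UncoveredIntervalSketch {
  public static TimeseriesQuery buildQuery() {
    return Druids.newTimeseriesQueryBuilder()
        .dataSource("test")
        .intervals("2015-01-01/2015-01-30")
        .granularity("day")
        .aggregators(Arrays.<AggregatorFactory>asList(new CountAggregatorFactory("rows")))
        // Ask the broker to report at most three uncovered intervals.
        .context(ImmutableMap.<String, Object>of("uncoveredIntervalsLimit", 3))
        .build();
  }

  // After running the query through a CachingClusteredClient, any gaps show up
  // in the response context under the "uncoveredIntervals" key, as in the test.
  public static Object uncoveredIntervals(Map<String, Object> responseContext) {
    return responseContext.get("uncoveredIntervals");
  }
}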
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io,
from the class CachingClusteredClientTest, method testGroupByCaching().
@Test
public void testGroupByCaching() throws Exception {
  List<AggregatorFactory> aggsWithUniques = ImmutableList.<AggregatorFactory>builder()
      .addAll(AGGS)
      .add(new HyperUniquesAggregatorFactory("uniques", "uniques"))
      .build();
  final HashFunction hashFn = Hashing.murmur3_128();
  GroupByQuery.Builder builder = new GroupByQuery.Builder()
      .setDataSource(DATA_SOURCE)
      .setQuerySegmentSpec(SEG_SPEC)
      .setDimFilter(DIM_FILTER)
      .setGranularity(GRANULARITY)
      .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("a", "a")))
      .setAggregatorSpecs(aggsWithUniques)
      .setPostAggregatorSpecs(POST_AGGS)
      .setContext(CONTEXT);
  final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
  collector.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes());
  collector.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes());
  testQueryCaching(
      client,
      builder.build(),
      new Interval("2011-01-01/2011-01-02"),
      makeGroupByResults(
          new DateTime("2011-01-01"), ImmutableMap.of("a", "a", "rows", 1, "imps", 1, "impers", 1, "uniques", collector)
      ),
      new Interval("2011-01-02/2011-01-03"),
      makeGroupByResults(
          new DateTime("2011-01-02"), ImmutableMap.of("a", "b", "rows", 2, "imps", 2, "impers", 2, "uniques", collector)
      ),
      new Interval("2011-01-05/2011-01-10"),
      makeGroupByResults(
          new DateTime("2011-01-05"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      ),
      new Interval("2011-01-05/2011-01-10"),
      makeGroupByResults(
          new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      )
  );
  QueryRunner runner = new FinalizeResultsQueryRunner(
      client,
      GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig()).getToolchest()
  );
  HashMap<String, Object> context = new HashMap<String, Object>();
  TestHelper.assertExpectedObjects(
      makeGroupByResults(
          new DateTime("2011-01-05T"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06T"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07T"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08T"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09T"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector),
          new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      ),
      runner.run(builder.setInterval("2011-01-05/2011-01-10").build(), context),
      ""
  );
}
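The cached results above come from a groupBy query whose aggregators include a HyperUniquesAggregatorFactory, so the per-segment cache has to round-trip HyperLogLogCollector values as well as plain counts. As a minimal sketch, here is a comparable query built with the same builder calls used in this test and in SqlBenchmark below; the data source, interval, and column names are placeholders, and the Granularities import path is an assumption tied to the same Druid version as the surrounding code.

import com.google.common.collect.ImmutableList;
import io.druid.java.util.common.granularity.Granularities;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
import io.druid.query.dimension.DefaultDimensionSpec;
import io.druid.query.dimension.DimensionSpec;
import io.druid.query.groupby.GroupByQuery;
import java.util.Arrays;
import java.util.List;
import org.joda.time.Interval;

public class GroupByWithUniquesSketch {
  public static GroupByQuery buildQuery() {
    // "uniques" is both the output name and the hyperUnique input column here.
    List<AggregatorFactory> aggs = ImmutableList.<AggregatorFactory>of(
        new CountAggregatorFactory("rows"),
        new HyperUniquesAggregatorFactory("uniques", "uniques")
    );
    return GroupByQuery.builder()
        .setDataSource("example")
        .setInterval(new Interval("2011-01-01/2011-01-10"))
        .setGranularity(Granularities.DAY)
        .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("a", "a")))
        .setAggregatorSpecs(aggs)
        .build();
  }
}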
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io,
from the class SqlBenchmark, method setup().
@Setup(Level.Trial)
public void setup() throws Exception {
  tmpDir = Files.createTempDir();
  log.info("Starting benchmark setup using tmpDir[%s], rows[%,d].", tmpDir, rowsPerSegment);
  if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
  }
  final BenchmarkSchemaInfo schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get("basic");
  final BenchmarkDataGenerator dataGenerator = new BenchmarkDataGenerator(
      schemaInfo.getColumnSchemas(),
      RNG_SEED + 1,
      schemaInfo.getDataInterval(),
      rowsPerSegment
  );
  final List<InputRow> rows = Lists.newArrayList();
  for (int i = 0; i < rowsPerSegment; i++) {
    final InputRow row = dataGenerator.nextRow();
    if (i % 20000 == 0) {
      log.info("%,d/%,d rows generated.", i, rowsPerSegment);
    }
    rows.add(row);
  }
  log.info("%,d/%,d rows generated.", rows.size(), rowsPerSegment);
  final PlannerConfig plannerConfig = new PlannerConfig();
  final QueryRunnerFactoryConglomerate conglomerate = CalciteTests.queryRunnerFactoryConglomerate();
  final QueryableIndex index = IndexBuilder.create()
      .tmpDir(new File(tmpDir, "1"))
      .indexMerger(TestHelper.getTestIndexMergerV9())
      .rows(rows)
      .buildMMappedIndex();
  this.walker = new SpecificSegmentsQuerySegmentWalker(conglomerate).add(
      DataSegment.builder()
          .dataSource("foo")
          .interval(index.getDataInterval())
          .version("1")
          .shardSpec(new LinearShardSpec(0))
          .build(),
      index
  );
  final Map<String, Table> tableMap = ImmutableMap.<String, Table>of(
      "foo",
      new DruidTable(
          new TableDataSource("foo"),
          RowSignature.builder()
              .add("__time", ValueType.LONG)
              .add("dimSequential", ValueType.STRING)
              .add("dimZipf", ValueType.STRING)
              .add("dimUniform", ValueType.STRING)
              .build()
      )
  );
  final Schema druidSchema = new AbstractSchema() {
    @Override
    protected Map<String, Table> getTableMap() {
      return tableMap;
    }
  };
  plannerFactory = new PlannerFactory(
      Calcites.createRootSchema(druidSchema),
      walker,
      CalciteTests.createOperatorTable(),
      plannerConfig
  );
  groupByQuery = GroupByQuery.builder()
      .setDataSource("foo")
      .setInterval(new Interval(JodaUtils.MIN_INSTANT, JodaUtils.MAX_INSTANT))
      .setDimensions(
          Arrays.<DimensionSpec>asList(
              new DefaultDimensionSpec("dimZipf", "d0"),
              new DefaultDimensionSpec("dimSequential", "d1")
          )
      )
      .setAggregatorSpecs(Arrays.<AggregatorFactory>asList(new CountAggregatorFactory("c")))
      .setGranularity(Granularities.ALL)
      .build();
  sqlQuery = "SELECT\n"
      + " dimZipf AS d0,"
      + " dimSequential AS d1,\n"
      + " COUNT(*) AS c\n"
      + "FROM druid.foo\n"
      + "GROUP BY dimZipf, dimSequential";
}
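The setup wires plannerFactory, groupByQuery, and sqlQuery together so the benchmark methods can plan and execute the SQL statement and compare it against the equivalent native groupBy path. The fragment below is a rough sketch of how a benchmark body would exercise the factory; the DruidPlanner/PlannerResult call chain is an assumption about the Druid 0.10.x SQL API (not shown in the snippet above) and should be checked against the version actually in use.

// Assumption: io.druid.sql.calcite.planner.DruidPlanner / PlannerResult,
// as in Druid 0.10.x; plannerFactory and sqlQuery come from setup() above.
try (final DruidPlanner planner = plannerFactory.createPlanner(null)) {
  // Plan the SQL text into a PlannerResult wrapping the native query.
  final PlannerResult plannerResult = planner.plan(sqlQuery);
  // Running the result lazily yields rows as Object[] values.
  final Sequence<Object[]> resultSequence = plannerResult.run();
  final List<Object[]> rows = Sequences.toList(resultSequence, Lists.<Object[]>newArrayList());
}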
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io,
from the class IncrementalIndexTest, method testgetDimensions().
@Test
public void testgetDimensions() {
  final IncrementalIndex<Aggregator> incrementalIndex = new OnheapIncrementalIndex(
      new IncrementalIndexSchema.Builder()
          .withQueryGranularity(Granularities.NONE)
          .withMetrics(new AggregatorFactory[] { new CountAggregatorFactory("count") })
          .withDimensionsSpec(new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("dim0", "dim1")), null, null))
          .build(),
      true,
      1000000
  );
  closer.closeLater(incrementalIndex);
  Assert.assertEquals(Arrays.asList("dim0", "dim1"), incrementalIndex.getDimensionNames());
}
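The dimension names come from the DimensionsSpec while the metric columns come from the AggregatorFactory array, so getDimensionNames() reflects the spec even before any rows are added. Below is a minimal sketch of feeding the same kind of index a row; the event values are hypothetical, and add() declares the checked IndexSizeExceededException, which the caller must handle or propagate.

import com.google.common.collect.ImmutableMap;
import io.druid.data.input.MapBasedInputRow;
import java.util.Arrays;

// Hypothetical event for the index built above; the "count" metric is
// maintained by the index itself via the CountAggregatorFactory.
incrementalIndex.add(new MapBasedInputRow(
    System.currentTimeMillis(),
    Arrays.asList("dim0", "dim1"),
    ImmutableMap.<String, Object>of("dim0", "x", "dim1", "y")
));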