Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
From the class DataSchema, method getParser:
@JsonIgnore
public InputRowParser getParser() {
  if (parser == null) {
    log.warn("No parser has been specified");
    return null;
  }
  final InputRowParser inputRowParser = jsonMapper.convertValue(this.parser, InputRowParser.class);
  final Set<String> dimensionExclusions = Sets.newHashSet();
  for (AggregatorFactory aggregator : aggregators) {
    dimensionExclusions.addAll(aggregator.requiredFields());
    dimensionExclusions.add(aggregator.getName());
  }
  if (inputRowParser.getParseSpec() != null) {
    final DimensionsSpec dimensionsSpec = inputRowParser.getParseSpec().getDimensionsSpec();
    final TimestampSpec timestampSpec = inputRowParser.getParseSpec().getTimestampSpec();
    // exclude timestamp from dimensions by default, unless explicitly included in the list of dimensions
    if (timestampSpec != null) {
      final String timestampColumn = timestampSpec.getTimestampColumn();
      if (!(dimensionsSpec.hasCustomDimensions() && dimensionsSpec.getDimensionNames().contains(timestampColumn))) {
        dimensionExclusions.add(timestampColumn);
      }
    }
    if (dimensionsSpec != null) {
      final Set<String> metSet = Sets.newHashSet();
      for (AggregatorFactory aggregator : aggregators) {
        metSet.add(aggregator.getName());
      }
      final Set<String> dimSet = Sets.newHashSet(dimensionsSpec.getDimensionNames());
      final Set<String> overlap = Sets.intersection(metSet, dimSet);
      if (!overlap.isEmpty()) {
        throw new IAE(
            "Cannot have overlapping dimensions and metrics of the same name. Please change the name of the metric. Overlap: %s",
            overlap
        );
      }
      return inputRowParser.withParseSpec(
          inputRowParser.getParseSpec().withDimensionsSpec(
              dimensionsSpec.withDimensionExclusions(Sets.difference(dimensionExclusions, dimSet))
          )
      );
    } else {
      return inputRowParser;
    }
  } else {
    log.warn("No parseSpec in parser has been specified.");
    return inputRowParser;
  }
}
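The exclusion logic above keeps aggregator inputs and outputs out of the dimension list: requiredFields() contributes the columns an aggregator reads, and getName() contributes the metric it produces. A minimal sketch of that behaviour, using illustrative aggregator and column names that are not taken from the Druid source:

  // Illustrative only: CountAggregatorFactory reads no input columns, while
  // LongSumAggregatorFactory("revenueSum", "revenue") reads the "revenue" column.
  AggregatorFactory[] aggregators = new AggregatorFactory[]{
      new CountAggregatorFactory("rows"),
      new LongSumAggregatorFactory("revenueSum", "revenue")
  };
  Set<String> dimensionExclusions = Sets.newHashSet();
  for (AggregatorFactory aggregator : aggregators) {
    dimensionExclusions.addAll(aggregator.requiredFields()); // adds "revenue"
    dimensionExclusions.add(aggregator.getName());           // adds "rows", "revenueSum"
  }
  // dimensionExclusions is now {"rows", "revenueSum", "revenue"}, so none of
  // these columns will be auto-discovered as dimensions by the parser.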
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
From the class CachingClusteredClientFunctionalityTest, method testUncoveredInterval:
@Test
public void testUncoveredInterval() throws Exception {
  addToTimeline(new Interval("2015-01-02/2015-01-03"), "1");
  addToTimeline(new Interval("2015-01-04/2015-01-05"), "1");
  addToTimeline(new Interval("2015-02-04/2015-02-05"), "1");
  final Druids.TimeseriesQueryBuilder builder = Druids.newTimeseriesQueryBuilder()
      .dataSource("test")
      .intervals("2015-01-02/2015-01-03")
      .granularity("day")
      .aggregators(Arrays.<AggregatorFactory>asList(new CountAggregatorFactory("rows")))
      .context(ImmutableMap.<String, Object>of("uncoveredIntervalsLimit", 3));
  Map<String, Object> responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  Assert.assertNull(responseContext.get("uncoveredIntervals"));
  builder.intervals("2015-01-01/2015-01-03");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-01/2015-01-02");
  builder.intervals("2015-01-01/2015-01-04");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-01/2015-01-02", "2015-01-03/2015-01-04");
  builder.intervals("2015-01-02/2015-01-04");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-03/2015-01-04");
  builder.intervals("2015-01-01/2015-01-30");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-01/2015-01-02", "2015-01-03/2015-01-04", "2015-01-05/2015-01-30");
  builder.intervals("2015-01-02/2015-01-30");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-03/2015-01-04", "2015-01-05/2015-01-30");
  builder.intervals("2015-01-04/2015-01-30");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-05/2015-01-30");
  builder.intervals("2015-01-10/2015-01-30");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, false, "2015-01-10/2015-01-30");
  builder.intervals("2015-01-01/2015-02-25");
  responseContext = new HashMap<>();
  client.run(builder.build(), responseContext);
  assertUncovered(responseContext, true, "2015-01-01/2015-01-02", "2015-01-03/2015-01-04", "2015-01-05/2015-02-04");
}
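The assertUncovered helper is private to the test class and its body is not shown on this page. A plausible sketch of its shape, assuming it compares the "uncoveredIntervals" response-context entry against the expected interval strings and the boolean against an "uncoveredIntervalsOverflowed" flag (the overflow key name is an assumption beyond the assertNull call above):

  // Assumed helper shape; not taken verbatim from the test class.
  private void assertUncovered(Map<String, Object> context, boolean expectOverflow, String... intervals) {
    final List<Interval> expected = new ArrayList<>();
    for (String interval : intervals) {
      expected.add(new Interval(interval));
    }
    Assert.assertEquals(expected, context.get("uncoveredIntervals"));
    Assert.assertEquals(expectOverflow, context.get("uncoveredIntervalsOverflowed"));
  }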
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
From the class CachingClusteredClientTest, method testGroupByCaching:
@Test
public void testGroupByCaching() throws Exception {
  List<AggregatorFactory> aggsWithUniques = ImmutableList.<AggregatorFactory>builder()
      .addAll(AGGS)
      .add(new HyperUniquesAggregatorFactory("uniques", "uniques"))
      .build();
  final HashFunction hashFn = Hashing.murmur3_128();
  GroupByQuery.Builder builder = new GroupByQuery.Builder()
      .setDataSource(DATA_SOURCE)
      .setQuerySegmentSpec(SEG_SPEC)
      .setDimFilter(DIM_FILTER)
      .setGranularity(GRANULARITY)
      .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec("a", "a")))
      .setAggregatorSpecs(aggsWithUniques)
      .setPostAggregatorSpecs(POST_AGGS)
      .setContext(CONTEXT);
  final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
  collector.add(hashFn.hashString("abc123", Charsets.UTF_8).asBytes());
  collector.add(hashFn.hashString("123abc", Charsets.UTF_8).asBytes());
  testQueryCaching(
      client,
      builder.build(),
      new Interval("2011-01-01/2011-01-02"),
      makeGroupByResults(
          new DateTime("2011-01-01"), ImmutableMap.of("a", "a", "rows", 1, "imps", 1, "impers", 1, "uniques", collector)
      ),
      new Interval("2011-01-02/2011-01-03"),
      makeGroupByResults(
          new DateTime("2011-01-02"), ImmutableMap.of("a", "b", "rows", 2, "imps", 2, "impers", 2, "uniques", collector)
      ),
      new Interval("2011-01-05/2011-01-10"),
      makeGroupByResults(
          new DateTime("2011-01-05"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      ),
      new Interval("2011-01-05/2011-01-10"),
      makeGroupByResults(
          new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      )
  );
  QueryRunner runner = new FinalizeResultsQueryRunner(
      client,
      GroupByQueryRunnerTest.makeQueryRunnerFactory(new GroupByQueryConfig()).getToolchest()
  );
  HashMap<String, Object> context = new HashMap<String, Object>();
  TestHelper.assertExpectedObjects(
      makeGroupByResults(
          new DateTime("2011-01-05T"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-05T01"), ImmutableMap.of("a", "c", "rows", 3, "imps", 3, "impers", 3, "uniques", collector),
          new DateTime("2011-01-06T"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-06T01"), ImmutableMap.of("a", "d", "rows", 4, "imps", 4, "impers", 4, "uniques", collector),
          new DateTime("2011-01-07T"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-07T01"), ImmutableMap.of("a", "e", "rows", 5, "imps", 5, "impers", 5, "uniques", collector),
          new DateTime("2011-01-08T"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-08T01"), ImmutableMap.of("a", "f", "rows", 6, "imps", 6, "impers", 6, "uniques", collector),
          new DateTime("2011-01-09T"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector),
          new DateTime("2011-01-09T01"), ImmutableMap.of("a", "g", "rows", 7, "imps", 7, "impers", 7, "uniques", collector)
      ),
      runner.run(builder.setInterval("2011-01-05/2011-01-10").build(), context),
      ""
  );
}
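makeGroupByResults is another private helper of the test class whose body is not shown on this page. A plausible sketch, assuming it pairs each timestamp with the map that follows it and wraps the pair as a MapBasedRow:

  // Assumed helper shape; the real implementation may differ.
  private Iterable<Row> makeGroupByResults(Object... objects) {
    List<Row> retVal = new ArrayList<>();
    int index = 0;
    while (index < objects.length) {
      DateTime timestamp = (DateTime) objects[index++];
      @SuppressWarnings("unchecked")
      Map<String, Object> event = (Map<String, Object>) objects[index++];
      retVal.add(new MapBasedRow(timestamp, event));
    }
    return retVal;
  }

The two overlapping 2011-01-05/2011-01-10 result sets seed the cache with both midnight and 01:00 rows, which is presumably why the final assertion expects the merged, interleaved sequence.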
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
From the class RealtimeManagerTest, method setUp:
@Before
public void setUp() throws Exception {
  final List<TestInputRowHolder> rows = Arrays.asList(
      makeRow(new DateTime("9000-01-01").getMillis()),
      makeRow(new ParseException("parse error")),
      null,
      makeRow(new DateTime().getMillis())
  );
  ObjectMapper jsonMapper = new DefaultObjectMapper();
  schema = new DataSchema(
      "test",
      null,
      new AggregatorFactory[]{new CountAggregatorFactory("rows")},
      new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
      jsonMapper
  );
  schema2 = new DataSchema(
      "testV2",
      null,
      new AggregatorFactory[]{new CountAggregatorFactory("rows")},
      new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
      jsonMapper
  );
  RealtimeIOConfig ioConfig = new RealtimeIOConfig(
      new FirehoseFactory() {
        @Override
        public Firehose connect(InputRowParser parser) throws IOException {
          return new TestFirehose(rows.iterator());
        }
      },
      new PlumberSchool() {
        @Override
        public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
          return plumber;
        }
      },
      null
  );
  RealtimeIOConfig ioConfig2 = new RealtimeIOConfig(
      null,
      new PlumberSchool() {
        @Override
        public Plumber findPlumber(DataSchema schema, RealtimeTuningConfig config, FireDepartmentMetrics metrics) {
          return plumber2;
        }
      },
      new FirehoseFactoryV2() {
        @Override
        public FirehoseV2 connect(InputRowParser parser, Object arg1) throws IOException, ParseException {
          return new TestFirehoseV2(rows.iterator());
        }
      }
  );
  RealtimeTuningConfig tuningConfig = new RealtimeTuningConfig(
      1, new Period("P1Y"), null, null, null, null, null, null, null, null, 0, 0, null, null
  );
  plumber = new TestPlumber(new Sink(
      new Interval("0/P5000Y"),
      schema,
      tuningConfig.getShardSpec(),
      new DateTime().toString(),
      tuningConfig.getMaxRowsInMemory(),
      tuningConfig.isReportParseExceptions()
  ));
  realtimeManager = new RealtimeManager(
      Arrays.<FireDepartment>asList(new FireDepartment(schema, ioConfig, tuningConfig)),
      null
  );
  plumber2 = new TestPlumber(new Sink(
      new Interval("0/P5000Y"),
      schema2,
      tuningConfig.getShardSpec(),
      new DateTime().toString(),
      tuningConfig.getMaxRowsInMemory(),
      tuningConfig.isReportParseExceptions()
  ));
  realtimeManager2 = new RealtimeManager(
      Arrays.<FireDepartment>asList(new FireDepartment(schema2, ioConfig2, tuningConfig)),
      null
  );
  tuningConfig_0 = new RealtimeTuningConfig(
      1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(0), null, null, 0, 0, null, null
  );
  tuningConfig_1 = new RealtimeTuningConfig(
      1, new Period("P1Y"), null, null, null, null, null, new LinearShardSpec(1), null, null, 0, 0, null, null
  );
  schema3 = new DataSchema(
      "testing",
      null,
      new AggregatorFactory[]{new CountAggregatorFactory("ignore")},
      new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null),
      jsonMapper
  );
  FireDepartment department_0 = new FireDepartment(schema3, ioConfig, tuningConfig_0);
  FireDepartment department_1 = new FireDepartment(schema3, ioConfig2, tuningConfig_1);
  QueryRunnerFactoryConglomerate conglomerate = new QueryRunnerFactoryConglomerate() {
    @Override
    public <T, QueryType extends Query<T>> QueryRunnerFactory<T, QueryType> findFactory(QueryType query) {
      return factory;
    }
  };
  chiefStartedLatch = new CountDownLatch(2);
  RealtimeManager.FireChief fireChief_0 = new RealtimeManager.FireChief(department_0, conglomerate) {
    @Override
    public void run() {
      super.initPlumber();
      chiefStartedLatch.countDown();
    }
  };
  RealtimeManager.FireChief fireChief_1 = new RealtimeManager.FireChief(department_1, conglomerate) {
    @Override
    public void run() {
      super.initPlumber();
      chiefStartedLatch.countDown();
    }
  };
  realtimeManager3 = new RealtimeManager(
      Arrays.asList(department_0, department_1),
      conglomerate,
      ImmutableMap.<String, Map<Integer, RealtimeManager.FireChief>>of(
          "testing",
          ImmutableMap.of(0, fireChief_0, 1, fireChief_1)
      )
  );
  startFireChiefWithPartitionNum(fireChief_0, 0);
  startFireChiefWithPartitionNum(fireChief_1, 1);
}
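With this setup, both FireChief instances count down chiefStartedLatch once their overridden run() methods have called initPlumber(), so a test that queries realtimeManager3 can simply block on the latch first. A minimal sketch under that assumption (the actual test bodies are not shown here, and the test name is hypothetical):

  // Hypothetical test method, sketched only to show the latch usage.
  @Test(timeout = 60_000L)
  public void testBothChiefsStarted() throws Exception {
    // wait until both FireChief instances have initialized their plumbers
    chiefStartedLatch.await();
    // ... query realtimeManager3 / the two plumbers and assert on the results ...
  }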
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
From the class AppenderatorTest, method testQueryBySegments:
@Test
public void testQueryBySegments() throws Exception {
  try (final AppenderatorTester tester = new AppenderatorTester(2)) {
    final Appenderator appenderator = tester.getAppenderator();
    appenderator.startJob();
    appenderator.add(IDENTIFIERS.get(0), IR("2000", "foo", 1), Suppliers.ofInstance(Committers.nil()));
    appenderator.add(IDENTIFIERS.get(0), IR("2000", "foo", 2), Suppliers.ofInstance(Committers.nil()));
    appenderator.add(IDENTIFIERS.get(1), IR("2000", "foo", 4), Suppliers.ofInstance(Committers.nil()));
    appenderator.add(IDENTIFIERS.get(2), IR("2001", "foo", 8), Suppliers.ofInstance(Committers.nil()));
    appenderator.add(IDENTIFIERS.get(2), IR("2001T01", "foo", 16), Suppliers.ofInstance(Committers.nil()));
    appenderator.add(IDENTIFIERS.get(2), IR("2001T02", "foo", 32), Suppliers.ofInstance(Committers.nil()));
    appenderator.add(IDENTIFIERS.get(2), IR("2001T03", "foo", 64), Suppliers.ofInstance(Committers.nil()));
    // Query1: segment #2
    final TimeseriesQuery query1 = Druids.newTimeseriesQueryBuilder()
        .dataSource(AppenderatorTester.DATASOURCE)
        .aggregators(Arrays.<AggregatorFactory>asList(
            new LongSumAggregatorFactory("count", "count"),
            new LongSumAggregatorFactory("met", "met")
        ))
        .granularity(Granularities.DAY)
        .intervals(new MultipleSpecificSegmentSpec(ImmutableList.of(
            new SegmentDescriptor(
                IDENTIFIERS.get(2).getInterval(),
                IDENTIFIERS.get(2).getVersion(),
                IDENTIFIERS.get(2).getShardSpec().getPartitionNum()
            )
        )))
        .build();
    final List<Result<TimeseriesResultValue>> results1 = Lists.newArrayList();
    Sequences.toList(query1.run(appenderator, ImmutableMap.<String, Object>of()), results1);
    Assert.assertEquals(
        "query1",
        ImmutableList.of(new Result<>(
            new DateTime("2001"),
            new TimeseriesResultValue(ImmutableMap.<String, Object>of("count", 4L, "met", 120L))
        )),
        results1
    );
    // Query2: segment #2, partial
    final TimeseriesQuery query2 = Druids.newTimeseriesQueryBuilder()
        .dataSource(AppenderatorTester.DATASOURCE)
        .aggregators(Arrays.<AggregatorFactory>asList(
            new LongSumAggregatorFactory("count", "count"),
            new LongSumAggregatorFactory("met", "met")
        ))
        .granularity(Granularities.DAY)
        .intervals(new MultipleSpecificSegmentSpec(ImmutableList.of(
            new SegmentDescriptor(
                new Interval("2001/PT1H"),
                IDENTIFIERS.get(2).getVersion(),
                IDENTIFIERS.get(2).getShardSpec().getPartitionNum()
            )
        )))
        .build();
    final List<Result<TimeseriesResultValue>> results2 = Lists.newArrayList();
    Sequences.toList(query2.run(appenderator, ImmutableMap.<String, Object>of()), results2);
    Assert.assertEquals(
        "query2",
        ImmutableList.of(new Result<>(
            new DateTime("2001"),
            new TimeseriesResultValue(ImmutableMap.<String, Object>of("count", 1L, "met", 8L))
        )),
        results2
    );
    // Query3: segment #2, two disjoint intervals
    final TimeseriesQuery query3 = Druids.newTimeseriesQueryBuilder()
        .dataSource(AppenderatorTester.DATASOURCE)
        .aggregators(Arrays.<AggregatorFactory>asList(
            new LongSumAggregatorFactory("count", "count"),
            new LongSumAggregatorFactory("met", "met")
        ))
        .granularity(Granularities.DAY)
        .intervals(new MultipleSpecificSegmentSpec(ImmutableList.of(
            new SegmentDescriptor(
                new Interval("2001/PT1H"),
                IDENTIFIERS.get(2).getVersion(),
                IDENTIFIERS.get(2).getShardSpec().getPartitionNum()
            ),
            new SegmentDescriptor(
                new Interval("2001T03/PT1H"),
                IDENTIFIERS.get(2).getVersion(),
                IDENTIFIERS.get(2).getShardSpec().getPartitionNum()
            )
        )))
        .build();
    final List<Result<TimeseriesResultValue>> results3 = Lists.newArrayList();
    Sequences.toList(query3.run(appenderator, ImmutableMap.<String, Object>of()), results3);
Assert.assertEquals("query2", ImmutableList.of(new Result<>(new DateTime("2001"), new TimeseriesResultValue(ImmutableMap.<String, Object>of("count", 2L, "met", 72L)))), results3);
}
}
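The IR row-building helper and the (count, met) expectations hang together: each added row presumably carries a count of 1 and the given met value, so segment #2 holds four rows with met values 8, 16, 32, and 64, giving count = 4 and met = 8 + 16 + 32 + 64 = 120 for query1. A plausible sketch of the helper, with the dimension name treated as an assumption since the real body is not shown here:

  // Assumed helper shape; the dimension name "dim" is illustrative.
  static InputRow IR(String ts, String dim, long met) {
    return new MapBasedInputRow(
        new DateTime(ts).getMillis(),
        ImmutableList.of("dim"),
        ImmutableMap.<String, Object>of("dim", dim, "met", met)
    );
  }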