Use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io.
From the class SchemaEvolutionTest, method setUp:
@Before
public void setUp() throws IOException {
// Index1: c1 is a string, c2 nonexistent, "uniques" nonexistent
index1 = IndexBuilder.create()
    .tmpDir(temporaryFolder.newFolder())
    .schema(
        new IncrementalIndexSchema.Builder()
            .withMetrics(new AggregatorFactory[]{ new CountAggregatorFactory("cnt") })
            .withRollup(false)
            .build()
    )
    .rows(inputRowsWithDimensions(ImmutableList.of("c1")))
    .buildMMappedIndex();
// Index2: c1 is a long, c2 is a string, "uniques" is uniques on c2
index2 = IndexBuilder.create()
    .tmpDir(temporaryFolder.newFolder())
    .schema(
        new IncrementalIndexSchema.Builder()
            .withMetrics(new AggregatorFactory[]{
                new CountAggregatorFactory("cnt"),
                new LongSumAggregatorFactory("c1", "c1"),
                new HyperUniquesAggregatorFactory("uniques", "c2")
            })
            .withRollup(false)
            .build()
    )
    .rows(inputRowsWithDimensions(ImmutableList.of("c2")))
    .buildMMappedIndex();
// Index3: c1 is a float, c2 is a string, "uniques" is uniques on c2
index3 = IndexBuilder.create()
    .tmpDir(temporaryFolder.newFolder())
    .schema(
        new IncrementalIndexSchema.Builder()
            .withMetrics(new AggregatorFactory[]{
                new CountAggregatorFactory("cnt"),
                new DoubleSumAggregatorFactory("c1", "c1"),
                new HyperUniquesAggregatorFactory("uniques", "c2")
            })
            .withRollup(false)
            .build()
    )
    .rows(inputRowsWithDimensions(ImmutableList.of("c2")))
    .buildMMappedIndex();
// Index4: c1 is nonexistent, c2 is uniques on c2
index4 = IndexBuilder.create()
    .tmpDir(temporaryFolder.newFolder())
    .schema(
        new IncrementalIndexSchema.Builder()
            .withMetrics(new AggregatorFactory[]{ new HyperUniquesAggregatorFactory("c2", "c2") })
            .withRollup(false)
            .build()
    )
    .rows(inputRowsWithDimensions(ImmutableList.<String>of()))
    .buildMMappedIndex();
if (index4.getAvailableDimensions().size() != 0) {
// Just double-checking that the exclusions are working properly
throw new ISE("WTF?! Expected no dimensions in index4");
}
}
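For orientation, the sum aggregators used above take the output column name first and the input field name second. A minimal sketch of the two constructors that differ between index2 and index3 (the variable names below are illustrative, not from the test):

AggregatorFactory longSum = new LongSumAggregatorFactory("c1", "c1");     // index2 ingests column c1 as a long
AggregatorFactory doubleSum = new DoubleSumAggregatorFactory("c1", "c1"); // index3 ingests the same column as a double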
Use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io.
From the class DataSchemaTest, method testDefaultExclusions:
@Test
public void testDefaultExclusions() throws Exception {
Map<String, Object> parser = jsonMapper.convertValue(
    new StringInputRowParser(
        new JSONParseSpec(
            new TimestampSpec("time", "auto", null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dimB", "dimA")), null, null),
            null,
            null
        ),
        null
    ),
    new TypeReference<Map<String, Object>>() {}
);
DataSchema schema = new DataSchema(
    "test",
    parser,
    new AggregatorFactory[]{
        new DoubleSumAggregatorFactory("metric1", "col1"),
        new DoubleSumAggregatorFactory("metric2", "col2")
    },
    new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Interval.parse("2014/2015"))),
    jsonMapper
);
Assert.assertEquals(
    ImmutableSet.of("time", "col1", "col2", "metric1", "metric2"),
    schema.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions()
);
}
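As the assertion documents, DataSchema's default dimension exclusions are built from the timestamp column ("time") plus each aggregator's input field and output name ("col1", "col2", "metric1", "metric2"), so none of those columns are treated as dimensions during ingestion unless explicitly listed.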
Use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io.
From the class TopNTypeInterfaceBenchmark, method setupQueries:
private void setupQueries() {
// queries for the basic schema
Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
{
// basic.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
// Use an IdentityExtractionFn to force usage of DimExtractionTopNAlgorithm
TopNQueryBuilder queryBuilderString = new TopNQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .dimension(new ExtractionDimensionSpec("dimSequential", "dimSequential", IdentityExtractionFn.getInstance()))
    .metric("sumFloatNormal")
    .intervals(intervalSpec)
    .aggregators(queryAggs);
// DimExtractionTopNAlgorithm is always used for numeric columns
TopNQueryBuilder queryBuilderLong = new TopNQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .dimension("metLongUniform")
    .metric("sumFloatNormal")
    .intervals(intervalSpec)
    .aggregators(queryAggs);
TopNQueryBuilder queryBuilderFloat = new TopNQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .dimension("metFloatNormal")
    .metric("sumFloatNormal")
    .intervals(intervalSpec)
    .aggregators(queryAggs);
basicQueries.put("string", queryBuilderString);
basicQueries.put("long", queryBuilderLong);
basicQueries.put("float", queryBuilderFloat);
}
{
// basic.numericSort
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .dimension("dimUniform")
    .metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC))
    .intervals(intervalSpec)
    .aggregators(queryAggs);
basicQueries.put("numericSort", queryBuilderA);
}
{
// basic.alphanumericSort
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder()
    .dataSource("blah")
    .granularity(Granularities.ALL)
    .dimension("dimUniform")
    .metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC))
    .intervals(intervalSpec)
    .aggregators(queryAggs);
basicQueries.put("alphanumericSort", queryBuilderA);
}
SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
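Note that none of the stored builders set a threshold; presumably that happens elsewhere in the benchmark. A minimal sketch of how a benchmark method might finish one off, assuming the standard TopNQueryBuilder API (the threshold value is illustrative):

// Hypothetical usage: fetch a stored builder, set a threshold, and build the query.
TopNQuery query = SCHEMA_QUERY_MAP.get("basic").get("string").threshold(10).build();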
Use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io.
From the class IngestSegmentFirehoseFactoryTest, method constructorFeeder:
@Parameterized.Parameters(name = "{1}")
public static Collection<Object[]> constructorFeeder() throws IOException {
final IndexSpec indexSpec = new IndexSpec();
final HeapMemoryTaskStorage ts = new HeapMemoryTaskStorage(new TaskStorageConfig(null) {
});
final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
    .withQueryGranularity(Granularities.NONE)
    .withMinTimestamp(JodaUtils.MIN_INSTANT)
    .withDimensionsSpec(ROW_PARSER)
    .withMetrics(new AggregatorFactory[]{
        new LongSumAggregatorFactory(METRIC_LONG_NAME, DIM_LONG_NAME),
        new DoubleSumAggregatorFactory(METRIC_FLOAT_NAME, DIM_FLOAT_NAME)
    })
    .build();
final OnheapIncrementalIndex index = new OnheapIncrementalIndex(schema, true, MAX_ROWS * MAX_SHARD_NUMBER);
for (Integer i = 0; i < MAX_ROWS; ++i) {
index.add(ROW_PARSER.parse(buildRow(i.longValue())));
}
if (!persistDir.mkdirs() && !persistDir.exists()) {
throw new IOException(String.format("Could not create directory at [%s]", persistDir.getAbsolutePath()));
}
INDEX_MERGER.persist(index, persistDir, indexSpec);
final TaskLockbox tl = new TaskLockbox(ts);
final IndexerSQLMetadataStorageCoordinator mdc = new IndexerSQLMetadataStorageCoordinator(null, null, null) {
private final Set<DataSegment> published = Sets.newHashSet();
private final Set<DataSegment> nuked = Sets.newHashSet();
@Override
public List<DataSegment> getUsedSegmentsForInterval(String dataSource, Interval interval) throws IOException {
return ImmutableList.copyOf(segmentSet);
}
@Override
public List<DataSegment> getUsedSegmentsForIntervals(String dataSource, List<Interval> interval) throws IOException {
return ImmutableList.copyOf(segmentSet);
}
@Override
public List<DataSegment> getUnusedSegmentsForInterval(String dataSource, Interval interval) {
return ImmutableList.of();
}
@Override
public Set<DataSegment> announceHistoricalSegments(Set<DataSegment> segments) {
Set<DataSegment> added = Sets.newHashSet();
for (final DataSegment segment : segments) {
if (published.add(segment)) {
added.add(segment);
}
}
return ImmutableSet.copyOf(added);
}
@Override
public void deleteSegments(Set<DataSegment> segments) {
nuked.addAll(segments);
}
};
final LocalTaskActionClientFactory tac = new LocalTaskActionClientFactory(
    ts,
    new TaskActionToolbox(tl, mdc, newMockEmitter(), EasyMock.createMock(SupervisorManager.class))
);
SegmentHandoffNotifierFactory notifierFactory = EasyMock.createNiceMock(SegmentHandoffNotifierFactory.class);
EasyMock.replay(notifierFactory);
final TaskToolboxFactory taskToolboxFactory = new TaskToolboxFactory(
    new TaskConfig(tmpDir.getAbsolutePath(), null, null, 50000, null, false, null, null),
    tac,
    newMockEmitter(),
    new DataSegmentPusher() {
@Deprecated
@Override
public String getPathForHadoop(String dataSource) {
return getPathForHadoop();
}
@Override
public String getPathForHadoop() {
throw new UnsupportedOperationException();
}
@Override
public DataSegment push(File file, DataSegment segment) throws IOException {
return segment;
}
}, new DataSegmentKiller() {
@Override
public void kill(DataSegment segments) throws SegmentLoadingException {
}
@Override
public void killAll() throws IOException {
throw new UnsupportedOperationException("not implemented");
}
}, new DataSegmentMover() {
@Override
public DataSegment move(DataSegment dataSegment, Map<String, Object> targetLoadSpec) throws SegmentLoadingException {
return dataSegment;
}
}, new DataSegmentArchiver() {
@Override
public DataSegment archive(DataSegment segment) throws SegmentLoadingException {
return segment;
}
@Override
public DataSegment restore(DataSegment segment) throws SegmentLoadingException {
return segment;
}
},
// segment announcer
null,
notifierFactory,
// query runner factory conglomerate corporation unionized collective
null,
// query executor service
null,
// monitor scheduler
null,
new SegmentLoaderFactory(new SegmentLoaderLocalCacheManager(null, new SegmentLoaderConfig() {
    @Override
    public List<StorageLocationConfig> getLocations() {
        return Lists.newArrayList();
    }
}, MAPPER)),
MAPPER,
INDEX_MERGER,
INDEX_IO,
null,
null,
INDEX_MERGER_V9);
Collection<Object[]> values = new LinkedList<>();
for (InputRowParser parser : Arrays.<InputRowParser>asList(
    ROW_PARSER,
    new MapInputRowParser(new JSONParseSpec(
        new TimestampSpec(TIME_COLUMN, "auto", null),
        new DimensionsSpec(
            DimensionsSpec.getDefaultSchemas(ImmutableList.<String>of()),
            ImmutableList.of(DIM_FLOAT_NAME, DIM_LONG_NAME),
            ImmutableList.<SpatialDimensionSchema>of()
        ),
        null,
        null
    )))) {
for (List<String> dim_names : Arrays.<List<String>>asList(null, ImmutableList.of(DIM_NAME))) {
for (List<String> metric_names : Arrays.<List<String>>asList(null, ImmutableList.of(METRIC_LONG_NAME, METRIC_FLOAT_NAME))) {
values.add(new Object[]{
    new IngestSegmentFirehoseFactory(
        DATA_SOURCE_NAME,
        FOREVER,
        new SelectorDimFilter(DIM_NAME, DIM_VALUE, null),
        dim_names,
        metric_names,
        Guice.createInjector(new Module() {
            @Override
            public void configure(Binder binder) {
                binder.bind(TaskToolboxFactory.class).toInstance(taskToolboxFactory);
            }
        }),
        INDEX_IO
    ),
    String.format(
        "DimNames[%s]MetricNames[%s]ParserDimNames[%s]",
        dim_names == null ? "null" : "dims",
        metric_names == null ? "null" : "metrics",
        parser == ROW_PARSER ? "dims" : "null"
    ),
    parser
});
}
}
}
return values;
}
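For readers scanning the parameter list: the three nested loops emit 2 parsers × 2 dimension-name lists × 2 metric-name lists = 8 parameter sets, each labeled by the String.format call above.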
Use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io.
From the class TaskLifecycleTest, method testResumeTasks:
@Test
public void testResumeTasks() throws Exception {
final Task indexTask = new IndexTask(
    null,
    null,
    new IndexTask.IndexIngestionSpec(
        new DataSchema(
            "foo",
            null,
            new AggregatorFactory[]{ new DoubleSumAggregatorFactory("met", "met") },
            new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(new Interval("2010-01-01/P2D"))),
            mapper
        ),
        new IndexTask.IndexIOConfig(new MockFirehoseFactory(false), false, null),
        new IndexTask.IndexTuningConfig(10000, 10, null, null, indexSpec, null, false, null, null)
    ),
    null,
    MAPPER
);
final long startTime = System.currentTimeMillis();
// manually insert the task into TaskStorage, waiting for TaskQueue to sync from storage
taskQueue.start();
taskStorage.insert(indexTask, TaskStatus.running(indexTask.getId()));
while (tsqa.getStatus(indexTask.getId()).get().isRunnable()) {
if (System.currentTimeMillis() > startTime + 10 * 1000) {
throw new ISE("Where did the task go?!: %s", indexTask.getId());
}
Thread.sleep(100);
}
final TaskStatus status = taskStorage.getStatus(indexTask.getId()).get();
final List<DataSegment> publishedSegments = byIntervalOrdering.sortedCopy(mdc.getPublished());
final List<DataSegment> loggedSegments = byIntervalOrdering.sortedCopy(tsqa.getInsertedSegments(indexTask.getId()));
Assert.assertEquals("statusCode", TaskStatus.Status.SUCCESS, status.getStatusCode());
Assert.assertEquals("segments logged vs published", loggedSegments, publishedSegments);
Assert.assertEquals("num segments published", 2, mdc.getPublished().size());
Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size());
Assert.assertEquals("segment1 datasource", "foo", publishedSegments.get(0).getDataSource());
Assert.assertEquals("segment1 interval", new Interval("2010-01-01/P1D"), publishedSegments.get(0).getInterval());
Assert.assertEquals("segment1 dimensions", ImmutableList.of("dim1", "dim2"), publishedSegments.get(0).getDimensions());
Assert.assertEquals("segment1 metrics", ImmutableList.of("met"), publishedSegments.get(0).getMetrics());
Assert.assertEquals("segment2 datasource", "foo", publishedSegments.get(1).getDataSource());
Assert.assertEquals("segment2 interval", new Interval("2010-01-02/P1D"), publishedSegments.get(1).getInterval());
Assert.assertEquals("segment2 dimensions", ImmutableList.of("dim1", "dim2"), publishedSegments.get(1).getDimensions());
Assert.assertEquals("segment2 metrics", ImmutableList.of("met"), publishedSegments.get(1).getMetrics());
}
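The two published segments follow from the ingestion spec: DAY granularity over the interval 2010-01-01/P2D yields one day-aligned segment per day, and the assertions then check each segment's datasource, interval, dimensions, and metrics.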