Search in sources :

Example 11 with DimensionSchema

use of org.apache.druid.data.input.impl.DimensionSchema in project druid by druid-io.

the class GroupByQueryHelper method createIndexAccumulatorPair.

public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(final GroupByQuery query, @Nullable final GroupByQuery subquery, final GroupByQueryConfig config) {
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
    final Granularity gran = query.getGranularity();
    final long timeStart = query.getIntervals().get(0).getStartMillis();
    final boolean combine = subquery == null;
    long granTimeStart = timeStart;
    if (!(Granularities.ALL.equals(gran))) {
        granTimeStart = gran.bucketStart(timeStart);
    }
    final List<AggregatorFactory> aggs;
    if (combine) {
        aggs = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {

            @Override
            public AggregatorFactory apply(AggregatorFactory input) {
                return input.getCombiningFactory();
            }
        });
    } else {
        aggs = query.getAggregatorSpecs();
    }
    final List<String> dimensions = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {

        @Override
        public String apply(DimensionSpec input) {
            return input.getOutputName();
        }
    });
    final IncrementalIndex index;
    final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);
    // All groupBy dimensions are strings, for now.
    final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
    for (DimensionSpec dimension : query.getDimensions()) {
        dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
    }
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder().withDimensionsSpec(new DimensionsSpec(dimensionSchemas)).withMetrics(aggs.toArray(new AggregatorFactory[0])).withQueryGranularity(gran).withMinTimestamp(granTimeStart).build();
    final AppendableIndexBuilder indexBuilder;
    if (query.getContextValue("useOffheap", false)) {
        throw new UnsupportedOperationException("The 'useOffheap' option is no longer available for groupBy v1. Please move to the newer groupBy engine, " + "which always operates off-heap, by removing any custom 'druid.query.groupBy.defaultStrategy' runtime " + "properties and 'groupByStrategy' query context parameters that you have set.");
    } else {
        indexBuilder = new OnheapIncrementalIndex.Builder();
    }
    index = indexBuilder.setIndexSchema(indexSchema).setDeserializeComplexMetrics(false).setConcurrentEventAdd(true).setSortFacts(sortResults).setMaxRowCount(querySpecificConfig.getMaxResults()).build();
    Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {

        @Override
        public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
            final MapBasedRow mapBasedRow;
            if (in instanceof MapBasedRow) {
                mapBasedRow = (MapBasedRow) in;
            } else if (in instanceof ResultRow) {
                final ResultRow row = (ResultRow) in;
                mapBasedRow = row.toMapBasedRow(combine ? query : subquery);
            } else {
                throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
            }
            try {
                accumulated.add(new MapBasedInputRow(mapBasedRow.getTimestamp(), dimensions, mapBasedRow.getEvent()));
            } catch (IndexSizeExceededException e) {
                throw new ResourceLimitExceededException(e.getMessage());
            }
            return accumulated;
        }
    };
    return new Pair<>(index, accumulator);
}
Also used : Accumulator(org.apache.druid.java.util.common.guava.Accumulator) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) AppendableIndexBuilder(org.apache.druid.segment.incremental.AppendableIndexBuilder) ArrayList(java.util.ArrayList) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) Granularity(org.apache.druid.java.util.common.granularity.Granularity) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) MapBasedRow(org.apache.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) ISE(org.apache.druid.java.util.common.ISE) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) Pair(org.apache.druid.java.util.common.Pair) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) ResourceLimitExceededException(org.apache.druid.query.ResourceLimitExceededException) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)

Example 12 with DimensionSchema

use of org.apache.druid.data.input.impl.DimensionSchema in project druid by druid-io.

the class BloomFilterSqlAggregatorTest method createQuerySegmentWalker.

@Override
public SpecificSegmentsQuerySegmentWalker createQuerySegmentWalker() throws IOException {
    InputRowParser parser = new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec("t", "iso", null), new DimensionsSpec(ImmutableList.<DimensionSchema>builder().addAll(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim3"))).add(new DoubleDimensionSchema("d1")).add(new FloatDimensionSchema("f1")).add(new LongDimensionSchema("l1")).build())));
    final QueryableIndex index = IndexBuilder.create().tmpDir(temporaryFolder.newFolder()).segmentWriteOutMediumFactory(OffHeapMemorySegmentWriteOutMediumFactory.instance()).schema(new IncrementalIndexSchema.Builder().withMetrics(new CountAggregatorFactory("cnt"), new DoubleSumAggregatorFactory("m1", "m1")).withDimensionsSpec(parser).withRollup(false).build()).rows(CalciteTests.ROWS1_WITH_NUMERIC_DIMS).buildMMappedIndex();
    return new SpecificSegmentsQuerySegmentWalker(conglomerate).add(DataSegment.builder().dataSource(DATA_SOURCE).interval(index.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build(), index);
}
Also used : MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) SpecificSegmentsQuerySegmentWalker(org.apache.druid.sql.calcite.util.SpecificSegmentsQuerySegmentWalker) QueryableIndex(org.apache.druid.segment.QueryableIndex) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema)

Example 13 with DimensionSchema

use of org.apache.druid.data.input.impl.DimensionSchema in project druid by druid-io.

the class KinesisSupervisorTest method getDataSchema.

private static DataSchema getDataSchema(String dataSource) {
    List<DimensionSchema> dimensions = new ArrayList<>();
    dimensions.add(StringDimensionSchema.create("dim1"));
    dimensions.add(StringDimensionSchema.create("dim2"));
    return new DataSchema(dataSource, new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(dimensions), new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.of()), null);
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) ArrayList(java.util.ArrayList) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema)

Example 14 with DimensionSchema

use of org.apache.druid.data.input.impl.DimensionSchema in project druid by druid-io.

the class SeekableStreamSupervisorSpecTest method getDataSchema.

private static DataSchema getDataSchema() {
    List<DimensionSchema> dimensions = new ArrayList<>();
    dimensions.add(StringDimensionSchema.create("dim1"));
    dimensions.add(StringDimensionSchema.create("dim2"));
    return new DataSchema(DATASOURCE, new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(dimensions), new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.of()), null);
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) ArrayList(java.util.ArrayList) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema)

Example 15 with DimensionSchema

use of org.apache.druid.data.input.impl.DimensionSchema in project druid by druid-io.

the class SeekableStreamSupervisorStateTest method getDataSchema.

private static DataSchema getDataSchema() {
    List<DimensionSchema> dimensions = new ArrayList<>();
    dimensions.add(StringDimensionSchema.create("dim1"));
    dimensions.add(StringDimensionSchema.create("dim2"));
    return new DataSchema(DATASOURCE, new TimestampSpec("timestamp", "iso", null), new DimensionsSpec(dimensions), new AggregatorFactory[] { new CountAggregatorFactory("rows") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.of()), null);
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) ArrayList(java.util.ArrayList) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema)

Aggregations

DimensionSchema (org.apache.druid.data.input.impl.DimensionSchema)17 ArrayList (java.util.ArrayList)10 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)9 StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema)9 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)7 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)7 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)6 LongDimensionSchema (org.apache.druid.data.input.impl.LongDimensionSchema)5 DataSchema (org.apache.druid.segment.indexing.DataSchema)5 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)5 DoubleDimensionSchema (org.apache.druid.data.input.impl.DoubleDimensionSchema)4 FloatDimensionSchema (org.apache.druid.data.input.impl.FloatDimensionSchema)4 HashMap (java.util.HashMap)3 ImmutableList (com.google.common.collect.ImmutableList)2 File (java.io.File)2 HashSet (java.util.HashSet)2 ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec)2 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)2 ParseSpec (org.apache.druid.data.input.impl.ParseSpec)2 HadoopDruidIndexerConfig (org.apache.druid.indexer.HadoopDruidIndexerConfig)2