
Example 1 with DimensionSchema

Use of io.druid.data.input.impl.DimensionSchema in project druid by druid-io.

From class GroupByQueryHelper, method createIndexAccumulatorPair:

public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(final GroupByQuery query, final GroupByQueryConfig config, StupidPool<ByteBuffer> bufferPool, final boolean combine) {
    final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
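    // Compute the index's minimum timestamp: the start of the first query interval,
    // aligned to its granularity bucket unless the granularity is ALL.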
    final Granularity gran = query.getGranularity();
    final long timeStart = query.getIntervals().get(0).getStartMillis();
    long granTimeStart = timeStart;
    if (!(Granularities.ALL.equals(gran))) {
        granTimeStart = gran.bucketStart(new DateTime(timeStart)).getMillis();
    }
    final List<AggregatorFactory> aggs;
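    // When combining partial results from other nodes, each aggregator is swapped for its combining form.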
    if (combine) {
        aggs = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {

            @Override
            public AggregatorFactory apply(AggregatorFactory input) {
                return input.getCombiningFactory();
            }
        });
    } else {
        aggs = query.getAggregatorSpecs();
    }
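    // The index's dimension names are the dimension specs' output names.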
    final List<String> dimensions = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {

        @Override
        public String apply(DimensionSpec input) {
            return input.getOutputName();
        }
    });
    final IncrementalIndex index;
    final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);
    // All groupBy dimensions are strings, for now.
    final List<DimensionSchema> dimensionSchemas = Lists.newArrayList();
    for (DimensionSpec dimension : query.getDimensions()) {
        dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
    }
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
            .withDimensionsSpec(new DimensionsSpec(dimensionSchemas, null, null))
            .withMetrics(aggs.toArray(new AggregatorFactory[aggs.size()]))
            .withQueryGranularity(gran)
            .withMinTimestamp(granTimeStart)
            .build();
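    // The "useOffheap" context flag selects a buffer-backed off-heap index; both variants cap rows at maxResults.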
    if (query.getContextValue("useOffheap", false)) {
        index = new OffheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults(), bufferPool);
    } else {
        index = new OnheapIncrementalIndex(indexSchema, false, true, sortResults, querySpecificConfig.getMaxResults());
    }
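    // The accumulator folds each incoming MapBasedRow into the index, surfacing index overflow as a resource-limit error.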
    Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {

        @Override
        public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
            if (in instanceof MapBasedRow) {
                try {
                    MapBasedRow row = (MapBasedRow) in;
                    accumulated.add(new MapBasedInputRow(row.getTimestamp(), dimensions, row.getEvent()));
                } catch (IndexSizeExceededException e) {
                    throw new ResourceLimitExceededException(e.getMessage());
                }
            } else {
                throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
            }
            return accumulated;
        }
    };
    return new Pair<>(index, accumulator);
}
Also used: Accumulator (io.druid.java.util.common.guava.Accumulator), DimensionSpec (io.druid.query.dimension.DimensionSpec), OffheapIncrementalIndex (io.druid.segment.incremental.OffheapIncrementalIndex), OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex), Granularity (io.druid.java.util.common.granularity.Granularity), StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema), DimensionSchema (io.druid.data.input.impl.DimensionSchema), DateTime (org.joda.time.DateTime), MapBasedRow (io.druid.data.input.MapBasedRow), Function (com.google.common.base.Function), ISE (io.druid.java.util.common.ISE), MapBasedInputRow (io.druid.data.input.MapBasedInputRow), IncrementalIndexSchema (io.druid.segment.incremental.IncrementalIndexSchema), Pair (io.druid.java.util.common.Pair), IncrementalIndex (io.druid.segment.incremental.IncrementalIndex), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), ResourceLimitExceededException (io.druid.query.ResourceLimitExceededException), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException)
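For context, a caller consumes the pair by folding rows through the accumulator into the index. A minimal usage sketch, assuming a GroupByQuery query, a GroupByQueryConfig config, a buffer pool, and a list of MapBasedRow results are already in scope (the variable names are hypothetical):

Pair<IncrementalIndex, Accumulator<IncrementalIndex, MapBasedRow>> pair =
        GroupByQueryHelper.createIndexAccumulatorPair(query, config, bufferPool, true);
// Fold each row into the index; the accumulator re-wraps it as a MapBasedInputRow.
IncrementalIndex index = pair.lhs;
for (MapBasedRow row : rows) {
    index = pair.rhs.accumulate(index, row);
}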

Example 2 with DimensionSchema

Use of io.druid.data.input.impl.DimensionSchema in project druid by druid-io.

From class SegmentAnalyzerTest, method testMappedWorksHelper:

private void testMappedWorksHelper(EnumSet<SegmentMetadataQuery.AnalysisType> analyses) throws Exception {
    final List<SegmentAnalysis> results = getSegmentAnalysises(new QueryableIndexSegment("test_1", TestIndex.getMMappedTestIndex()), analyses);
    Assert.assertEquals(1, results.size());
    final SegmentAnalysis analysis = results.get(0);
    Assert.assertEquals("test_1", analysis.getId());
    final Map<String, ColumnAnalysis> columns = analysis.getColumns();
    Assert.assertEquals(TestIndex.COLUMNS.length - 1, columns.size());
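    // Per-dimension checks; null_column holds no values, so it should have no column analysis at all.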
    for (DimensionSchema schema : TestIndex.DIMENSION_SCHEMAS) {
        final String dimension = schema.getName();
        final ColumnAnalysis columnAnalysis = columns.get(dimension);
        if (dimension.equals("null_column")) {
            Assert.assertNull(columnAnalysis);
        } else {
            final boolean isString = schema.getValueType().name().equals(ValueType.STRING.name());
            Assert.assertEquals(dimension, schema.getValueType().name(), columnAnalysis.getType());
            Assert.assertEquals(dimension, 0, columnAnalysis.getSize());
            if (isString) {
                if (analyses == null) {
                    Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
                } else {
                    Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue());
                }
            } else {
                Assert.assertNull(dimension, columnAnalysis.getCardinality());
            }
        }
    }
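    // Metric columns in the test index are all floats; cardinality is reported only for string dimensions.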
    for (String metric : TestIndex.METRICS) {
        final ColumnAnalysis columnAnalysis = columns.get(metric);
        Assert.assertEquals(metric, ValueType.FLOAT.name(), columnAnalysis.getType());
        Assert.assertEquals(metric, 0, columnAnalysis.getSize());
        Assert.assertNull(metric, columnAnalysis.getCardinality());
    }
}
Also used: QueryableIndexSegment (io.druid.segment.QueryableIndexSegment), ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis), SegmentAnalysis (io.druid.query.metadata.metadata.SegmentAnalysis), DimensionSchema (io.druid.data.input.impl.DimensionSchema)
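The assertions hinge on each DimensionSchema reporting a name and a value type. A small illustrative sketch with hypothetical schemas (not the actual TestIndex.DIMENSION_SCHEMAS fixture):

// Hypothetical dimension schemas; string dimensions report ValueType.STRING.
List<DimensionSchema> schemas = ImmutableList.<DimensionSchema>of(
        new StringDimensionSchema("market"),
        new StringDimensionSchema("null_column"));
for (DimensionSchema schema : schemas) {
    System.out.println(schema.getName() + " -> " + schema.getValueType().name());
}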

Example 3 with DimensionSchema

Use of io.druid.data.input.impl.DimensionSchema in project druid by druid-io.

From class KafkaSupervisorTest, method getDataSchema:

private DataSchema getDataSchema(String dataSource) {
    List<DimensionSchema> dimensions = new ArrayList<>();
    dimensions.add(StringDimensionSchema.create("dim1"));
    dimensions.add(StringDimensionSchema.create("dim2"));
    return new DataSchema(
        dataSource,
        objectMapper.convertValue(
            new StringInputRowParser(
                new JSONParseSpec(
                    new TimestampSpec("timestamp", "iso", null),
                    new DimensionsSpec(dimensions, null, null),
                    new JSONPathSpec(true, ImmutableList.<JSONPathFieldSpec>of()),
                    ImmutableMap.<String, Boolean>of()),
                Charsets.UTF_8.name()),
            Map.class),
        new AggregatorFactory[] { new CountAggregatorFactory("rows") },
        new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.<Interval>of()),
        objectMapper);
}
Also used: ArrayList (java.util.ArrayList), EasyMock.anyString (org.easymock.EasyMock.anyString), StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema), DimensionSchema (io.druid.data.input.impl.DimensionSchema), DataSchema (io.druid.segment.indexing.DataSchema), UniformGranularitySpec (io.druid.segment.indexing.granularity.UniformGranularitySpec), CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), StringInputRowParser (io.druid.data.input.impl.StringInputRowParser), TimestampSpec (io.druid.data.input.impl.TimestampSpec), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), JSONPathSpec (io.druid.data.input.impl.JSONPathSpec), JSONParseSpec (io.druid.data.input.impl.JSONParseSpec), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), Interval (org.joda.time.Interval)
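A hedged usage sketch: the helper only parameterizes the datasource name, so a test can build and inspect the schema directly (the datasource name below is hypothetical):

DataSchema schema = getDataSchema("testDatasource");
// Expect the two string dimensions declared above plus the single "rows" count metric.
for (AggregatorFactory agg : schema.getAggregators()) {
    System.out.println(agg.getName());
}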

Example 4 with DimensionSchema

Use of io.druid.data.input.impl.DimensionSchema in project druid by druid-io.

From class OrcHadoopInputRowParserTest, method testTypeFromParseSpec:

@Test
public void testTypeFromParseSpec() {
    ParseSpec parseSpec = new TimeAndDimsParseSpec(
            new TimestampSpec("timestamp", "auto", null),
            new DimensionsSpec(
                    ImmutableList.<DimensionSchema>of(
                            new StringDimensionSchema("col1"),
                            new StringDimensionSchema("col2")),
                    null,
                    null));
    String typeString = OrcHadoopInputRowParser.typeStringFromParseSpec(parseSpec);
    String expected = "struct<timestamp:string,col1:string,col2:string>";
    Assert.assertEquals(expected, typeString);
}
Also used: TimeAndDimsParseSpec (io.druid.data.input.impl.TimeAndDimsParseSpec), ParseSpec (io.druid.data.input.impl.ParseSpec), TimestampSpec (io.druid.data.input.impl.TimestampSpec), DimensionsSpec (io.druid.data.input.impl.DimensionsSpec), StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema), DimensionSchema (io.druid.data.input.impl.DimensionSchema), Test (org.junit.Test)
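The expected string follows mechanically from the parse spec: the timestamp column first, then each dimension, all typed as string. A minimal hand-rolled sketch of that derivation, assuming string-typed columns only (a simplification; the real typeStringFromParseSpec may handle more cases):

// Hypothetical re-derivation of the struct type string from a ParseSpec.
static String structTypeString(ParseSpec parseSpec) {
    StringBuilder sb = new StringBuilder("struct<");
    sb.append(parseSpec.getTimestampSpec().getTimestampColumn()).append(":string");
    for (DimensionSchema dim : parseSpec.getDimensionsSpec().getDimensions()) {
        sb.append(',').append(dim.getName()).append(":string");
    }
    // For the spec above this yields "struct<timestamp:string,col1:string,col2:string>".
    return sb.append('>').toString();
}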

Example 5 with DimensionSchema

Use of io.druid.data.input.impl.DimensionSchema in project druid by druid-io.

From class DruidParquetReadSupport, method getPartialReadSchema:

private MessageType getPartialReadSchema(InitContext context) {
    MessageType fullSchema = context.getFileSchema();
    String name = fullSchema.getName();
    HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
    String tsField = config.getParser().getParseSpec().getTimestampSpec().getTimestampColumn();
    List<DimensionSchema> dimensionSchema = config.getParser().getParseSpec().getDimensionsSpec().getDimensions();
    Set<String> dimensions = Sets.newHashSet();
    for (DimensionSchema dim : dimensionSchema) {
        dimensions.add(dim.getName());
    }
    Set<String> metricsFields = Sets.newHashSet();
    for (AggregatorFactory agg : config.getSchema().getDataSchema().getAggregators()) {
        metricsFields.addAll(agg.requiredFields());
    }
    List<Type> partialFields = Lists.newArrayList();
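    // Keep a field if it is the timestamp column, feeds some aggregator, or is a requested dimension;
    // an empty dimension list keeps every field (schemaless dimensions).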
    for (Type type : fullSchema.getFields()) {
        if (tsField.equals(type.getName())
                || metricsFields.contains(type.getName())
                || (dimensions.size() > 0 && dimensions.contains(type.getName()))
                || dimensions.size() == 0) {
            partialFields.add(type);
        }
    }
    return new MessageType(name, partialFields);
}
Also used: MessageType (org.apache.parquet.schema.MessageType), Type (org.apache.parquet.schema.Type), HadoopDruidIndexerConfig (io.druid.indexer.HadoopDruidIndexerConfig), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), DimensionSchema (io.druid.data.input.impl.DimensionSchema)
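The keep/drop rule in the loop above reads more clearly as a standalone predicate. A hedged restatement (a hypothetical helper, not part of DruidParquetReadSupport):

static boolean keepField(String field, String tsField, Set<String> dimensions, Set<String> metricsFields) {
    return tsField.equals(field)                                      // always keep the timestamp column
            || metricsFields.contains(field)                          // keep any field an aggregator requires
            || (dimensions.size() > 0 && dimensions.contains(field))  // keep explicitly requested dimensions
            || dimensions.size() == 0;                                // no dimensions declared: keep every field
}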

Aggregations

DimensionSchema (io.druid.data.input.impl.DimensionSchema): 6
DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 3
StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema): 3
TimestampSpec (io.druid.data.input.impl.TimestampSpec): 2
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 2
ColumnAnalysis (io.druid.query.metadata.metadata.ColumnAnalysis): 2
SegmentAnalysis (io.druid.query.metadata.metadata.SegmentAnalysis): 2
Function (com.google.common.base.Function): 1
ImmutableMap (com.google.common.collect.ImmutableMap): 1
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 1
MapBasedRow (io.druid.data.input.MapBasedRow): 1
JSONParseSpec (io.druid.data.input.impl.JSONParseSpec): 1
JSONPathSpec (io.druid.data.input.impl.JSONPathSpec): 1
ParseSpec (io.druid.data.input.impl.ParseSpec): 1
StringInputRowParser (io.druid.data.input.impl.StringInputRowParser): 1
TimeAndDimsParseSpec (io.druid.data.input.impl.TimeAndDimsParseSpec): 1
HadoopDruidIndexerConfig (io.druid.indexer.HadoopDruidIndexerConfig): 1
ISE (io.druid.java.util.common.ISE): 1
Pair (io.druid.java.util.common.Pair): 1
Granularity (io.druid.java.util.common.granularity.Granularity): 1