Search in sources:

Example 1 with StringDimensionSchema

Use of io.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

The class GroupByQueryHelper, method createIndexAccumulatorPair.

/**
 * Creates the incremental index that groupBy rows are merged into, plus the accumulator that feeds it.
 *
 * <p>The index schema carries one string dimension per query dimension (named by the dimension's output
 * name), the query's aggregators (or their combining factories when {@code combine} is true), the query
 * granularity, and a minimum timestamp taken from the start of the first query interval. Unless the
 * granularity equals {@code Granularities.ALL}, that start is first aligned to its granularity bucket.
 * An off-heap index is built when the "useOffheap" context value is true; otherwise an on-heap index.
 *
 * <p>The returned accumulator adds each {@code MapBasedRow} to the index. It throws
 * {@code ResourceLimitExceededException} when the index reports {@code IndexSizeExceededException},
 * and {@code ISE} for any input that is not a {@code MapBasedRow}.
 *
 * @param query      groupBy query supplying granularity, intervals, dimensions, aggregators and context
 * @param config     base configuration; query-specific overrides are applied through {@code withOverrides}
 * @param bufferPool buffer pool passed to the off-heap index; not used on the on-heap path
 * @param combine    when true, each aggregator is replaced by its combining factory
 * @return a pair holding the new index and the accumulator that populates it
 */
public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(final GroupByQuery query, final GroupByQueryConfig config, StupidPool<ByteBuffer> bufferPool, final boolean combine) {
    final GroupByQueryConfig effectiveConfig = config.withOverrides(query);
    final Granularity granularity = query.getGranularity();
    final long firstIntervalStart = query.getIntervals().get(0).getStartMillis();
    // With ALL granularity the raw interval start is used; otherwise it is aligned to its bucket start.
    final long minTimestamp = Granularities.ALL.equals(granularity)
        ? firstIntervalStart
        : granularity.bucketStart(new DateTime(firstIntervalStart)).getMillis();

    final List<AggregatorFactory> metricFactories;
    if (!combine) {
        metricFactories = query.getAggregatorSpecs();
    } else {
        metricFactories = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {

            @Override
            public AggregatorFactory apply(AggregatorFactory factory) {
                return factory.getCombiningFactory();
            }
        });
    }

    // Output names of the query dimensions; captured by the accumulator below for every row it adds.
    final List<String> outputNames = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {

        @Override
        public String apply(DimensionSpec spec) {
            return spec.getOutputName();
        }
    });

    final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);

    // All groupBy dimensions are strings, for now.
    final List<DimensionSchema> schemas = Lists.newArrayList();
    for (DimensionSpec spec : query.getDimensions()) {
        schemas.add(new StringDimensionSchema(spec.getOutputName()));
    }

    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withDimensionsSpec(new DimensionsSpec(schemas, null, null))
        .withMetrics(metricFactories.toArray(new AggregatorFactory[metricFactories.size()]))
        .withQueryGranularity(granularity)
        .withMinTimestamp(minTimestamp)
        .build();

    final boolean useOffheap = query.getContextValue("useOffheap", false);
    final IncrementalIndex index;
    if (useOffheap) {
        index = new OffheapIncrementalIndex(indexSchema, false, true, sortResults, effectiveConfig.getMaxResults(), bufferPool);
    } else {
        index = new OnheapIncrementalIndex(indexSchema, false, true, sortResults, effectiveConfig.getMaxResults());
    }

    final Accumulator<IncrementalIndex, T> rowAccumulator = new Accumulator<IncrementalIndex, T>() {

        @Override
        public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
            // Only map-based rows can be indexed; reject everything else up front.
            if (!(in instanceof MapBasedRow)) {
                throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
            }
            final MapBasedRow mapRow = (MapBasedRow) in;
            try {
                accumulated.add(new MapBasedInputRow(mapRow.getTimestamp(), outputNames, mapRow.getEvent()));
            } catch (IndexSizeExceededException sizeExceeded) {
                throw new ResourceLimitExceededException(sizeExceeded.getMessage());
            }
            return accumulated;
        }
    };
    return new Pair<>(index, rowAccumulator);
}
Also used : Accumulator(io.druid.java.util.common.guava.Accumulator) DimensionSpec(io.druid.query.dimension.DimensionSpec) OffheapIncrementalIndex(io.druid.segment.incremental.OffheapIncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) Granularity(io.druid.java.util.common.granularity.Granularity) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) DimensionSchema(io.druid.data.input.impl.DimensionSchema) DateTime(org.joda.time.DateTime) MapBasedRow(io.druid.data.input.MapBasedRow) Function(com.google.common.base.Function) ISE(io.druid.java.util.common.ISE) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) IncrementalIndexSchema(io.druid.segment.incremental.IncrementalIndexSchema) Pair(io.druid.java.util.common.Pair) OffheapIncrementalIndex(io.druid.segment.incremental.OffheapIncrementalIndex) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) ResourceLimitExceededException(io.druid.query.ResourceLimitExceededException) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException)

Example 2 with StringDimensionSchema

Use of io.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

The class OrcHadoopInputRowParserTest, method testTypeFromParseSpec.

/**
 * Checks the ORC type string derived from a parse spec with a "timestamp" column and two string
 * dimensions: the expected struct lists the timestamp first, then each dimension, all typed as string.
 */
@Test
public void testTypeFromParseSpec() {
    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "auto", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(
        ImmutableList.<DimensionSchema>of(new StringDimensionSchema("col1"), new StringDimensionSchema("col2")),
        null,
        null
    );
    final ParseSpec parseSpec = new TimeAndDimsParseSpec(timestampSpec, dimensionsSpec);

    final String actualType = OrcHadoopInputRowParser.typeStringFromParseSpec(parseSpec);

    Assert.assertEquals("struct<timestamp:string,col1:string,col2:string>", actualType);
}
Also used : TimeAndDimsParseSpec(io.druid.data.input.impl.TimeAndDimsParseSpec) TimeAndDimsParseSpec(io.druid.data.input.impl.TimeAndDimsParseSpec) ParseSpec(io.druid.data.input.impl.ParseSpec) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) DimensionSchema(io.druid.data.input.impl.DimensionSchema) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) Test(org.junit.Test)

Example 3 with StringDimensionSchema

Use of io.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

The class IncrementalIndexMultiValueSpecTest, method test.

/**
 * Adds a single row whose string dimensions each receive the same multi-value input and checks what
 * each multi-value handling mode stores: ARRAY keeps input order and duplicates, SORTED_ARRAY sorts
 * but keeps duplicates, SORTED_SET sorts and removes duplicates.
 */
@Test
public void test() throws IndexSizeExceededException {
    final DimensionsSpec multiValueDims = new DimensionsSpec(
        Arrays.<DimensionSchema>asList(
            new StringDimensionSchema("string1", DimensionSchema.MultiValueHandling.ARRAY),
            new StringDimensionSchema("string2", DimensionSchema.MultiValueHandling.SORTED_ARRAY),
            new StringDimensionSchema("string3", DimensionSchema.MultiValueHandling.SORTED_SET)
        ),
        null,
        null
    );
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema(0, new TimestampSpec("ds", "auto", null), Granularities.ALL, VirtualColumns.EMPTY, multiValueDims, new AggregatorFactory[0], false);

    // Synthetic event: the value returned depends only on the prefix of the requested column name.
    final Map<String, Object> event = new HashMap<String, Object>() {

        @Override
        public Object get(Object key) {
            final String column = (String) key;
            if (column.startsWith("string")) {
                return Arrays.asList("xsd", "aba", "fds", "aba");
            } else if (column.startsWith("float")) {
                return Arrays.<Float>asList(3.92f, -2.76f, 42.153f, Float.NaN, -2.76f, -2.76f);
            } else if (column.startsWith("long")) {
                return Arrays.<Long>asList(-231238789L, 328L, 923L, 328L, -2L, 0L);
            }
            return null;
        }
    };

    final IncrementalIndex<?> incrementalIndex = new OnheapIncrementalIndex(indexSchema, true, 10000);
    incrementalIndex.add(
        new MapBasedInputRow(
            0,
            Arrays.asList("string1", "string2", "string3", "float1", "float2", "float3", "long1", "long2", "long3"),
            event
        )
    );

    final Row firstRow = incrementalIndex.iterator().next();
    // ARRAY: input order, duplicates kept.
    Assert.assertEquals(Lists.newArrayList("xsd", "aba", "fds", "aba"), firstRow.getRaw("string1"));
    // SORTED_ARRAY: sorted, duplicates kept.
    Assert.assertEquals(Lists.newArrayList("aba", "aba", "fds", "xsd"), firstRow.getRaw("string2"));
    // SORTED_SET: sorted, duplicates removed.
    Assert.assertEquals(Lists.newArrayList("aba", "fds", "xsd"), firstRow.getRaw("string3"));
}
Also used : HashMap(java.util.HashMap) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) TimestampSpec(io.druid.data.input.impl.TimestampSpec) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Row(io.druid.data.input.Row) Test(org.junit.Test)

Example 4 with StringDimensionSchema

Use of io.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

The class IndexMergerTest, method getIndexWithNumericDims.

/**
 * Builds an index with a long dimension "dimA", a float dimension "dimB" and a string dimension "dimC",
 * then adds four rows that share timestamp 1. The first three rows set all three dimensions; the last
 * row sets only "dimC", leaving the numeric dimensions absent from its event.
 *
 * @return the populated index
 */
private IncrementalIndex getIndexWithNumericDims() throws Exception {
    final IncrementalIndex numericIndex = getIndexWithDimsFromSchemata(
        Arrays.asList(new LongDimensionSchema("dimA"), new FloatDimensionSchema("dimB"), new StringDimensionSchema("dimC"))
    );
    // Every row is added at the same timestamp.
    final long rowTimestamp = 1;

    numericIndex.add(
        new MapBasedInputRow(
            rowTimestamp,
            Arrays.asList("dimA", "dimB", "dimC"),
            ImmutableMap.<String, Object>of("dimA", 100L, "dimB", 4000.567, "dimC", "Hello")
        )
    );
    numericIndex.add(
        new MapBasedInputRow(
            rowTimestamp,
            Arrays.asList("dimA", "dimB", "dimC"),
            ImmutableMap.<String, Object>of("dimA", 72L, "dimB", 60000.789, "dimC", "World")
        )
    );
    numericIndex.add(
        new MapBasedInputRow(
            rowTimestamp,
            Arrays.asList("dimA", "dimB", "dimC"),
            ImmutableMap.<String, Object>of("dimA", 3001L, "dimB", 1.2345, "dimC", "Foobar")
        )
    );
    // Row with no values for the numeric dimensions.
    numericIndex.add(
        new MapBasedInputRow(
            rowTimestamp,
            Arrays.asList("dimA", "dimB", "dimC"),
            ImmutableMap.<String, Object>of("dimC", "Nully Row")
        )
    );
    return numericIndex;
}
Also used : IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) LongDimensionSchema(io.druid.data.input.impl.LongDimensionSchema) FloatDimensionSchema(io.druid.data.input.impl.FloatDimensionSchema) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema)

Example 5 with StringDimensionSchema

Use of io.druid.data.input.impl.StringDimensionSchema in project druid by druid-io.

The class IncrementalIndexTest, method constructorFeeder.

/**
 * Supplies the parameter sets for this parameterized test: for each value of {@code sortFacts}
 * (false, then true) one {@code IndexCreator} that builds an on-heap index and one that builds an
 * off-heap index, all sharing the same schema.
 *
 * <p>The schema has three string dimensions ("string", "float", "long"), a single filtered count
 * metric "cnt" restricted by a selector filter on "billy" = "A", minute query granularity, rollup
 * enabled and a minimum timestamp of 0.
 *
 * @return a list of single-element {@code Object[]} arrays, each wrapping one {@code IndexCreator}
 */
@Parameterized.Parameters
public static Collection<?> constructorFeeder() throws IOException {
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(
        Arrays.<DimensionSchema>asList(
            new StringDimensionSchema("string"),
            new StringDimensionSchema("float"),
            new StringDimensionSchema("long")
        ),
        null,
        null
    );
    final AggregatorFactory[] metricFactories = new AggregatorFactory[] {
        new FilteredAggregatorFactory(new CountAggregatorFactory("cnt"), new SelectorDimFilter("billy", "A", null))
    };
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(0)
        .withQueryGranularity(Granularities.MINUTE)
        .withDimensionsSpec(dimensionsSpec)
        .withMetrics(metricFactories)
        .withRollup(true)
        .build();

    final List<Object[]> parameterSets = Lists.newArrayList();
    for (final boolean sortFacts : new boolean[] { false, true }) {
        final IndexCreator onheapCreator = new IndexCreator() {

            @Override
            public IncrementalIndex createIndex() {
                return new OnheapIncrementalIndex(schema, false, true, sortFacts, 1000);
            }
        };
        final IndexCreator offheapCreator = new IndexCreator() {

            @Override
            public IncrementalIndex createIndex() {
                // Each off-heap index gets its own pool handing out 256 KiB heap buffers.
                final StupidPool<ByteBuffer> bufferPool = new StupidPool<ByteBuffer>("OffheapIncrementalIndex-bufferPool", new Supplier<ByteBuffer>() {

                    @Override
                    public ByteBuffer get() {
                        return ByteBuffer.allocate(256 * 1024);
                    }
                });
                return new OffheapIncrementalIndex(schema, true, true, sortFacts, 1000000, bufferPool);
            }
        };
        parameterSets.add(new Object[] { onheapCreator });
        parameterSets.add(new Object[] { offheapCreator });
    }
    return parameterSets;
}
Also used : FilteredAggregatorFactory(io.druid.query.aggregation.FilteredAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) FilteredAggregatorFactory(io.druid.query.aggregation.FilteredAggregatorFactory) ByteBuffer(java.nio.ByteBuffer) StringDimensionSchema(io.druid.data.input.impl.StringDimensionSchema) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) SelectorDimFilter(io.druid.query.filter.SelectorDimFilter) DimensionsSpec(io.druid.data.input.impl.DimensionsSpec) StupidPool(io.druid.collections.StupidPool)

Aggregations

StringDimensionSchema (io.druid.data.input.impl.StringDimensionSchema): 6; DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 5; MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 3; TimestampSpec (io.druid.data.input.impl.TimestampSpec): 3; Test (org.junit.Test): 3; DimensionSchema (io.druid.data.input.impl.DimensionSchema): 2; TimeAndDimsParseSpec (io.druid.data.input.impl.TimeAndDimsParseSpec): 2; AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 2; IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 2; OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 2; Function (com.google.common.base.Function): 1; StupidPool (io.druid.collections.StupidPool): 1; MapBasedRow (io.druid.data.input.MapBasedRow): 1; Row (io.druid.data.input.Row): 1; FloatDimensionSchema (io.druid.data.input.impl.FloatDimensionSchema): 1; InputRowParser (io.druid.data.input.impl.InputRowParser): 1; LongDimensionSchema (io.druid.data.input.impl.LongDimensionSchema): 1; ParseSpec (io.druid.data.input.impl.ParseSpec): 1; ISE (io.druid.java.util.common.ISE): 1; Pair (io.druid.java.util.common.Pair): 1