
Example 76 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

In the class HadoopIngestionSpecUpdateDatasourcePathSpecSegmentsTest, the method testRunUpdateSegmentListIfDatasourcePathSpecIsUsed:

private HadoopDruidIndexerConfig testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(PathSpec datasourcePathSpec, Interval jobInterval) throws Exception {
    HadoopIngestionSpec spec = new HadoopIngestionSpec(
        new DataSchema("foo", null, new AggregatorFactory[0],
            new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(new Interval("2010-01-01/P1D"))),
            jsonMapper),
        new HadoopIOConfig(jsonMapper.convertValue(datasourcePathSpec, Map.class), null, null),
        null);
    spec = jsonMapper.readValue(jsonMapper.writeValueAsString(spec), HadoopIngestionSpec.class);
    UsedSegmentLister segmentLister = EasyMock.createMock(UsedSegmentLister.class);
    EasyMock.expect(segmentLister.getUsedSegmentsForIntervals(testDatasource, Lists.newArrayList(jobInterval))).andReturn(ImmutableList.of(SEGMENT));
    EasyMock.replay(segmentLister);
    spec = HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(spec, jsonMapper, segmentLister);
    return HadoopDruidIndexerConfig.fromString(jsonMapper.writeValueAsString(spec));
}
Also used : DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) UsedSegmentLister(io.druid.indexer.path.UsedSegmentLister) Interval(org.joda.time.Interval)
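The test above passes an empty AggregatorFactory array because only the path-spec rewriting is under test. For comparison, a schema that actually rolls up metrics would populate that array. Below is a minimal sketch using the same DataSchema constructor; the metric names ("count", "m1out", "m2out") and the extra aggregator imports are illustrative, not taken from the test.

// Sketch: the same DataSchema constructor, but with real aggregators instead of new AggregatorFactory[0].
// Assumes CountAggregatorFactory, LongSumAggregatorFactory and DoubleSumAggregatorFactory from
// io.druid.query.aggregation in addition to the imports listed above.
AggregatorFactory[] aggregators = new AggregatorFactory[] {
    new CountAggregatorFactory("count"),          // row count after roll-up
    new LongSumAggregatorFactory("m2out", "m2"),  // long sum of input column "m2"
    new DoubleSumAggregatorFactory("m1out", "m1") // double sum of input column "m1"
};
DataSchema schema = new DataSchema(
    "foo",
    null, // parser map, kept null as in the test
    aggregators,
    new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(new Interval("2010-01-01/P1D"))),
    jsonMapper);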

Example 77 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

In the class InputRowSerdeTest, the method testThrowParseExceptions:

@Test(expected = ParseException.class)
public void testThrowParseExceptions() {
    InputRow in = new MapBasedInputRow(timestamp, dims, event);
    AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
        new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
        new DoubleSumAggregatorFactory("m1out", "m1"),
        new LongSumAggregatorFactory("m2out", "m2"),
        new HyperUniquesAggregatorFactory("m3out", "m3"),
        // Unparseable from String to Long
        new LongSumAggregatorFactory("unparseable", "m3")
    };
    InputRowSerde.toBytes(in, aggregatorFactories, true);
}
Also used : DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) HyperUniquesAggregatorFactory(io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Test(org.junit.Test)
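For contrast with the exception-throwing call above, passing false for reportParseExceptions makes the serde tolerate the unparseable value. A minimal sketch of the lenient round trip; it assumes the companion InputRowSerde.fromBytes(byte[], AggregatorFactory[]) method exists in this version of the class, so verify against your codebase.

// Sketch: lenient serialization; the unparseable "m3" value is not surfaced as a ParseException.
byte[] data = InputRowSerde.toBytes(in, aggregatorFactories, false); // reportParseExceptions = false
InputRow out = InputRowSerde.fromBytes(data, aggregatorFactories);   // assumed companion method
long m2out = out.getLongMetric("m2out");                             // successfully parsed metrics survive the round trip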

Example 78 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

In the class DatasourcePathSpec, the method addInputPaths:

@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
    if (segments == null || segments.isEmpty()) {
        if (ingestionSpec.isIgnoreWhenNoSegments()) {
            logger.warn("No segments found for ingestionSpec [%s]", ingestionSpec);
            return job;
        } else {
            throw new ISE("No segments found for ingestion spec [%s]", ingestionSpec);
        }
    }
    logger.info("Found total [%d] segments for [%s]  in interval [%s]", segments.size(), ingestionSpec.getDataSource(), ingestionSpec.getIntervals());
    DatasourceIngestionSpec updatedIngestionSpec = ingestionSpec;
    if (updatedIngestionSpec.getDimensions() == null) {
        List<String> dims;
        if (config.getParser().getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
            dims = config.getParser().getParseSpec().getDimensionsSpec().getDimensionNames();
        } else {
            Set<String> dimSet = Sets.newHashSet(Iterables.concat(Iterables.transform(segments, new Function<WindowedDataSegment, Iterable<String>>() {

                @Override
                public Iterable<String> apply(WindowedDataSegment dataSegment) {
                    return dataSegment.getSegment().getDimensions();
                }
            })));
            dims = Lists.newArrayList(Sets.difference(dimSet, config.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions()));
        }
        updatedIngestionSpec = updatedIngestionSpec.withDimensions(dims);
    }
    if (updatedIngestionSpec.getMetrics() == null) {
        Set<String> metrics = Sets.newHashSet();
        final AggregatorFactory[] cols = config.getSchema().getDataSchema().getAggregators();
        if (cols != null) {
            for (AggregatorFactory col : cols) {
                metrics.add(col.getName());
            }
        }
        updatedIngestionSpec = updatedIngestionSpec.withMetrics(Lists.newArrayList(metrics));
    }
    updatedIngestionSpec = updatedIngestionSpec.withQueryGranularity(config.getGranularitySpec().getQueryGranularity());
    job.getConfiguration().set(DatasourceInputFormat.CONF_DRUID_SCHEMA, mapper.writeValueAsString(updatedIngestionSpec));
    job.getConfiguration().set(DatasourceInputFormat.CONF_INPUT_SEGMENTS, mapper.writeValueAsString(segments));
    job.getConfiguration().set(DatasourceInputFormat.CONF_MAX_SPLIT_SIZE, String.valueOf(maxSplitSize));
    MultipleInputs.addInputPath(job, new Path("/dummy/tobe/ignored"), DatasourceInputFormat.class);
    return job;
}
Also used : DatasourceIngestionSpec(io.druid.indexer.hadoop.DatasourceIngestionSpec) Path(org.apache.hadoop.fs.Path) WindowedDataSegment(io.druid.indexer.hadoop.WindowedDataSegment) ISE(io.druid.java.util.common.ISE) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)
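On the read side, whatever addInputPaths stores in the job Configuration has to be deserialized again by the input format. A minimal sketch of pulling the spec back out, reusing the CONF_* keys and ObjectMapper from the snippet above; this is illustrative only, and the real DatasourceInputFormat may organize it differently.

// Sketch: recover the values that addInputPaths wrote into the Hadoop Configuration. Error handling omitted.
String schemaJson = job.getConfiguration().get(DatasourceInputFormat.CONF_DRUID_SCHEMA);
DatasourceIngestionSpec readBack = mapper.readValue(schemaJson, DatasourceIngestionSpec.class);
long maxSplit = Long.parseLong(job.getConfiguration().get(DatasourceInputFormat.CONF_MAX_SPLIT_SIZE));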

Example 79 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.

In the class GranularityPathSpecTest, the method testAddInputPath:

@Test
public void testAddInputPath() throws Exception {
    UserGroupInformation.setLoginUser(UserGroupInformation.createUserForTesting("test", new String[] { "testGroup" }));
    HadoopIngestionSpec spec = new HadoopIngestionSpec(
        new DataSchema("foo", null, new AggregatorFactory[0],
            new UniformGranularitySpec(Granularities.DAY, Granularities.MINUTE, ImmutableList.of(new Interval("2015-11-06T00:00Z/2015-11-07T00:00Z"))),
            jsonMapper),
        new HadoopIOConfig(null, null, null),
        DEFAULT_TUNING_CONFIG);
    granularityPathSpec.setDataGranularity(Granularities.HOUR);
    granularityPathSpec.setFilePattern(".*");
    granularityPathSpec.setInputFormat(TextInputFormat.class);
    Job job = Job.getInstance();
    String formatStr = "file:%s/%s;org.apache.hadoop.mapreduce.lib.input.TextInputFormat";
    testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=00");
    testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=02");
    testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=05");
    testFolder.newFile("test/y=2015/m=11/d=06/H=00/file1");
    testFolder.newFile("test/y=2015/m=11/d=06/H=02/file2");
    testFolder.newFile("test/y=2015/m=11/d=06/H=05/file3");
    testFolder.newFile("test/y=2015/m=11/d=06/H=05/file4");
    granularityPathSpec.setInputPath(testFolder.getRoot().getPath() + "/test");
    granularityPathSpec.addInputPaths(HadoopDruidIndexerConfig.fromSpec(spec), job);
    String actual = job.getConfiguration().get("mapreduce.input.multipleinputs.dir.formats");
    String expected = Joiner.on(",").join(Lists.newArrayList(
        String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=00/file1"),
        String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=02/file2"),
        String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=05/file3"),
        String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=05/file4")));
    Assert.assertEquals("Did not find expected input paths", expected, actual);
}
Also used : HadoopIngestionSpec(io.druid.indexer.HadoopIngestionSpec) DataSchema(io.druid.segment.indexing.DataSchema) UniformGranularitySpec(io.druid.segment.indexing.granularity.UniformGranularitySpec) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) Job(org.apache.hadoop.mapreduce.Job) HadoopIOConfig(io.druid.indexer.HadoopIOConfig) Interval(org.joda.time.Interval) Test(org.junit.Test)
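The interesting part of this test is that an HOUR data granularity expands the single-day interval into per-hour y=/m=/d=/H= directories. A small sketch of that expansion, assuming Granularity.getIterable(Interval) is available on Granularities.HOUR in this codebase; verify for your version.

// Sketch: enumerate the hourly buckets the path spec scans inside the day.
Interval day = new Interval("2015-11-06T00:00Z/2015-11-07T00:00Z");
for (Interval hour : Granularities.HOUR.getIterable(day)) {
    // 24 buckets: 2015-11-06T00:00Z/01:00Z, 01:00Z/02:00Z, ...; only buckets with matching files are added.
    System.out.println(hour);
}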

Example 80 with AggregatorFactory

use of io.druid.query.aggregation.AggregatorFactory in project hive by apache.

In the class DruidGroupByQueryRecordReader, the method next:

@Override
public boolean next(NullWritable key, DruidWritable value) {
    if (nextKeyValue()) {
        // Update value
        value.getValue().clear();
        // 1) The timestamp column
        value.getValue().put(DruidTable.DEFAULT_TIMESTAMP_COLUMN, current.getTimestamp().getMillis());
        // 2) The dimension columns
        for (int i = 0; i < query.getDimensions().size(); i++) {
            DimensionSpec ds = query.getDimensions().get(i);
            List<String> dims = current.getDimension(ds.getDimension());
            if (dims.size() == 0) {
                // NULL value for dimension
                value.getValue().put(ds.getOutputName(), null);
            } else {
                int pos = dims.size() - indexes[i] - 1;
                value.getValue().put(ds.getOutputName(), dims.get(pos));
            }
        }
        int counter = 0;
        // 3) The aggregation columns
        for (AggregatorFactory af : query.getAggregatorSpecs()) {
            switch(extractors[counter++]) {
                case FLOAT:
                    value.getValue().put(af.getName(), current.getFloatMetric(af.getName()));
                    break;
                case LONG:
                    value.getValue().put(af.getName(), current.getLongMetric(af.getName()));
                    break;
            }
        }
        // 4) The post-aggregation columns
        for (PostAggregator pa : query.getPostAggregatorSpecs()) {
            assert extractors[counter++] == Extract.FLOAT;
            value.getValue().put(pa.getName(), current.getFloatMetric(pa.getName()));
        }
        return true;
    }
    return false;
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)
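The extractors array that drives the switch is populated elsewhere in the reader. One plausible way to derive it is from each aggregator's type name; the sketch below is hypothetical (the actual Hive reader may build it differently) and assumes AggregatorFactory.getTypeName() together with the reader's own Extract enum seen above.

// Hypothetical sketch: pick LONG extraction for long-typed aggregators, FLOAT for everything else.
Extract[] extractors = new Extract[query.getAggregatorSpecs().size() + query.getPostAggregatorSpecs().size()];
int idx = 0;
for (AggregatorFactory af : query.getAggregatorSpecs()) {
    extractors[idx++] = "long".equals(af.getTypeName()) ? Extract.LONG : Extract.FLOAT;
}
for (int j = 0; j < query.getPostAggregatorSpecs().size(); j++) {
    extractors[idx++] = Extract.FLOAT; // post-aggregations are read as floats in the loop above
}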

Aggregations

AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 148 uses
Test (org.junit.Test): 86 uses
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 82 uses
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 64 uses
Interval (org.joda.time.Interval): 45 uses
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 38 uses
DateTime (org.joda.time.DateTime): 37 uses
FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory): 32 uses
Result (io.druid.query.Result): 31 uses
DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory): 27 uses
HyperUniquesAggregatorFactory (io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory): 25 uses
Row (io.druid.data.input.Row): 24 uses
PostAggregator (io.druid.query.aggregation.PostAggregator): 24 uses
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 22 uses
CardinalityAggregatorFactory (io.druid.query.aggregation.cardinality.CardinalityAggregatorFactory): 19 uses
LongMaxAggregatorFactory (io.druid.query.aggregation.LongMaxAggregatorFactory): 18 uses
LongFirstAggregatorFactory (io.druid.query.aggregation.first.LongFirstAggregatorFactory): 18 uses
LongLastAggregatorFactory (io.druid.query.aggregation.last.LongLastAggregatorFactory): 18 uses
DimensionSpec (io.druid.query.dimension.DimensionSpec): 18 uses
TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery): 17 uses