Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
From the class HadoopIngestionSpecUpdateDatasourcePathSpecSegmentsTest, the method testRunUpdateSegmentListIfDatasourcePathSpecIsUsed:
private HadoopDruidIndexerConfig testRunUpdateSegmentListIfDatasourcePathSpecIsUsed(PathSpec datasourcePathSpec, Interval jobInterval) throws Exception
{
  HadoopIngestionSpec spec = new HadoopIngestionSpec(
      new DataSchema(
          "foo",
          null,
          new AggregatorFactory[0],
          new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(new Interval("2010-01-01/P1D"))),
          jsonMapper
      ),
      new HadoopIOConfig(jsonMapper.convertValue(datasourcePathSpec, Map.class), null, null),
      null
  );
  spec = jsonMapper.readValue(jsonMapper.writeValueAsString(spec), HadoopIngestionSpec.class);
  UsedSegmentLister segmentLister = EasyMock.createMock(UsedSegmentLister.class);
  EasyMock.expect(segmentLister.getUsedSegmentsForIntervals(testDatasource, Lists.newArrayList(jobInterval)))
          .andReturn(ImmutableList.of(SEGMENT));
  EasyMock.replay(segmentLister);
  spec = HadoopIngestionSpec.updateSegmentListIfDatasourcePathSpecIsUsed(spec, jsonMapper, segmentLister);
  return HadoopDruidIndexerConfig.fromString(jsonMapper.writeValueAsString(spec));
}
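The empty AggregatorFactory[0] above simply gives the test schema no metrics. A schema that does carry metrics would pass an array of concrete factories instead; the following is a minimal sketch using the same factory classes that appear elsewhere on this page, with hypothetical metric and input column names:

// Sketch only: hypothetical metric names, not part of the original test.
AggregatorFactory[] metrics = new AggregatorFactory[]{
    new LongSumAggregatorFactory("added_sum", "added"),          // sums a long-typed input column
    new DoubleSumAggregatorFactory("delta_sum", "delta"),         // sums a double-typed input column
    new HyperUniquesAggregatorFactory("user_unique", "user_id")   // approximate distinct count
};
// This array would take the place of new AggregatorFactory[0] in the DataSchema constructor above.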
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
From the class InputRowSerdeTest, the method testThrowParseExceptions:
@Test(expected = ParseException.class)
public void testThrowParseExceptions()
{
  InputRow in = new MapBasedInputRow(timestamp, dims, event);
  AggregatorFactory[] aggregatorFactories = new AggregatorFactory[]{
      new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
      new DoubleSumAggregatorFactory("m1out", "m1"),
      new LongSumAggregatorFactory("m2out", "m2"),
      new HyperUniquesAggregatorFactory("m3out", "m3"),
      // Unparseable from String to Long
      new LongSumAggregatorFactory("unparseable", "m3")
  };
  InputRowSerde.toBytes(in, aggregatorFactories, true);
}
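The third argument to InputRowSerde.toBytes here appears to control whether parse failures are reported as exceptions: with true, the column that cannot be parsed from String to Long makes the call throw, which is what @Test(expected = ParseException.class) asserts. A hedged sketch of the lenient variant, assuming the same signature with that flag set to false so bad values are tolerated rather than thrown:

// Sketch only: same call as above, but with the report-parse-exceptions flag assumed false,
// so serialization is expected to complete instead of throwing ParseException.
byte[] serialized = InputRowSerde.toBytes(in, aggregatorFactories, false);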
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
From the class DatasourcePathSpec, the method addInputPaths:
@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException
{
  if (segments == null || segments.isEmpty()) {
    if (ingestionSpec.isIgnoreWhenNoSegments()) {
      logger.warn("No segments found for ingestionSpec [%s]", ingestionSpec);
      return job;
    } else {
      throw new ISE("No segments found for ingestion spec [%s]", ingestionSpec);
    }
  }

  logger.info(
      "Found total [%d] segments for [%s] in interval [%s]",
      segments.size(),
      ingestionSpec.getDataSource(),
      ingestionSpec.getIntervals()
  );

  DatasourceIngestionSpec updatedIngestionSpec = ingestionSpec;
  if (updatedIngestionSpec.getDimensions() == null) {
    List<String> dims;
    if (config.getParser().getParseSpec().getDimensionsSpec().hasCustomDimensions()) {
      dims = config.getParser().getParseSpec().getDimensionsSpec().getDimensionNames();
    } else {
      Set<String> dimSet = Sets.newHashSet(
          Iterables.concat(
              Iterables.transform(
                  segments,
                  new Function<WindowedDataSegment, Iterable<String>>()
                  {
                    @Override
                    public Iterable<String> apply(WindowedDataSegment dataSegment)
                    {
                      return dataSegment.getSegment().getDimensions();
                    }
                  }
              )
          )
      );
      dims = Lists.newArrayList(
          Sets.difference(dimSet, config.getParser().getParseSpec().getDimensionsSpec().getDimensionExclusions())
      );
    }
    updatedIngestionSpec = updatedIngestionSpec.withDimensions(dims);
  }

  if (updatedIngestionSpec.getMetrics() == null) {
    Set<String> metrics = Sets.newHashSet();
    final AggregatorFactory[] cols = config.getSchema().getDataSchema().getAggregators();
    if (cols != null) {
      for (AggregatorFactory col : cols) {
        metrics.add(col.getName());
      }
    }
    updatedIngestionSpec = updatedIngestionSpec.withMetrics(Lists.newArrayList(metrics));
  }

  updatedIngestionSpec = updatedIngestionSpec.withQueryGranularity(config.getGranularitySpec().getQueryGranularity());

  job.getConfiguration().set(DatasourceInputFormat.CONF_DRUID_SCHEMA, mapper.writeValueAsString(updatedIngestionSpec));
  job.getConfiguration().set(DatasourceInputFormat.CONF_INPUT_SEGMENTS, mapper.writeValueAsString(segments));
  job.getConfiguration().set(DatasourceInputFormat.CONF_MAX_SPLIT_SIZE, String.valueOf(maxSplitSize));
  MultipleInputs.addInputPath(job, new Path("/dummy/tobe/ignored"), DatasourceInputFormat.class);
  return job;
}
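The anonymous Guava Function above just flattens every segment's dimension list into one set before subtracting the configured exclusions. As a side note, the same collection step could be written more compactly with Java 8 streams; a sketch, assuming segments is the method's Iterable of WindowedDataSegment and not claiming to be the project's own code:

// Equivalent stream-based collection of all segment dimensions (sketch only).
// Requires java.util.stream.Collectors and java.util.stream.StreamSupport.
Set<String> dimSet = StreamSupport.stream(segments.spliterator(), false)
    .flatMap(s -> s.getSegment().getDimensions().stream())
    .collect(Collectors.toSet());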
Use of io.druid.query.aggregation.AggregatorFactory in project druid by druid-io.
From the class GranularityPathSpecTest, the method testAddInputPath:
@Test
public void testAddInputPath() throws Exception
{
  UserGroupInformation.setLoginUser(UserGroupInformation.createUserForTesting("test", new String[]{"testGroup"}));
  HadoopIngestionSpec spec = new HadoopIngestionSpec(
      new DataSchema(
          "foo",
          null,
          new AggregatorFactory[0],
          new UniformGranularitySpec(Granularities.DAY, Granularities.MINUTE, ImmutableList.of(new Interval("2015-11-06T00:00Z/2015-11-07T00:00Z"))),
          jsonMapper
      ),
      new HadoopIOConfig(null, null, null),
      DEFAULT_TUNING_CONFIG
  );
  granularityPathSpec.setDataGranularity(Granularities.HOUR);
  granularityPathSpec.setFilePattern(".*");
  granularityPathSpec.setInputFormat(TextInputFormat.class);
  Job job = Job.getInstance();
  String formatStr = "file:%s/%s;org.apache.hadoop.mapreduce.lib.input.TextInputFormat";
  testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=00");
  testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=02");
  testFolder.newFolder("test", "y=2015", "m=11", "d=06", "H=05");
  testFolder.newFile("test/y=2015/m=11/d=06/H=00/file1");
  testFolder.newFile("test/y=2015/m=11/d=06/H=02/file2");
  testFolder.newFile("test/y=2015/m=11/d=06/H=05/file3");
  testFolder.newFile("test/y=2015/m=11/d=06/H=05/file4");
  granularityPathSpec.setInputPath(testFolder.getRoot().getPath() + "/test");
  granularityPathSpec.addInputPaths(HadoopDruidIndexerConfig.fromSpec(spec), job);
  String actual = job.getConfiguration().get("mapreduce.input.multipleinputs.dir.formats");
  String expected = Joiner.on(",").join(Lists.newArrayList(
      String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=00/file1"),
      String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=02/file2"),
      String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=05/file3"),
      String.format(formatStr, testFolder.getRoot(), "test/y=2015/m=11/d=06/H=05/file4")
  ));
  Assert.assertEquals("Did not find expected input paths", expected, actual);
}
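The expected string is built from formatStr because Hadoop's MultipleInputs records each path together with its input-format class under the mapreduce.input.multipleinputs.dir.formats key, which is what the test expects GranularityPathSpec to populate for every matching hourly file. A small sketch of that mechanism in isolation, using a hypothetical path and the same config key that the assertion above reads back:

// Sketch only: adding one path/format pair by hand and noting the resulting config entry.
Job sketchJob = Job.getInstance();
MultipleInputs.addInputPath(sketchJob, new Path("file:/tmp/test/y=2015/m=11/d=06/H=00/file1"), TextInputFormat.class);
// sketchJob.getConfiguration().get("mapreduce.input.multipleinputs.dir.formats") should now hold an entry of the form
//   file:/tmp/test/y=2015/m=11/d=06/H=00/file1;org.apache.hadoop.mapreduce.lib.input.TextInputFormat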
Use of io.druid.query.aggregation.AggregatorFactory in project hive by apache.
From the class DruidGroupByQueryRecordReader, the method next:
@Override
public boolean next(NullWritable key, DruidWritable value) {
  if (nextKeyValue()) {
    // Update value
    value.getValue().clear();
    // 1) The timestamp column
    value.getValue().put(DruidTable.DEFAULT_TIMESTAMP_COLUMN, current.getTimestamp().getMillis());
    // 2) The dimension columns
    for (int i = 0; i < query.getDimensions().size(); i++) {
      DimensionSpec ds = query.getDimensions().get(i);
      List<String> dims = current.getDimension(ds.getDimension());
      if (dims.size() == 0) {
        // NULL value for dimension
        value.getValue().put(ds.getOutputName(), null);
      } else {
        int pos = dims.size() - indexes[i] - 1;
        value.getValue().put(ds.getOutputName(), dims.get(pos));
      }
    }
    int counter = 0;
    // 3) The aggregation columns
    for (AggregatorFactory af : query.getAggregatorSpecs()) {
      switch (extractors[counter++]) {
        case FLOAT:
          value.getValue().put(af.getName(), current.getFloatMetric(af.getName()));
          break;
        case LONG:
          value.getValue().put(af.getName(), current.getLongMetric(af.getName()));
          break;
      }
    }
    // 4) The post-aggregation columns
    for (PostAggregator pa : query.getPostAggregatorSpecs()) {
      assert extractors[counter++] == Extract.FLOAT;
      value.getValue().put(pa.getName(), current.getFloatMetric(pa.getName()));
    }
    return true;
  }
  return false;
}
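For completeness, next(key, value) above follows the old mapred RecordReader contract: the caller supplies reusable key and value objects and the reader fills them in. A minimal, hypothetical consumer sketch, assuming a reader instance of this class that also exposes the standard createKey()/createValue() pair from that interface:

// Hypothetical usage sketch, not code from the Hive project.
NullWritable key = reader.createKey();
DruidWritable value = reader.createValue();
while (reader.next(key, value)) {
  // The timestamp, dimension, and aggregator columns written above are read back by name.
  Object ts = value.getValue().get(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
  // ... process the remaining entries of value.getValue()
}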