Use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io, from the class IncrementalIndexTest, method testConcurrentAddRead:
@Test(timeout = 60_000L)
public void testConcurrentAddRead() throws InterruptedException, ExecutionException {
  final int dimensionCount = 5;
  // Ingest-time aggregators read the raw Dim_N input columns.
  final ArrayList<AggregatorFactory> ingestAggregatorFactories = new ArrayList<>(dimensionCount + 1);
  ingestAggregatorFactories.add(new CountAggregatorFactory("rows"));
  for (int i = 0; i < dimensionCount; ++i) {
    ingestAggregatorFactories.add(
        new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("Dim_%s", i))
    );
    ingestAggregatorFactories.add(
        new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("Dim_%s", i))
    );
  }
  // Query-time aggregators re-aggregate the ingested metrics, so name and fieldName match.
  final ArrayList<AggregatorFactory> queryAggregatorFactories = new ArrayList<>(dimensionCount + 1);
  queryAggregatorFactories.add(new CountAggregatorFactory("rows"));
  for (int i = 0; i < dimensionCount; ++i) {
    queryAggregatorFactories.add(
        new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("sumResult%s", i))
    );
    queryAggregatorFactories.add(
        new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("doubleSumResult%s", i))
    );
  }
  final IncrementalIndex index = closer.closeLater(
      indexCreator.createIndex(ingestAggregatorFactories.toArray(new AggregatorFactory[dimensionCount]))
  );
  final int concurrentThreads = 2;
  final int elementsPerThread = 10_000;
  final ListeningExecutorService indexExecutor = MoreExecutors.listeningDecorator(
      Executors.newFixedThreadPool(
          concurrentThreads,
          new ThreadFactoryBuilder()
              .setDaemon(false)
              .setNameFormat("index-executor-%d")
              .setPriority(Thread.MIN_PRIORITY)
              .build()
      )
  );
  final ListeningExecutorService queryExecutor = MoreExecutors.listeningDecorator(
      Executors.newFixedThreadPool(
          concurrentThreads,
          new ThreadFactoryBuilder()
              .setDaemon(false)
              .setNameFormat("query-executor-%d")
              .build()
      )
  );
  final long timestamp = System.currentTimeMillis();
  final Interval queryInterval = new Interval("1900-01-01T00:00:00Z/2900-01-01T00:00:00Z");
  final List<ListenableFuture<?>> indexFutures = Lists.newArrayListWithExpectedSize(concurrentThreads);
  final List<ListenableFuture<?>> queryFutures = Lists.newArrayListWithExpectedSize(concurrentThreads);
  final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
  final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(
      new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()),
      new TimeseriesQueryEngine(),
      QueryRunnerTestHelper.NOOP_QUERYWATCHER
  );
  final AtomicInteger currentlyRunning = new AtomicInteger(0);
  final AtomicInteger concurrentlyRan = new AtomicInteger(0);
  final AtomicInteger someoneRan = new AtomicInteger(0);
  final CountDownLatch startLatch = new CountDownLatch(1);
  final CountDownLatch readyLatch = new CountDownLatch(concurrentThreads * 2);
  final AtomicInteger queriesAccumualted = new AtomicInteger(0);
  for (int j = 0; j < concurrentThreads; j++) {
    indexFutures.add(
        indexExecutor.submit(
            new Runnable() {
              @Override
              public void run() {
                readyLatch.countDown();
                try {
                  startLatch.await();
                } catch (InterruptedException e) {
                  Thread.currentThread().interrupt();
                  throw Throwables.propagate(e);
                }
                currentlyRunning.incrementAndGet();
                try {
                  for (int i = 0; i < elementsPerThread; i++) {
                    index.add(getLongRow(timestamp + i, i, dimensionCount));
                    someoneRan.incrementAndGet();
                  }
                } catch (IndexSizeExceededException e) {
                  throw Throwables.propagate(e);
                }
                currentlyRunning.decrementAndGet();
              }
            }
        )
    );
    final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
        .dataSource("xxx")
        .granularity(Granularities.ALL)
        .intervals(ImmutableList.of(queryInterval))
        .aggregators(queryAggregatorFactories)
        .build();
    queryFutures.add(
        queryExecutor.submit(
            new Runnable() {
              @Override
              public void run() {
                readyLatch.countDown();
                try {
                  startLatch.await();
                } catch (InterruptedException e) {
                  Thread.currentThread().interrupt();
                  throw Throwables.propagate(e);
                }
                while (concurrentlyRan.get() == 0) {
                  QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(
                      factory.createRunner(incrementalIndexSegment),
                      factory.getToolchest()
                  );
                  Map<String, Object> context = new HashMap<String, Object>();
                  Sequence<Result<TimeseriesResultValue>> sequence = runner.run(query, context);
                  for (Double result : sequence.accumulate(
                      new Double[0],
                      new Accumulator<Double[], Result<TimeseriesResultValue>>() {
                        @Override
                        public Double[] accumulate(Double[] accumulated, Result<TimeseriesResultValue> in) {
                          if (currentlyRunning.get() > 0) {
                            concurrentlyRan.incrementAndGet();
                          }
                          queriesAccumualted.incrementAndGet();
                          return Lists.asList(in.getValue().getDoubleMetric("doubleSumResult0"), accumulated)
                                      .toArray(new Double[accumulated.length + 1]);
                        }
                      }
                  )) {
                    final Integer maxValueExpected = someoneRan.get() + concurrentThreads;
                    if (maxValueExpected > 0) {
                      // Eventually consistent, but should be somewhere in that range
                      // Actual result is validated after all writes are guaranteed done.
                      Assert.assertTrue(
                          String.format("%d >= %g >= 0 violated", maxValueExpected, result),
                          result >= 0 && result <= maxValueExpected
                      );
                    }
                  }
                }
              }
            }
        )
    );
  }
  readyLatch.await();
  startLatch.countDown();
  List<ListenableFuture<?>> allFutures = new ArrayList<>(queryFutures.size() + indexFutures.size());
  allFutures.addAll(queryFutures);
  allFutures.addAll(indexFutures);
  Futures.allAsList(allFutures).get();
  Assert.assertTrue("Queries ran too fast", queriesAccumualted.get() > 0);
  Assert.assertTrue("Did not hit concurrency, please try again", concurrentlyRan.get() > 0);
  queryExecutor.shutdown();
  indexExecutor.shutdown();
  // After all writers have finished, verify the final totals with one more query.
  QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(
      factory.createRunner(incrementalIndexSegment),
      factory.getToolchest()
  );
  TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
      .dataSource("xxx")
      .granularity(Granularities.ALL)
      .intervals(ImmutableList.of(queryInterval))
      .aggregators(queryAggregatorFactories)
      .build();
  Map<String, Object> context = new HashMap<String, Object>();
  List<Result<TimeseriesResultValue>> results = Sequences.toList(
      runner.run(query, context),
      new LinkedList<Result<TimeseriesResultValue>>()
  );
  boolean isRollup = index.isRollup();
  for (Result<TimeseriesResultValue> result : results) {
    Assert.assertEquals(
        elementsPerThread * (isRollup ? 1 : concurrentThreads),
        result.getValue().getLongMetric("rows").intValue()
    );
    for (int i = 0; i < dimensionCount; ++i) {
      Assert.assertEquals(
          String.format("Failed long sum on dimension %d", i),
          elementsPerThread * concurrentThreads,
          result.getValue().getLongMetric(String.format("sumResult%s", i)).intValue()
      );
      Assert.assertEquals(
          String.format("Failed double sum on dimension %d", i),
          elementsPerThread * concurrentThreads,
          result.getValue().getDoubleMetric(String.format("doubleSumResult%s", i)).intValue()
      );
    }
  }
}
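A note on the pattern above: the ingest-time factories point their fieldName at the raw input columns (Dim_0 through Dim_4), while the query-time factories use the same string for name and fieldName so that they re-aggregate the metrics the index has already computed. A minimal sketch of that pairing for a single hypothetical column named "price" (not part of the test) would be:

// Ingest time: sum the raw input column "price" into the metric "priceSum".
AggregatorFactory ingestTime = new DoubleSumAggregatorFactory("priceSum", "price");
// Query time: re-aggregate the stored metric; name and fieldName are both "priceSum".
AggregatorFactory queryTime = new DoubleSumAggregatorFactory("priceSum", "priceSum");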
Use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io, from the class InputRowSerdeTest, method testThrowParseExceptions:
@Test(expected = ParseException.class)
public void testThrowParseExceptions() {
  InputRow in = new MapBasedInputRow(timestamp, dims, event);
  AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
      new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
      new DoubleSumAggregatorFactory("m1out", "m1"),
      new LongSumAggregatorFactory("m2out", "m2"),
      new HyperUniquesAggregatorFactory("m3out", "m3"),
      // Unparseable from String to Long
      new LongSumAggregatorFactory("unparseable", "m3")
  };
  InputRowSerde.toBytes(in, aggregatorFactories, true);
}
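The only difference from testSerde further down is the final reportParseExceptions argument: with true, the String value in "m3" cannot be coerced to a long and toBytes throws the expected ParseException; with false the failure is tolerated and the metric defaults to zero. A minimal sketch of the lenient call, assuming the same in row and aggregatorFactories array:

// Lenient mode: the unparseable long sum is skipped and later reads back as 0L.
byte[] lenient = InputRowSerde.toBytes(in, aggregatorFactories, false);
InputRow roundTripped = InputRowSerde.fromBytes(lenient, aggregatorFactories);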
Use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project hive by apache, from the class DruidOutputFormat, method getHiveRecordWriter:
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(
    JobConf jc,
    Path finalOutPath,
    Class<? extends Writable> valueClass,
    boolean isCompressed,
    Properties tableProperties,
    Progressable progress
) throws IOException {
  final String segmentGranularity = tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY) != null
      ? tableProperties.getProperty(Constants.DRUID_SEGMENT_GRANULARITY)
      : HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_INDEXING_GRANULARITY);
  final String dataSource = tableProperties.getProperty(Constants.DRUID_DATA_SOURCE);
  final String segmentDirectory = tableProperties.getProperty(Constants.DRUID_SEGMENT_DIRECTORY) != null
      ? tableProperties.getProperty(Constants.DRUID_SEGMENT_DIRECTORY)
      : HiveConf.getVar(jc, HiveConf.ConfVars.DRUID_SEGMENT_DIRECTORY);
  final HdfsDataSegmentPusherConfig hdfsDataSegmentPusherConfig = new HdfsDataSegmentPusherConfig();
  hdfsDataSegmentPusherConfig.setStorageDirectory(segmentDirectory);
  final DataSegmentPusher hdfsDataSegmentPusher = new HdfsDataSegmentPusher(
      hdfsDataSegmentPusherConfig, jc, DruidStorageHandlerUtils.JSON_MAPPER);
  final GranularitySpec granularitySpec = new UniformGranularitySpec(
      Granularity.valueOf(segmentGranularity),
      QueryGranularity.fromString(
          tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY) == null
              ? "NONE"
              : tableProperties.getProperty(Constants.DRUID_QUERY_GRANULARITY)),
      null
  );
  final String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
  final String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
  if (StringUtils.isEmpty(columnNameProperty) || StringUtils.isEmpty(columnTypeProperty)) {
    throw new IllegalStateException(
        String.format("List of columns names [%s] or columns type [%s] is/are not present",
            columnNameProperty, columnTypeProperty));
  }
  ArrayList<String> columnNames = new ArrayList<String>();
  for (String name : columnNameProperty.split(",")) {
    columnNames.add(name);
  }
  if (!columnNames.contains(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
    throw new IllegalStateException("Timestamp column (' " + DruidTable.DEFAULT_TIMESTAMP_COLUMN
        + "') not specified in create table; list of columns is : "
        + tableProperties.getProperty(serdeConstants.LIST_COLUMNS));
  }
  ArrayList<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
  // Default, all columns that are not metrics or timestamp, are treated as dimensions
  final List<DimensionSchema> dimensions = new ArrayList<>();
  ImmutableList.Builder<AggregatorFactory> aggregatorFactoryBuilder = ImmutableList.builder();
  for (int i = 0; i < columnTypes.size(); i++) {
    PrimitiveTypeInfo f = (PrimitiveTypeInfo) columnTypes.get(i);
    AggregatorFactory af;
    switch (f.getPrimitiveCategory()) {
      case BYTE:
      case SHORT:
      case INT:
      case LONG:
        af = new LongSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
        break;
      case FLOAT:
      case DOUBLE:
      case DECIMAL:
        af = new DoubleSumAggregatorFactory(columnNames.get(i), columnNames.get(i));
        break;
      case TIMESTAMP:
        String tColumnName = columnNames.get(i);
        if (!tColumnName.equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)
            && !tColumnName.equals(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME)) {
          throw new IOException("Dimension " + tColumnName + " does not have STRING type: "
              + f.getPrimitiveCategory());
        }
        continue;
      default:
        // Dimension
        String dColumnName = columnNames.get(i);
        if (PrimitiveObjectInspectorUtils.getPrimitiveGrouping(f.getPrimitiveCategory())
            != PrimitiveGrouping.STRING_GROUP) {
          throw new IOException("Dimension " + dColumnName + " does not have STRING type: "
              + f.getPrimitiveCategory());
        }
        dimensions.add(new StringDimensionSchema(dColumnName));
        continue;
    }
    aggregatorFactoryBuilder.add(af);
  }
  List<AggregatorFactory> aggregatorFactories = aggregatorFactoryBuilder.build();
  final InputRowParser inputRowParser = new MapInputRowParser(
      new TimeAndDimsParseSpec(
          new TimestampSpec(DruidTable.DEFAULT_TIMESTAMP_COLUMN, "auto", null),
          new DimensionsSpec(dimensions,
              Lists.newArrayList(Constants.DRUID_TIMESTAMP_GRANULARITY_COL_NAME), null)));
  Map<String, Object> inputParser = DruidStorageHandlerUtils.JSON_MAPPER.convertValue(inputRowParser, Map.class);
  final DataSchema dataSchema = new DataSchema(
      Preconditions.checkNotNull(dataSource, "Data source name is null"),
      inputParser,
      aggregatorFactories.toArray(new AggregatorFactory[aggregatorFactories.size()]),
      granularitySpec,
      DruidStorageHandlerUtils.JSON_MAPPER
  );
  final String workingPath = jc.get(Constants.DRUID_JOB_WORKING_DIRECTORY);
  final String version = jc.get(Constants.DRUID_SEGMENT_VERSION);
  Integer maxPartitionSize = HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_PARTITION_SIZE);
  String basePersistDirectory = HiveConf.getVar(jc, HiveConf.ConfVars.HIVE_DRUID_BASE_PERSIST_DIRECTORY);
  if (Strings.isNullOrEmpty(basePersistDirectory)) {
    basePersistDirectory = System.getProperty("java.io.tmpdir");
  }
  Integer maxRowInMemory = HiveConf.getIntVar(jc, HiveConf.ConfVars.HIVE_DRUID_MAX_ROW_IN_MEMORY);
  RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(
      maxRowInMemory, null, null,
      new File(basePersistDirectory, dataSource),
      new CustomVersioningPolicy(version),
      null, null, null, null, true, 0, 0, true, null
  );
  LOG.debug(String.format("running with Data schema [%s] ", dataSchema));
  return new DruidRecordWriter(
      dataSchema, realtimeTuningConfig, hdfsDataSegmentPusher, maxPartitionSize,
      new Path(workingPath, SEGMENTS_DESCRIPTOR_DIR_NAME),
      finalOutPath.getFileSystem(jc)
  );
}
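To make the type mapping concrete: for a hypothetical Hive table with columns __time TIMESTAMP, page STRING, added INT, delta DOUBLE, the loop above would produce roughly the following (a sketch under those assumed column names, not code from DruidOutputFormat):

// INT (and BYTE/SHORT/LONG) columns become long sums.
AggregatorFactory addedMetric = new LongSumAggregatorFactory("added", "added");
// FLOAT/DOUBLE/DECIMAL columns become double sums.
AggregatorFactory deltaMetric = new DoubleSumAggregatorFactory("delta", "delta");
// STRING columns become dimensions.
DimensionSchema pageDimension = new StringDimensionSchema("page");
// The __time TIMESTAMP column is consumed by the TimestampSpec and produces no aggregator.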
Use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io, from the class HadoopConverterJobTest, method setUp:
@Before
public void setUp() throws Exception {
  final MetadataStorageUpdaterJobSpec metadataStorageUpdaterJobSpec = new MetadataStorageUpdaterJobSpec() {
    @Override
    public String getSegmentTable() {
      return derbyConnectorRule.metadataTablesConfigSupplier().get().getSegmentsTable();
    }

    @Override
    public MetadataStorageConnectorConfig get() {
      return derbyConnectorRule.getMetadataConnectorConfig();
    }
  };
  final File scratchFileDir = temporaryFolder.newFolder();
  storageLocProperty = System.getProperty(STORAGE_PROPERTY_KEY);
  tmpSegmentDir = temporaryFolder.newFolder();
  System.setProperty(STORAGE_PROPERTY_KEY, tmpSegmentDir.getAbsolutePath());
  final URL url = Preconditions.checkNotNull(Query.class.getClassLoader().getResource("druid.sample.tsv"));
  final File tmpInputFile = temporaryFolder.newFile();
  FileUtils.retryCopy(
      new ByteSource() {
        @Override
        public InputStream openStream() throws IOException {
          return url.openStream();
        }
      },
      tmpInputFile,
      FileUtils.IS_EXCEPTION,
      3
  );
  final HadoopDruidIndexerConfig hadoopDruidIndexerConfig = new HadoopDruidIndexerConfig(
      new HadoopIngestionSpec(
          new DataSchema(
              DATASOURCE,
              HadoopDruidIndexerConfig.JSON_MAPPER.convertValue(
                  new StringInputRowParser(
                      new DelimitedParseSpec(
                          new TimestampSpec("ts", "iso", null),
                          new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList(TestIndex.DIMENSIONS)), null, null),
                          "\t",
                          "",
                          Arrays.asList(TestIndex.COLUMNS)
                      ),
                      null
                  ),
                  Map.class
              ),
              new AggregatorFactory[] {
                  new DoubleSumAggregatorFactory(TestIndex.METRICS[0], TestIndex.METRICS[0]),
                  new HyperUniquesAggregatorFactory("quality_uniques", "quality")
              },
              new UniformGranularitySpec(Granularities.MONTH, Granularities.DAY, ImmutableList.<Interval>of(interval)),
              HadoopDruidIndexerConfig.JSON_MAPPER
          ),
          new HadoopIOConfig(
              ImmutableMap.<String, Object>of("type", "static", "paths", tmpInputFile.getAbsolutePath()),
              metadataStorageUpdaterJobSpec,
              tmpSegmentDir.getAbsolutePath()
          ),
          new HadoopTuningConfig(
              scratchFileDir.getAbsolutePath(),
              null, null, null, null, null,
              false, false, false, false,
              null, false, false, null, null, null, false, false
          )
      )
  );
  metadataStorageTablesConfigSupplier = derbyConnectorRule.metadataTablesConfigSupplier();
  connector = derbyConnectorRule.getConnector();
  try {
    connector.getDBI().withHandle(
        new HandleCallback<Void>() {
          @Override
          public Void withHandle(Handle handle) throws Exception {
            handle.execute("DROP TABLE druid_segments");
            return null;
          }
        }
    );
  } catch (CallbackFailedException e) {
    // Who cares
  }
  List<Jobby> jobs = ImmutableList.of(
      new Jobby() {
        @Override
        public boolean run() {
          connector.createSegmentTable(metadataStorageUpdaterJobSpec.getSegmentTable());
          return true;
        }
      },
      new HadoopDruidDetermineConfigurationJob(hadoopDruidIndexerConfig),
      new HadoopDruidIndexerJob(hadoopDruidIndexerConfig, new SQLMetadataStorageUpdaterJobHandler(connector))
  );
  JobHelper.runJobs(jobs, hadoopDruidIndexerConfig);
}
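If one later wanted to verify the doubleSum metric defined in this DataSchema with a timeseries query, the factory would use the stored metric name for both name and fieldName, just as in the first example on this page. A rough sketch only (DATASOURCE and interval are the test's fields; the query itself is not part of HadoopConverterJobTest, and re-using TestIndex.METRICS[0] for both arguments is an assumption):

TimeseriesQuery verify = Druids.newTimeseriesQueryBuilder()
    .dataSource(DATASOURCE)
    .granularity(Granularities.ALL)
    .intervals(ImmutableList.of(interval))
    .aggregators(ImmutableList.<AggregatorFactory>of(
        new DoubleSumAggregatorFactory(TestIndex.METRICS[0], TestIndex.METRICS[0])))
    .build();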
Use of io.druid.query.aggregation.DoubleSumAggregatorFactory in project druid by druid-io, from the class InputRowSerdeTest, method testSerde:
@Test
public void testSerde() {
  InputRow in = new MapBasedInputRow(timestamp, dims, event);
  AggregatorFactory[] aggregatorFactories = new AggregatorFactory[] {
      new DoubleSumAggregatorFactory("agg_non_existing", "agg_non_existing_in"),
      new DoubleSumAggregatorFactory("m1out", "m1"),
      new LongSumAggregatorFactory("m2out", "m2"),
      new HyperUniquesAggregatorFactory("m3out", "m3"),
      // Unparseable from String to Long
      new LongSumAggregatorFactory("unparseable", "m3")
  };
  // Ignore Unparseable aggregator
  byte[] data = InputRowSerde.toBytes(in, aggregatorFactories, false);
  InputRow out = InputRowSerde.fromBytes(data, aggregatorFactories);
  Assert.assertEquals(timestamp, out.getTimestampFromEpoch());
  Assert.assertEquals(dims, out.getDimensions());
  Assert.assertEquals(Collections.EMPTY_LIST, out.getDimension("dim_non_existing"));
  Assert.assertEquals(ImmutableList.of("d1v"), out.getDimension("d1"));
  Assert.assertEquals(ImmutableList.of("d2v1", "d2v2"), out.getDimension("d2"));
  Assert.assertEquals(0.0f, out.getFloatMetric("agg_non_existing"), 0.00001);
  Assert.assertEquals(5.0f, out.getFloatMetric("m1out"), 0.00001);
  Assert.assertEquals(100L, out.getLongMetric("m2out"));
  Assert.assertEquals(1, ((HyperLogLogCollector) out.getRaw("m3out")).estimateCardinality(), 0.001);
  Assert.assertEquals(0L, out.getLongMetric("unparseable"));
}
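The timestamp, dims and event fixtures are declared elsewhere in InputRowSerdeTest; the assertions above imply an input row shaped roughly like the following (a hypothetical reconstruction for illustration, not the test's actual field values):

long timestamp = System.currentTimeMillis();
List<String> dims = ImmutableList.of("d1", "d2");
Map<String, Object> event = ImmutableMap.<String, Object>of(
    "d1", "d1v",                              // single-valued dimension
    "d2", ImmutableList.of("d2v1", "d2v2"),   // multi-valued dimension
    "m1", 5.0f,                               // summed into m1out
    "m2", 100L,                               // summed into m2out
    "m3", "m3v"                               // hyperUnique input; unparseable as a long
);
InputRow in = new MapBasedInputRow(timestamp, dims, event);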