Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde in project druid by druid-io.
Class TimeCompareBenchmark, method setup().
@Setup
public void setup() throws IOException {
  log.info("SETUP CALLED AT " + System.currentTimeMillis());
  ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
  executorService = Execs.multiThreaded(numSegments, "TopNThreadPool");
  setupQueries();
  String schemaName = "basic";
  schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
  // Split the schema's data interval into numSegments contiguous slices.
  segmentIntervals = new Interval[numSegments];
  long startMillis = schemaInfo.getDataInterval().getStartMillis();
  long endMillis = schemaInfo.getDataInterval().getEndMillis();
  long partialIntervalMillis = (endMillis - startMillis) / numSegments;
  for (int i = 0; i < numSegments; i++) {
    long partialEndMillis = startMillis + partialIntervalMillis;
    segmentIntervals[i] = Intervals.utc(startMillis, partialEndMillis);
    log.info("Segment [%d] with interval [%s]", i, segmentIntervals[i]);
    startMillis = partialEndMillis;
  }
  // Generate rowsPerSegment rows into one incremental index per segment.
  incIndexes = new ArrayList<>();
  for (int i = 0; i < numSegments; i++) {
    log.info("Generating rows for segment " + i);
    DataGenerator gen = new DataGenerator(
        schemaInfo.getColumnSchemas(),
        RNG_SEED + i,
        segmentIntervals[i],
        rowsPerSegment
    );
    IncrementalIndex incIndex = makeIncIndex();
    for (int j = 0; j < rowsPerSegment; j++) {
      InputRow row = gen.nextRow();
      if (j % 10000 == 0) {
        log.info(j + " rows generated.");
      }
      incIndex.add(row);
    }
    incIndexes.add(incIndex);
  }
  // Persist each incremental index and reload it as a queryable index.
  tmpDir = FileUtils.createTempDir();
  log.info("Using temp dir: " + tmpDir.getAbsolutePath());
  qIndexes = new ArrayList<>();
  for (int i = 0; i < numSegments; i++) {
    File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpDir, new IndexSpec(), null);
    QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
    qIndexes.add(qIndex);
  }
  // Wrap each segment in a per-segment-optimizing runner, then merge them all
  // into a single topN runner that finalizes results.
  List<QueryRunner<Result<TopNResultValue>>> singleSegmentRunners = new ArrayList<>();
  QueryToolChest toolChest = topNFactory.getToolchest();
  for (int i = 0; i < numSegments; i++) {
    SegmentId segmentId = SegmentId.dummy("qIndex " + i);
    QueryRunner<Result<TopNResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(
        topNFactory,
        segmentId,
        new QueryableIndexSegment(qIndexes.get(i), segmentId)
    );
    singleSegmentRunners.add(
        new PerSegmentOptimizingQueryRunner<>(
            toolChest.preMergeQueryDecoration(runner),
            new PerSegmentQueryOptimizationContext(new SegmentDescriptor(segmentIntervals[i], "1", 0))
        )
    );
  }
  topNRunner = toolChest.postMergeQueryDecoration(
      new FinalizeResultsQueryRunner<>(
          toolChest.mergeResults(topNFactory.mergeRunners(executorService, singleSegmentRunners)),
          toolChest
      )
  );
  // Build the equivalent merged runner for the timeseries query.
  List<QueryRunner<Result<TimeseriesResultValue>>> singleSegmentRunnersT = new ArrayList<>();
  QueryToolChest toolChestT = timeseriesFactory.getToolchest();
  for (int i = 0; i < numSegments; i++) {
    SegmentId segmentId = SegmentId.dummy("qIndex " + i);
    QueryRunner<Result<TimeseriesResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(
        timeseriesFactory,
        segmentId,
        new QueryableIndexSegment(qIndexes.get(i), segmentId)
    );
    singleSegmentRunnersT.add(
        new PerSegmentOptimizingQueryRunner<>(
            toolChestT.preMergeQueryDecoration(runner),
            new PerSegmentQueryOptimizationContext(new SegmentDescriptor(segmentIntervals[i], "1", 0))
        )
    );
  }
  timeseriesRunner = toolChestT.postMergeQueryDecoration(
      new FinalizeResultsQueryRunner<>(
          toolChestT.mergeResults(timeseriesFactory.mergeRunners(executorService, singleSegmentRunnersT)),
          toolChestT
      )
  );
}
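The interval loop above slices the schema's data interval into numSegments equal, contiguous pieces, with integer division silently dropping any remainder milliseconds. A standalone sketch of the same arithmetic (the class and method names here are illustrative only, not part of the benchmark):

import org.apache.druid.java.util.common.Intervals;
import org.joda.time.Interval;

// Illustrative helper: split [startMillis, endMillis) into n contiguous slices,
// mirroring the segmentIntervals loop in TimeCompareBenchmark.setup().
public class IntervalSplitter {
  public static Interval[] splitEvenly(long startMillis, long endMillis, int n) {
    final long sliceMillis = (endMillis - startMillis) / n; // integer division drops any remainder
    final Interval[] slices = new Interval[n];
    long cursor = startMillis;
    for (int i = 0; i < n; i++) {
      slices[i] = Intervals.utc(cursor, cursor + sliceMillis);
      cursor += sliceMillis;
    }
    return slices;
  }
}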
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde in project druid by druid-io.
Class SearchBenchmark, method setup().
/**
 * Setup everything common for benchmarking both the incremental index and the queryable index.
 */
@Setup
public void setup() {
  log.info("SETUP CALLED AT " + System.currentTimeMillis());
  ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
  setupQueries();
  // schemaAndQuery is a dot-separated "<schema>.<query>" pair.
  String[] schemaQuery = schemaAndQuery.split("\\.");
  String schemaName = schemaQuery[0];
  String queryName = schemaQuery[1];
  schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
  queryBuilder = SCHEMA_QUERY_MAP.get(schemaName).get(queryName);
  queryBuilder.limit(limit);
  query = queryBuilder.build();
  generator = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED, schemaInfo.getDataInterval(), rowsPerSegment);
  final SearchQueryConfig config = new SearchQueryConfig().withOverrides(query);
  factory = new SearchQueryRunnerFactory(
      new SearchStrategySelector(Suppliers.ofInstance(config)),
      new SearchQueryQueryToolChest(config),
      QueryBenchmarkUtil.NOOP_QUERYWATCHER
  );
}
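The split above assumes a well-formed "<schema>.<query>" parameter; on malformed input, indexing schemaQuery[1] would throw an ArrayIndexOutOfBoundsException. A minimal sketch of the parsing with a defensive check the benchmark itself omits ("basic.A" is a hypothetical example value):

// Illustrative parsing of the "<schema>.<query>" benchmark parameter.
public class SchemaQueryParamSketch {
  public static void main(String[] args) {
    String schemaAndQuery = "basic.A"; // hypothetical example value
    String[] schemaQuery = schemaAndQuery.split("\\.");
    if (schemaQuery.length != 2) {
      throw new IllegalArgumentException("Expected <schema>.<query>, got: " + schemaAndQuery);
    }
    String schemaName = schemaQuery[0]; // "basic"
    String queryName = schemaQuery[1];  // "A"
    System.out.println("schema=" + schemaName + ", query=" + queryName);
  }
}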
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde in project druid by druid-io.
Class ComplexMetricsTest, method testConflicting().
@Test
public void testConflicting() {
  ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
  ComplexMetricSerde serde = ComplexMetrics.getSerdeForType("hyperUnique");
  Assert.assertNotNull(serde);
  Assert.assertTrue(serde instanceof HyperUniquesSerde);
  // Registering a different serde class for an already-registered type must fail.
  expectedException.expect(IllegalStateException.class);
  expectedException.expectMessage(
      "Incompatible serializer for type[hyperUnique] already exists. Expected [org.apache.druid.query.aggregation.SerializablePairLongStringSerde], found [org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde"
  );
  ComplexMetrics.registerSerde("hyperUnique", new SerializablePairLongStringSerde());
  // Note: once registerSerde throws, the ExpectedException rule ends the test,
  // so the assertions below do not actually run.
  serde = ComplexMetrics.getSerdeForType("hyperUnique");
  Assert.assertNotNull(serde);
  Assert.assertTrue(serde instanceof HyperUniquesSerde);
}
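The conflict semantics exercised here suggest that registration is keyed by serde class: re-registering the same type with the same class is tolerated (the benchmarks above all call registerSerde("hyperUnique", ...) independently), while a different class fails fast. A sketch inferred from this test, not a definitive statement of the ComplexMetrics contract:

import org.apache.druid.query.aggregation.SerializablePairLongStringSerde;
import org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde;
import org.apache.druid.segment.serde.ComplexMetrics;

public class SerdeRegistrationSketch {
  public static void main(String[] args) {
    // Registering the same serde class twice for one type is expected to be harmless.
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());

    // Registering a different serde class for the same type should fail fast,
    // leaving the original registration in place (as the test above asserts).
    try {
      ComplexMetrics.registerSerde("hyperUnique", new SerializablePairLongStringSerde());
    } catch (IllegalStateException e) {
      System.out.println("Conflict rejected: " + e.getMessage());
    }
  }
}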
Use of org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde in project druid by druid-io.
Class SegmentGenerator, method generate().
public QueryableIndex generate(
    final DataSegment dataSegment,
    final GeneratorSchemaInfo schemaInfo,
    final Granularity granularity,
    final int numRows
) {
  // In case we need to generate hyperUniques.
  ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
  // Cache key: a SHA-256 over the segment id, schema, granularity, and row count.
  final String dataHash = Hashing.sha256()
      .newHasher()
      .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8)
      .putString(schemaInfo.toString(), StandardCharsets.UTF_8)
      .putString(granularity.toString(), StandardCharsets.UTF_8)
      .putInt(numRows)
      .hash()
      .toString();
  final File outDir = new File(getSegmentDir(dataSegment.getId(), dataHash), "merged");
  if (outDir.exists()) {
    try {
      log.info("Found segment with hash[%s] cached in directory[%s].", dataHash, outDir);
      return TestHelper.getTestIndexIO().loadIndex(outDir);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
  log.info("Writing segment with hash[%s] to directory[%s].", dataHash, outDir);
  // Use the segment identifier's hashCode as the generator seed.
  final DataGenerator dataGenerator = new DataGenerator(
      schemaInfo.getColumnSchemas(),
      dataSegment.getId().hashCode(),
      schemaInfo.getDataInterval(),
      numRows
  );
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(schemaInfo.getDimensionsSpec())
      .withMetrics(schemaInfo.getAggsArray())
      .withRollup(schemaInfo.isWithRollup())
      .withQueryGranularity(granularity)
      .build();
  // Generate rows, spilling to an intermediate index every MAX_ROWS_IN_MEMORY rows.
  final List<InputRow> rows = new ArrayList<>();
  final List<QueryableIndex> indexes = new ArrayList<>();
  for (int i = 0; i < numRows; i++) {
    final InputRow row = dataGenerator.nextRow();
    rows.add(row);
    if ((i + 1) % 20000 == 0) {
      log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment);
    }
    if (rows.size() % MAX_ROWS_IN_MEMORY == 0) {
      indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema));
      rows.clear();
    }
  }
  log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment);
  if (rows.size() > 0) {
    indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema));
    rows.clear();
  }
  final QueryableIndex retVal;
  if (indexes.isEmpty()) {
    throw new ISE("No rows to index?");
  } else {
    // Merge the intermediate indexes into a single persisted segment, then reload it.
    try {
      final IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
      retVal = TestHelper.getTestIndexIO().loadIndex(
          TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance())
                    .mergeQueryableIndex(
                        indexes,
                        false,
                        schemaInfo.getAggs()
                                  .stream()
                                  .map(AggregatorFactory::getCombiningFactory)
                                  .toArray(AggregatorFactory[]::new),
                        null,
                        outDir,
                        indexSpec,
                        indexSpec,
                        new BaseProgressIndicator(),
                        null,
                        -1
                    )
      );
      for (QueryableIndex index : indexes) {
        index.close();
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
  log.info("Finished writing segment[%s] to[%s]", dataSegment, outDir);
  return retVal;
}
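A hedged usage sketch of generate: the data source name, shard spec, and row count below are hypothetical, and the builder calls follow the pattern used elsewhere in Druid's benchmarks. Because generate caches by the SHA-256 hash above, a second call with identical arguments reloads the persisted segment instead of regenerating it.

import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;

public class SegmentGeneratorSketch {
  public static void main(String[] args) throws Exception {
    // SegmentGenerator is Closeable; closing it cleans up its working directory.
    try (SegmentGenerator segmentGenerator = new SegmentGenerator()) {
      GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
      DataSegment dataSegment = DataSegment.builder()
                                           .dataSource("bench")   // hypothetical name
                                           .interval(schemaInfo.getDataInterval())
                                           .version("1")
                                           .shardSpec(new LinearShardSpec(0))
                                           .size(0)
                                           .build();
      // 10,000 rows is an illustrative count; repeated calls hit the on-disk cache.
      QueryableIndex index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, 10_000);
    }
  }
}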