Use of org.apache.druid.segment.data.RoaringBitmapSerdeFactory in the project druid by druid-io.
From the class IndexTaskSerdeTest, method testSerdeTuningConfigWithHashedPartitionsSpec.
/**
 * Builds an {@link IndexTuningConfig} whose partitions spec is a {@link HashedPartitionsSpec}
 * (10 shards over {@code dim1} and {@code dim2}) and whose index spec uses a roaring bitmap
 * factory, then hands it to {@code assertSerdeTuningConfig} for verification.
 *
 * @throws IOException propagated from {@code assertSerdeTuningConfig}
 */
@Test
public void testSerdeTuningConfigWithHashedPartitionsSpec() throws IOException {
  final HashedPartitionsSpec hashedPartitions =
      new HashedPartitionsSpec(null, 10, ImmutableList.of("dim1", "dim2"));
  final IndexSpec roaringIndexSpec = new IndexSpec(
      new RoaringBitmapSerdeFactory(false),
      CompressionStrategy.LZ4,
      CompressionStrategy.LZF,
      LongEncodingStrategy.LONGS
  );

  // Positional arguments are kept in exactly the original order; only the two nested
  // constructor calls above were pulled out into named locals.
  final IndexTuningConfig tuningConfig = new IndexTuningConfig(
      null, null, null, 100, 2000L, null, null, null, null, null,
      hashedPartitions,
      roaringIndexSpec,
      null, null, true, null, null, 100L,
      OffHeapMemorySegmentWriteOutMediumFactory.instance(),
      true, 10, 100, null, -1L
  );

  assertSerdeTuningConfig(tuningConfig);
}
Use of org.apache.druid.segment.data.RoaringBitmapSerdeFactory in the project druid by druid-io.
From the class SegmentGenerator, method generate.
/**
 * Generates a queryable index for the given segment, or loads it from disk if a previous run
 * already wrote one.
 *
 * <p>A SHA-256 hash of the segment id, {@code schemaInfo.toString()}, {@code granularity.toString()}
 * and {@code numRows} selects the output directory. If that directory already exists, the index
 * is loaded from it and returned. Otherwise rows are produced by a {@link DataGenerator} seeded
 * with the segment id's hash code, flushed into intermediate indexes every
 * {@code MAX_ROWS_IN_MEMORY} rows, and finally merged into the output directory.
 *
 * <p>The intermediate indexes are closed once the merge attempt finishes, whether it succeeded
 * or failed, so a failed merge does not leave them open.
 *
 * @param dataSegment segment whose id is used for the cache key, the directory layout and the
 *                    generator seed
 * @param schemaInfo  supplies column schemas, data interval, dimensions spec, aggregators and
 *                    the rollup flag
 * @param granularity query granularity for the incremental index schema
 * @param numRows     number of rows to generate
 * @return the merged (or previously cached) index
 * @throws ISE              if no intermediate index was produced (e.g. {@code numRows} is not positive)
 * @throws RuntimeException wrapping any {@link IOException} raised while loading or merging
 */
public QueryableIndex generate(final DataSegment dataSegment, final GeneratorSchemaInfo schemaInfo, final Granularity granularity, final int numRows) {
  // In case we need to generate hyperUniques.
  ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());

  final String dataHash = Hashing.sha256()
      .newHasher()
      .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8)
      .putString(schemaInfo.toString(), StandardCharsets.UTF_8)
      .putString(granularity.toString(), StandardCharsets.UTF_8)
      .putInt(numRows)
      .hash()
      .toString();

  final File outDir = new File(getSegmentDir(dataSegment.getId(), dataHash), "merged");

  if (outDir.exists()) {
    try {
      log.info("Found segment with hash[%s] cached in directory[%s].", dataHash, outDir);
      return TestHelper.getTestIndexIO().loadIndex(outDir);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  log.info("Writing segment with hash[%s] to directory[%s].", dataHash, outDir);

  final DataGenerator dataGenerator = new DataGenerator(
      schemaInfo.getColumnSchemas(),
      dataSegment.getId().hashCode(), /* Use segment identifier hashCode as seed */
      schemaInfo.getDataInterval(),
      numRows
  );

  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(schemaInfo.getDimensionsSpec())
      .withMetrics(schemaInfo.getAggsArray())
      .withRollup(schemaInfo.isWithRollup())
      .withQueryGranularity(granularity)
      .build();

  final List<InputRow> rows = new ArrayList<>();
  final List<QueryableIndex> indexes = new ArrayList<>();

  for (int i = 0; i < numRows; i++) {
    final InputRow row = dataGenerator.nextRow();
    rows.add(row);

    if ((i + 1) % 20000 == 0) {
      log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment);
    }

    // The buffer is cleared after every flush, so this fires each time it fills up.
    if (rows.size() % MAX_ROWS_IN_MEMORY == 0) {
      indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema));
      rows.clear();
    }
  }

  log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment);

  // Flush whatever is left over from the last partial batch.
  if (rows.size() > 0) {
    indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema));
    rows.clear();
  }

  if (indexes.isEmpty()) {
    throw new ISE("No rows to index?");
  }

  final QueryableIndex retVal;
  try {
    try {
      final IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
      retVal = TestHelper.getTestIndexIO().loadIndex(
          TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance()).mergeQueryableIndex(
              indexes,
              false,
              schemaInfo.getAggs().stream().map(AggregatorFactory::getCombiningFactory).toArray(AggregatorFactory[]::new),
              null,
              outDir,
              indexSpec,
              indexSpec,
              new BaseProgressIndicator(),
              null,
              -1
          )
      );
    } finally {
      // Release the intermediate indexes even when merging or loading fails; previously they
      // were only closed on the success path and leaked otherwise.
      for (QueryableIndex index : indexes) {
        index.close();
      }
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  log.info("Finished writing segment[%s] to[%s]", dataSegment, outDir);
  return retVal;
}
Use of org.apache.druid.segment.data.RoaringBitmapSerdeFactory in the project druid by druid-io.
From the class CompactionTaskTest, method testCreateIngestionSchemaWithNumShards.
/**
 * Creates ingestion specs via {@code CompactionTask.createIngestionSchema} using a tuning config
 * whose partitions spec is a {@link HashedPartitionsSpec} with 3 shards, sorts the resulting
 * specs by their first input interval, and checks that 6 specs were produced and that they
 * match the expected schema through {@code assertIngestionSchema}.
 *
 * @throws IOException             propagated from the calls under test
 * @throws SegmentLoadingException propagated from the calls under test
 */
@Test
public void testCreateIngestionSchemaWithNumShards() throws IOException, SegmentLoadingException {
  final HashedPartitionsSpec threeShards = new HashedPartitionsSpec(null, 3, null);
  final IndexSpec roaringIndexSpec = new IndexSpec(
      new RoaringBitmapSerdeFactory(true),
      CompressionStrategy.LZ4,
      CompressionStrategy.LZF,
      LongEncodingStrategy.LONGS
  );

  // Positional arguments are kept in exactly the original order.
  final CompactionTask.CompactionTuningConfig tuningConfig = new CompactionTask.CompactionTuningConfig(
      null, null, null, 500000, 1000000L, null, null, null, null,
      threeShards,
      roaringIndexSpec,
      null, null, true, false, 5000L, null, null, 10, null, null, null, null, null, null, null, null, null, null, null
  );

  final SegmentProvider segmentProvider =
      new SegmentProvider(DATA_SOURCE, new CompactionIntervalSpec(COMPACTION_INTERVAL, null));
  final PartitionConfigurationManager partitionConfigurationManager =
      new PartitionConfigurationManager(tuningConfig);

  final List<ParallelIndexIngestionSpec> ingestionSpecs = CompactionTask.createIngestionSchema(
      toolbox,
      LockGranularity.TIME_CHUNK,
      segmentProvider,
      partitionConfigurationManager,
      null,
      null,
      null,
      null,
      COORDINATOR_CLIENT,
      segmentCacheManagerFactory,
      RETRY_POLICY_FACTORY,
      IOConfig.DEFAULT_DROP_EXISTING
  );
  final List<DimensionsSpec> expectedDimensionsSpec = getExpectedDimensionsSpecForAutoGeneration();

  // Order the specs by the first input interval of each one.
  ingestionSpecs.sort((left, right) -> {
    return Comparators.intervalsByStartThenEnd().compare(
        left.getDataSchema().getGranularitySpec().inputIntervals().get(0),
        right.getDataSchema().getGranularitySpec().inputIntervals().get(0)
    );
  });

  Assert.assertEquals(6, ingestionSpecs.size());

  final List<AggregatorFactory> expectedMetrics = AGGREGATORS
      .stream()
      .map(AggregatorFactory::getCombiningFactory)
      .collect(Collectors.toList());
  assertIngestionSchema(
      ingestionSpecs,
      expectedDimensionsSpec,
      expectedMetrics,
      SEGMENT_INTERVALS,
      tuningConfig,
      Granularities.MONTH,
      Granularities.NONE,
      IOConfig.DEFAULT_DROP_EXISTING
  );
}
Use of org.apache.druid.segment.data.RoaringBitmapSerdeFactory in the project druid by druid-io.
From the class IndexSpecTest, method testSerde.
/**
 * Deserializes an {@link IndexSpec} from a JSON document that sets the bitmap type, both
 * compression strategies and the long encoding, checks each parsed property, and then verifies
 * that serializing the spec and reading it back yields an equal object.
 *
 * @throws Exception propagated from the JSON read and write calls
 */
@Test
public void testSerde() throws Exception {
  final ObjectMapper objectMapper = new DefaultObjectMapper();

  // One fragment per field; the concatenated text is the same JSON document as before.
  final String json = "{ \"bitmap\" : { \"type\" : \"roaring\" }"
                      + ", \"dimensionCompression\" : \"lz4\""
                      + ", \"metricCompression\" : \"lzf\""
                      + ", \"longEncoding\" : \"auto\" }";

  final IndexSpec spec = objectMapper.readValue(json, IndexSpec.class);

  Assert.assertEquals(new RoaringBitmapSerdeFactory(null), spec.getBitmapSerdeFactory());
  Assert.assertEquals(CompressionStrategy.LZ4, spec.getDimensionCompression());
  Assert.assertEquals(CompressionStrategy.LZF, spec.getMetricCompression());
  Assert.assertEquals(CompressionFactory.LongEncodingStrategy.AUTO, spec.getLongEncoding());

  // Round trip: write the parsed spec out and read it back.
  final byte[] serialized = objectMapper.writeValueAsBytes(spec);
  final IndexSpec roundTripped = objectMapper.readValue(serialized, IndexSpec.class);
  Assert.assertEquals(spec, roundTripped);
}
Use of org.apache.druid.segment.data.RoaringBitmapSerdeFactory in the project druid by druid-io.
From the class DictionaryEncodedColumnPartSerdeTest, method testSerde.
/**
 * Round-trips two {@code stringDictionary} column part serde definitions through JSON
 * (read, write, read again) and checks the resulting byte order and bitmap serde factory type:
 * a {@link ConciseBitmapSerdeFactory} when no factory is given in the JSON, and a
 * {@link RoaringBitmapSerdeFactory} when the JSON asks for the {@code roaring} type.
 *
 * @throws Exception propagated from the JSON read and write calls
 */
@Test
public void testSerde() throws Exception {
  final ObjectMapper mapper = TestHelper.makeJsonMapper();

  // bitmapSerdeFactory not specified
  final String jsonWithoutFactory = "{\n"
                                    + " \"type\": \"stringDictionary\",\n"
                                    + " \"byteOrder\": \"BIG_ENDIAN\"\n"
                                    + "}\n";
  final ColumnPartSerde parsedWithoutFactory = mapper.readValue(jsonWithoutFactory, ColumnPartSerde.class);
  final String rewrittenWithoutFactory = mapper.writeValueAsString(parsedWithoutFactory);
  final DictionaryEncodedColumnPartSerde defaultSerde =
      (DictionaryEncodedColumnPartSerde) mapper.readValue(rewrittenWithoutFactory, ColumnPartSerde.class);

  Assert.assertEquals(ByteOrder.BIG_ENDIAN, defaultSerde.getByteOrder());
  Assert.assertTrue(defaultSerde.getBitmapSerdeFactory() instanceof ConciseBitmapSerdeFactory);

  // bitmapSerdeFactory specified
  final String jsonWithFactory = "{\n"
                                 + "\"type\": \"stringDictionary\",\n"
                                 + "\"byteOrder\": \"LITTLE_ENDIAN\",\n"
                                 + "\"bitmapSerdeFactory\": { \"type\": \"roaring\" }\n"
                                 + "}";
  final ColumnPartSerde parsedWithFactory = mapper.readValue(jsonWithFactory, ColumnPartSerde.class);
  final String rewrittenWithFactory = mapper.writeValueAsString(parsedWithFactory);
  final DictionaryEncodedColumnPartSerde roaringSerde =
      (DictionaryEncodedColumnPartSerde) mapper.readValue(rewrittenWithFactory, ColumnPartSerde.class);

  Assert.assertEquals(ByteOrder.LITTLE_ENDIAN, roaringSerde.getByteOrder());
  Assert.assertTrue(roaringSerde.getBitmapSerdeFactory() instanceof RoaringBitmapSerdeFactory);
}
Aggregations