Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class SegmentAnalyzerTest, method testAnalyzingSegmentWithNonExistentAggregator.
/**
 * This test verifies that if a segment was created using an unknown/invalid aggregator
 * (which can happen if an aggregator was removed in a later version), then analyzing
 * the segment doesn't fail, and the analysis of the complex column is reported as an error.
 *
 * @throws IOException
 */
@Test
public void testAnalyzingSegmentWithNonExistentAggregator() throws IOException {
  final URL resource = SegmentAnalyzerTest.class.getClassLoader().getResource("druid.sample.numeric.tsv");
  CharSource source = Resources.asByteSource(resource).asCharSource(StandardCharsets.UTF_8);
  String invalid_aggregator = "invalid_aggregator";
  AggregatorFactory[] metrics = new AggregatorFactory[] {
      new DoubleSumAggregatorFactory(TestIndex.DOUBLE_METRICS[0], "index"),
      new HyperUniquesAggregatorFactory("quality_uniques", "quality"),
      new InvalidAggregatorFactory(invalid_aggregator, "quality")
  };
  final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis())
      .withTimestampSpec(new TimestampSpec("ds", "auto", null))
      .withDimensionsSpec(TestIndex.DIMENSIONS_SPEC)
      .withMetrics(metrics)
      .withRollup(true)
      .build();
  final IncrementalIndex retVal = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(schema)
      .setMaxRowCount(10000)
      .build();
  IncrementalIndex incrementalIndex = TestIndex.loadIncrementalIndex(retVal, source);

  // Analyze the in-memory segment; analysis succeeds and the complex column reports its (invalid) type.
  {
    SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
    IncrementalIndexSegment segment = new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("ds"));
    Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
    ColumnAnalysis columnAnalysis = analyses.get(invalid_aggregator);
    Assert.assertFalse(columnAnalysis.isError());
    Assert.assertEquals("invalid_complex_column_type", columnAnalysis.getType());
    Assert.assertEquals(ColumnType.ofComplex("invalid_complex_column_type"), columnAnalysis.getTypeSignature());
  }

  // Persist the index.
  final File segmentFile = TestIndex.INDEX_MERGER.persist(
      incrementalIndex,
      temporaryFolder.newFolder(),
      TestIndex.INDEX_SPEC,
      null
  );

  // Unload the complex serde, then analyze the persisted segment; the complex column is now reported as an error.
  ComplexMetrics.unregisterSerde(InvalidAggregatorFactory.TYPE);
  {
    SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
    QueryableIndexSegment segment = new QueryableIndexSegment(TestIndex.INDEX_IO.loadIndex(segmentFile), SegmentId.dummy("ds"));
    Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
    ColumnAnalysis invalidColumnAnalysis = analyses.get(invalid_aggregator);
    Assert.assertTrue(invalidColumnAnalysis.isError());
    Assert.assertEquals("error:unknown_complex_invalid_complex_column_type", invalidColumnAnalysis.getErrorMessage());

    // Run a segment metadata query as well, to verify it doesn't break either.
    final List<SegmentAnalysis> results = getSegmentAnalysises(segment, EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
    for (SegmentAnalysis result : results) {
      Assert.assertTrue(result.getColumns().get(invalid_aggregator).isError());
    }
  }
}
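For context, here is a minimal sketch of how the analyzer's output could be inspected across all columns. It assumes the same analyzer and segment objects built in the test above are in scope; the printed labels are illustrative only.

// Minimal sketch (assumes the analyzer and segment from the test above are in scope):
// walk every column's ColumnAnalysis and report whether analysis succeeded.
Map<String, ColumnAnalysis> allAnalyses = analyzer.analyze(segment);
for (Map.Entry<String, ColumnAnalysis> entry : allAnalyses.entrySet()) {
  ColumnAnalysis analysis = entry.getValue();
  if (analysis.isError()) {
    // Columns backed by an unknown complex type end up here once their serde is unregistered.
    System.out.println("column " + entry.getKey() + " failed analysis: " + analysis.getErrorMessage());
  } else {
    System.out.println("column " + entry.getKey() + " has type " + analysis.getType());
  }
}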
Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class Sink, method makeNewCurrIndex.
private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) {
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(minTimestamp)
      .withTimestampSpec(schema.getTimestampSpec())
      .withQueryGranularity(schema.getGranularitySpec().getQueryGranularity())
      .withDimensionsSpec(schema.getDimensionsSpec())
      .withMetrics(schema.getAggregators())
      .withRollup(schema.getGranularitySpec().isRollup())
      .build();

  // Build the incremental-index according to the spec that was chosen by the user
  final IncrementalIndex newIndex = appendableIndexSpec.builder()
      .setIndexSchema(indexSchema)
      .setMaxRowCount(maxRowsInMemory)
      .setMaxBytesInMemory(maxBytesInMemory)
      .setUseMaxMemoryEstimates(useMaxMemoryEstimates)
      .build();

  final FireHydrant old;
  synchronized (hydrantLock) {
    if (writable) {
      old = currHydrant;
      int newCount = 0;
      int numHydrants = hydrants.size();
      if (numHydrants > 0) {
        FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
        newCount = lastHydrant.getCount() + 1;
        if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
          // Carry over the dimension order and column capabilities from the previous hydrant,
          // so the new index starts with the schema discovered so far.
          Map<String, ColumnCapabilities> oldCapabilities;
          if (lastHydrant.hasSwapped()) {
            oldCapabilities = new HashMap<>();
            ReferenceCountingSegment segment = lastHydrant.getIncrementedSegment();
            try {
              QueryableIndex oldIndex = segment.asQueryableIndex();
              for (String dim : oldIndex.getAvailableDimensions()) {
                dimOrder.add(dim);
                oldCapabilities.put(dim, oldIndex.getColumnHolder(dim).getCapabilities());
              }
            } finally {
              segment.decrement();
            }
          } else {
            IncrementalIndex oldIndex = lastHydrant.getIndex();
            dimOrder.addAll(oldIndex.getDimensionOrder());
            oldCapabilities = oldIndex.getColumnCapabilities();
          }
          newIndex.loadDimensionIterable(dimOrder, oldCapabilities);
        }
      }
      currHydrant = new FireHydrant(newIndex, newCount, getSegment().getId());
      if (old != null) {
        numRowsExcludingCurrIndex.addAndGet(old.getIndex().size());
      }
      hydrants.add(currHydrant);
    } else {
      // Oops, someone called finishWriting while we were making this new index.
      newIndex.close();
      throw new ISE("finishWriting() called during swap");
    }
  }
  return old;
}
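The schema construction above pulls everything from the user's DataSchema. As a standalone illustration, here is a hedged sketch of the same builder calls with literal values; the timestamp column, dimension name, and aggregator are hypothetical stand-ins, not values taken from Sink itself.

// Hedged sketch: the same IncrementalIndexSchema.Builder calls as in makeNewCurrIndex,
// but with hypothetical literal inputs instead of a DataSchema.
IncrementalIndexSchema sketchSchema = new IncrementalIndexSchema.Builder()
    .withMinTimestamp(DateTimes.of("2020-01-01T00:00:00.000Z").getMillis())
    .withTimestampSpec(new TimestampSpec("ts", "auto", null))
    .withQueryGranularity(Granularities.MINUTE)
    .withDimensionsSpec(new DimensionsSpec(Collections.singletonList(new StringDimensionSchema("dim"))))
    .withMetrics(new CountAggregatorFactory("cnt"))
    .withRollup(true)
    .build();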
Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class DataGeneratorTest, method testToIndex.
@Test
public void testToIndex() {
  List<GeneratorColumnSchema> schemas = new ArrayList<>();
  schemas.add(GeneratorColumnSchema.makeSequential("dimA", ValueType.STRING, false, 1, null, 10, 20));
  schemas.add(GeneratorColumnSchema.makeEnumeratedSequential("dimB", ValueType.STRING, false, 1, null, Arrays.asList("Hello", "World", "Foo", "Bar")));
  schemas.add(GeneratorColumnSchema.makeSequential("dimC", ValueType.STRING, false, 1, 0.50, 30, 40));
  DataGenerator dataGenerator = new DataGenerator(schemas, 9999, 0, 0, 1000.0);

  DimensionsSpec dimensions = new DimensionsSpec(
      Arrays.asList(
          new StringDimensionSchema("dimA"),
          new StringDimensionSchema("dimB"),
          new StringDimensionSchema("dimC")
      )
  );
  AggregatorFactory[] metrics = { new CountAggregatorFactory("cnt") };
  final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withQueryGranularity(Granularities.MINUTE)
      .withDimensionsSpec(dimensions)
      .withMetrics(metrics)
      .withRollup(false)
      .build();
  IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(schema)
      .setSortFacts(false)
      .setMaxRowCount(1_000_000)
      .build();

  dataGenerator.addToIndex(index, 100);
  Assert.assertEquals(100, index.size());
}
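As a possible follow-up, the generated in-memory index could be persisted and reloaded with the same helpers the other examples in this listing use. This sketch assumes a JUnit TemporaryFolder rule named temporaryFolder is available in the test class, and the enclosing test would additionally need to declare throws IOException.

// Hedged sketch (assumes a JUnit TemporaryFolder rule named temporaryFolder):
// persist the generated index to disk and reload it as a QueryableIndex.
File segmentDir = TestIndex.INDEX_MERGER.persist(index, temporaryFolder.newFolder(), TestIndex.INDEX_SPEC, null);
QueryableIndex queryableIndex = TestIndex.INDEX_IO.loadIndex(segmentDir);
Assert.assertEquals(100, queryableIndex.getNumRows());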
Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class SegmentGenerator, method generateIncrementalIndex.
public IncrementalIndex generateIncrementalIndex(
    final DataSegment dataSegment,
    final GeneratorSchemaInfo schemaInfo,
    final Granularity granularity,
    final int numRows
) {
  // In case we need to generate hyperUniques.
  ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
  final String dataHash = Hashing.sha256()
      .newHasher()
      .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8)
      .putString(schemaInfo.toString(), StandardCharsets.UTF_8)
      .putString(granularity.toString(), StandardCharsets.UTF_8)
      .putInt(numRows)
      .hash()
      .toString();
  final DataGenerator dataGenerator = new DataGenerator(
      schemaInfo.getColumnSchemas(),
      dataSegment.getId().hashCode(), /* Use segment identifier hashCode as seed */
      schemaInfo.getDataInterval(),
      numRows
  );
  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(schemaInfo.getDimensionsSpec())
      .withMetrics(schemaInfo.getAggsArray())
      .withRollup(schemaInfo.isWithRollup())
      .withQueryGranularity(granularity)
      .build();
  final List<InputRow> rows = new ArrayList<>();
  for (int i = 0; i < numRows; i++) {
    final InputRow row = dataGenerator.nextRow();
    rows.add(row);
    if ((i + 1) % 20000 == 0) {
      log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment);
    }
  }
  log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment);
  return makeIncrementalIndex(dataSegment.getId(), dataHash, 0, rows, indexSchema);
}
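The makeIncrementalIndex helper invoked at the end is not shown in this listing. Below is a hypothetical sketch of what such a helper might look like, using only the index-building calls that appear elsewhere in these examples; the real SegmentGenerator implementation may differ (for instance, it may cache or persist intermediate results), and the unused parameters simply mirror the call site above.

// Hypothetical sketch of a makeIncrementalIndex helper; not the actual SegmentGenerator code.
// segmentId, dataHash and indexNumber are unused in this simplified version.
private IncrementalIndex makeIncrementalIndex(
    SegmentId segmentId,
    String dataHash,
    int indexNumber,
    List<InputRow> rows,
    IncrementalIndexSchema indexSchema
) {
  IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setMaxRowCount(rows.size() + 1) // leave headroom so every generated row fits
      .build();
  for (InputRow row : rows) {
    try {
      index.add(row);
    } catch (Exception e) {
      // Surface any row-add failure (e.g. exceeding the row limit) as an unchecked exception.
      throw new RuntimeException(e);
    }
  }
  return index;
}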
Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class IndexMergerTestBase, method testMultivalDim_mergeAcrossSegments_rollupWorks.
@Test
public void testMultivalDim_mergeAcrossSegments_rollupWorks() throws Exception {
  List<String> dims = Arrays.asList("dimA", "dimMultiVal");
  IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(
          new DimensionsSpec(
              ImmutableList.of(
                  new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true),
                  new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true)
              )
          )
      )
      .withMetrics(new LongSumAggregatorFactory("sumCount", "sumCount"))
      .withRollup(true)
      .build();

  IncrementalIndex toPersistA = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setMaxRowCount(1000)
      .build();
  Map<String, Object> event1 = new HashMap<>();
  event1.put("dimA", "leek");
  event1.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
  event1.put("sumCount", 1L);
  Map<String, Object> event2 = new HashMap<>();
  event2.put("dimA", "leek");
  event2.put("dimMultiVal", ImmutableList.of("1", "2", "3", "5"));
  event2.put("sumCount", 1L);
  toPersistA.add(new MapBasedInputRow(1, dims, event1));
  toPersistA.add(new MapBasedInputRow(1, dims, event2));

  IncrementalIndex toPersistB = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(indexSchema)
      .setMaxRowCount(1000)
      .build();
  Map<String, Object> event3 = new HashMap<>();
  event3.put("dimA", "leek");
  event3.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
  event3.put("sumCount", 1L);
  Map<String, Object> event4 = new HashMap<>();
  event4.put("dimA", "potato");
  event4.put("dimMultiVal", ImmutableList.of("0", "1", "4"));
  event4.put("sumCount", 1L);
  toPersistB.add(new MapBasedInputRow(1, dims, event3));
  toPersistB.add(new MapBasedInputRow(1, dims, event4));

  final File tmpDirA = temporaryFolder.newFolder();
  final File tmpDirB = temporaryFolder.newFolder();
  final File tmpDirMerged = temporaryFolder.newFolder();
  QueryableIndex indexA = closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null)));
  QueryableIndex indexB = closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersistB, tmpDirB, indexSpec, null)));
  final QueryableIndex merged = closer.closeLater(
      indexIO.loadIndex(
          indexMerger.mergeQueryableIndex(
              Arrays.asList(indexA, indexB),
              true,
              new AggregatorFactory[] { new LongSumAggregatorFactory("sumCount", "sumCount") },
              tmpDirMerged,
              indexSpec,
              null,
              -1
          )
      )
  );

  final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
  final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());
  Assert.assertEquals(ImmutableList.of("dimA", "dimMultiVal"), ImmutableList.copyOf(adapter.getDimensionNames()));

  Assert.assertEquals(3, rowList.size());
  Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3", "5")), rowList.get(0).dimensionValues());
  Assert.assertEquals(1L, rowList.get(0).metricValues().get(0));
  Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "4")), rowList.get(1).dimensionValues());
  Assert.assertEquals(2L, rowList.get(1).metricValues().get(0));
  Assert.assertEquals(Arrays.asList("potato", Arrays.asList("0", "1", "4")), rowList.get(2).dimensionValues());
  Assert.assertEquals(1L, rowList.get(2).metricValues().get(0));

  checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimA", "leek"));
  checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimA", "potato"));
  checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimMultiVal", "0"));
  checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", "1"));
  checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimMultiVal", "2"));
  checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "3"));
  checkBitmapIndex(Arrays.asList(1, 2), adapter.getBitmapIndex("dimMultiVal", "4"));
  checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "5"));
}
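The checkBitmapIndex helper used above lives in IndexMergerTestBase and is not shown in this listing. Here is a hedged sketch of what it plausibly does, assuming adapter.getBitmapIndex returns a BitmapValues whose IntIterator yields the selected row ids; the real helper may differ.

// Hypothetical sketch of the checkBitmapIndex helper; the real IndexMergerTestBase version may differ.
// Asserts that the bitmap for a dimension value selects exactly the expected row ids, in order.
private static void checkBitmapIndex(List<Integer> expectedRows, BitmapValues bitmapValues) {
  List<Integer> actualRows = new ArrayList<>();
  IntIterator iterator = bitmapValues.iterator();
  while (iterator.hasNext()) {
    actualRows.add(iterator.next());
  }
  Assert.assertEquals(expectedRows, actualRows);
}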