Example 1 with IncrementalIndexSchema

Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.

From class SegmentAnalyzerTest, method testAnalyzingSegmentWithNonExistentAggregator.

/**
 * This test verifies that if a segment was created using an unknown/invalid aggregator
 * (which can happen when an aggregator is removed in a later version), analyzing
 * the segment does not fail, and the analysis of the complex column is reported
 * as an error.
 * @throws IOException
 */
@Test
public void testAnalyzingSegmentWithNonExistentAggregator() throws IOException {
    final URL resource = SegmentAnalyzerTest.class.getClassLoader().getResource("druid.sample.numeric.tsv");
    CharSource source = Resources.asByteSource(resource).asCharSource(StandardCharsets.UTF_8);
    String invalid_aggregator = "invalid_aggregator";
    AggregatorFactory[] metrics = new AggregatorFactory[] {
        new DoubleSumAggregatorFactory(TestIndex.DOUBLE_METRICS[0], "index"),
        new HyperUniquesAggregatorFactory("quality_uniques", "quality"),
        new InvalidAggregatorFactory(invalid_aggregator, "quality")
    };
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(DateTimes.of("2011-01-12T00:00:00.000Z").getMillis())
        .withTimestampSpec(new TimestampSpec("ds", "auto", null))
        .withDimensionsSpec(TestIndex.DIMENSIONS_SPEC)
        .withMetrics(metrics)
        .withRollup(true)
        .build();
    final IncrementalIndex retVal = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(schema)
        .setMaxRowCount(10000)
        .build();
    IncrementalIndex incrementalIndex = TestIndex.loadIncrementalIndex(retVal, source);
    // Analyze the in-memory segment.
    {
        SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
        IncrementalIndexSegment segment = new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("ds"));
        Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
        ColumnAnalysis columnAnalysis = analyses.get(invalid_aggregator);
        Assert.assertFalse(columnAnalysis.isError());
        Assert.assertEquals("invalid_complex_column_type", columnAnalysis.getType());
        Assert.assertEquals(ColumnType.ofComplex("invalid_complex_column_type"), columnAnalysis.getTypeSignature());
    }
    // Persist the index.
    final File segmentFile = TestIndex.INDEX_MERGER.persist(incrementalIndex, temporaryFolder.newFolder(), TestIndex.INDEX_SPEC, null);
    // Unload the complex serde, then analyze the persisted segment.
    ComplexMetrics.unregisterSerde(InvalidAggregatorFactory.TYPE);
    {
        SegmentAnalyzer analyzer = new SegmentAnalyzer(EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
        QueryableIndexSegment segment = new QueryableIndexSegment(TestIndex.INDEX_IO.loadIndex(segmentFile), SegmentId.dummy("ds"));
        Map<String, ColumnAnalysis> analyses = analyzer.analyze(segment);
        ColumnAnalysis invalidColumnAnalysis = analyses.get(invalid_aggregator);
        Assert.assertTrue(invalidColumnAnalysis.isError());
        Assert.assertEquals("error:unknown_complex_invalid_complex_column_type", invalidColumnAnalysis.getErrorMessage());
        // Also run a segment metadata query to verify it doesn't break
        final List<SegmentAnalysis> results = getSegmentAnalysises(segment, EnumSet.of(SegmentMetadataQuery.AnalysisType.SIZE));
        for (SegmentAnalysis result : results) {
            Assert.assertTrue(result.getColumns().get(invalid_aggregator).isError());
        }
    }
}
Also used : QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment) CharSource (com.google.common.io.CharSource) DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment) ColumnBuilder (org.apache.druid.segment.column.ColumnBuilder) HyperUniquesAggregatorFactory (org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory) URL (java.net.URL) TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec) ColumnAnalysis (org.apache.druid.query.metadata.metadata.ColumnAnalysis) List (java.util.List) SegmentAnalysis (org.apache.druid.query.metadata.metadata.SegmentAnalysis) Map (java.util.Map) File (java.io.File) IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema) InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) Test (org.junit.Test)
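
All of the examples on this page share the same two-step pattern: assemble an IncrementalIndexSchema through its Builder, then feed that schema to an index builder. The sketch below distills that pattern; the column names "page" and "hits" are hypothetical, and the types are the ones named in the "Also used" lists on this page (plus java.util.Collections).

static IncrementalIndex buildSmallIndex() {
    // Describe the rows: timestamp column, dimensions, metrics, rollup.
    IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(DateTimes.of("2020-01-01T00:00:00.000Z").getMillis())
        .withTimestampSpec(new TimestampSpec("ts", "auto", null))
        .withDimensionsSpec(new DimensionsSpec(Collections.singletonList(new StringDimensionSchema("page"))))
        .withMetrics(new CountAggregatorFactory("hits"))
        .withRollup(true)
        .build();
    // The index builder consumes the schema together with sizing limits.
    return new OnheapIncrementalIndex.Builder()
        .setIndexSchema(schema)
        .setMaxRowCount(10_000)
        .build();
}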

Example 2 with IncrementalIndexSchema

Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.

From class Sink, method makeNewCurrIndex.

private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema) {
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(minTimestamp)
        .withTimestampSpec(schema.getTimestampSpec())
        .withQueryGranularity(schema.getGranularitySpec().getQueryGranularity())
        .withDimensionsSpec(schema.getDimensionsSpec())
        .withMetrics(schema.getAggregators())
        .withRollup(schema.getGranularitySpec().isRollup())
        .build();
    // Build the incremental index according to the spec chosen by the user.
    final IncrementalIndex newIndex = appendableIndexSpec.builder()
        .setIndexSchema(indexSchema)
        .setMaxRowCount(maxRowsInMemory)
        .setMaxBytesInMemory(maxBytesInMemory)
        .setUseMaxMemoryEstimates(useMaxMemoryEstimates)
        .build();
    final FireHydrant old;
    synchronized (hydrantLock) {
        if (writable) {
            old = currHydrant;
            int newCount = 0;
            int numHydrants = hydrants.size();
            if (numHydrants > 0) {
                FireHydrant lastHydrant = hydrants.get(numHydrants - 1);
                newCount = lastHydrant.getCount() + 1;
                if (!indexSchema.getDimensionsSpec().hasCustomDimensions()) {
                    Map<String, ColumnCapabilities> oldCapabilities;
                    if (lastHydrant.hasSwapped()) {
                        oldCapabilities = new HashMap<>();
                        ReferenceCountingSegment segment = lastHydrant.getIncrementedSegment();
                        try {
                            QueryableIndex oldIndex = segment.asQueryableIndex();
                            for (String dim : oldIndex.getAvailableDimensions()) {
                                dimOrder.add(dim);
                                oldCapabilities.put(dim, oldIndex.getColumnHolder(dim).getCapabilities());
                            }
                        } finally {
                            segment.decrement();
                        }
                    } else {
                        IncrementalIndex oldIndex = lastHydrant.getIndex();
                        dimOrder.addAll(oldIndex.getDimensionOrder());
                        oldCapabilities = oldIndex.getColumnCapabilities();
                    }
                    newIndex.loadDimensionIterable(dimOrder, oldCapabilities);
                }
            }
            currHydrant = new FireHydrant(newIndex, newCount, getSegment().getId());
            if (old != null) {
                numRowsExcludingCurrIndex.addAndGet(old.getIndex().size());
            }
            hydrants.add(currHydrant);
        } else {
            // Oops, someone called finishWriting while we were making this new index.
            newIndex.close();
            throw new ISE("finishWriting() called during swap");
        }
    }
    return old;
}
Also used : ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) QueryableIndex(org.apache.druid.segment.QueryableIndex) ISE(org.apache.druid.java.util.common.ISE) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)
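
The detail worth noting in makeNewCurrIndex is the hand-off between hydrants: the freshly built index inherits the previous hydrant's dimension order and column capabilities, so successive hydrants of a sink keep a consistent column layout. A minimal sketch of just that hand-off, assuming oldIndex and indexSchema already exist:

// Build a replacement index from the same schema (Sink goes through
// appendableIndexSpec.builder() instead of OnheapIncrementalIndex directly).
IncrementalIndex newIndex = new OnheapIncrementalIndex.Builder()
    .setIndexSchema(indexSchema)
    .setMaxRowCount(10_000)
    .build();
// Seed it with the old dimension order and per-column capabilities; these are
// the same calls Sink makes for the non-swapped (still in-memory) hydrant.
newIndex.loadDimensionIterable(oldIndex.getDimensionOrder(), oldIndex.getColumnCapabilities());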

Example 3 with IncrementalIndexSchema

Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.

From class DataGeneratorTest, method testToIndex.

@Test
public void testToIndex() {
    List<GeneratorColumnSchema> schemas = new ArrayList<>();
    schemas.add(GeneratorColumnSchema.makeSequential("dimA", ValueType.STRING, false, 1, null, 10, 20));
    schemas.add(GeneratorColumnSchema.makeEnumeratedSequential("dimB", ValueType.STRING, false, 1, null, Arrays.asList("Hello", "World", "Foo", "Bar")));
    schemas.add(GeneratorColumnSchema.makeSequential("dimC", ValueType.STRING, false, 1, 0.50, 30, 40));
    DataGenerator dataGenerator = new DataGenerator(schemas, 9999, 0, 0, 1000.0);
    DimensionsSpec dimensions = new DimensionsSpec(
        Arrays.asList(
            new StringDimensionSchema("dimA"),
            new StringDimensionSchema("dimB"),
            new StringDimensionSchema("dimC")
        )
    );
    AggregatorFactory[] metrics = { new CountAggregatorFactory("cnt") };
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withQueryGranularity(Granularities.MINUTE)
        .withDimensionsSpec(dimensions)
        .withMetrics(metrics)
        .withRollup(false)
        .build();
    IncrementalIndex index = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(schema)
        .setSortFacts(false)
        .setMaxRowCount(1_000_000)
        .build();
    dataGenerator.addToIndex(index, 100);
    Assert.assertEquals(100, index.size());
}
Also used : IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex) ArrayList (java.util.ArrayList) AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory) StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema) DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec) IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema) InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) Test (org.junit.Test)
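
addToIndex is a convenience; the same rows can be pulled one at a time with nextRow (as SegmentGenerator does in the next example), which leaves room to inspect or filter each row before adding it. A sketch of the row-by-row equivalent, under the same schema as the test above:

// Equivalent to dataGenerator.addToIndex(index, 100), one row at a time.
// add() can throw IndexSizeExceededException once maxRowCount is exceeded.
static void fillRowByRow(DataGenerator dataGenerator, IncrementalIndex index)
        throws IndexSizeExceededException {
    for (int i = 0; i < 100; i++) {
        index.add(dataGenerator.nextRow());
    }
}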

Example 4 with IncrementalIndexSchema

Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.

From class SegmentGenerator, method generateIncrementalIndex.

public IncrementalIndex generateIncrementalIndex(final DataSegment dataSegment, final GeneratorSchemaInfo schemaInfo, final Granularity granularity, final int numRows) {
    // In case we need to generate hyperUniques.
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    final String dataHash = Hashing.sha256()
        .newHasher()
        .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8)
        .putString(schemaInfo.toString(), StandardCharsets.UTF_8)
        .putString(granularity.toString(), StandardCharsets.UTF_8)
        .putInt(numRows)
        .hash()
        .toString();
    final DataGenerator dataGenerator = new DataGenerator(
        schemaInfo.getColumnSchemas(),
        // Use the segment identifier's hashCode as the seed.
        dataSegment.getId().hashCode(),
        schemaInfo.getDataInterval(),
        numRows
    );
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withDimensionsSpec(schemaInfo.getDimensionsSpec())
        .withMetrics(schemaInfo.getAggsArray())
        .withRollup(schemaInfo.isWithRollup())
        .withQueryGranularity(granularity)
        .build();
    final List<InputRow> rows = new ArrayList<>();
    for (int i = 0; i < numRows; i++) {
        final InputRow row = dataGenerator.nextRow();
        rows.add(row);
        if ((i + 1) % 20000 == 0) {
            log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment);
        }
    }
    log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment);
    return makeIncrementalIndex(dataSegment.getId(), dataHash, 0, rows, indexSchema);
}
Also used : InputRow(org.apache.druid.data.input.InputRow) ArrayList(java.util.ArrayList) HyperUniquesSerde(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema)
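
ComplexMetrics.registerSerde mutates process-wide state, which is why Example 1 above had to call unregisterSerde to simulate a missing aggregator. When tests share a JVM, it can be worth scoping the registration; a sketch of that discipline (the try/finally pairing is an assumption, not something SegmentGenerator itself does):

// registerSerde appears in this example, unregisterSerde in Example 1;
// pairing them keeps one test's serde state from leaking into the next.
ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
try {
    // ... generate and analyze segments containing hyperUnique columns ...
} finally {
    ComplexMetrics.unregisterSerde("hyperUnique");
}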

Example 5 with IncrementalIndexSchema

Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.

From class IndexMergerTestBase, method testMultivalDim_mergeAcrossSegments_rollupWorks.

@Test
public void testMultivalDim_mergeAcrossSegments_rollupWorks() throws Exception {
    List<String> dims = Arrays.asList("dimA", "dimMultiVal");
    IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
        .withDimensionsSpec(
            new DimensionsSpec(
                ImmutableList.of(
                    new StringDimensionSchema("dimA", MultiValueHandling.SORTED_ARRAY, true),
                    new StringDimensionSchema("dimMultiVal", MultiValueHandling.SORTED_ARRAY, true)
                )
            )
        )
        .withMetrics(new LongSumAggregatorFactory("sumCount", "sumCount"))
        .withRollup(true)
        .build();
    IncrementalIndex toPersistA = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(indexSchema)
        .setMaxRowCount(1000)
        .build();
    Map<String, Object> event1 = new HashMap<>();
    event1.put("dimA", "leek");
    event1.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
    event1.put("sumCount", 1L);
    Map<String, Object> event2 = new HashMap<>();
    event2.put("dimA", "leek");
    event2.put("dimMultiVal", ImmutableList.of("1", "2", "3", "5"));
    event2.put("sumCount", 1L);
    toPersistA.add(new MapBasedInputRow(1, dims, event1));
    toPersistA.add(new MapBasedInputRow(1, dims, event2));
    IncrementalIndex toPersistB = new OnheapIncrementalIndex.Builder()
        .setIndexSchema(indexSchema)
        .setMaxRowCount(1000)
        .build();
    Map<String, Object> event3 = new HashMap<>();
    event3.put("dimA", "leek");
    event3.put("dimMultiVal", ImmutableList.of("1", "2", "4"));
    event3.put("sumCount", 1L);
    Map<String, Object> event4 = new HashMap<>();
    event4.put("dimA", "potato");
    event4.put("dimMultiVal", ImmutableList.of("0", "1", "4"));
    event4.put("sumCount", 1L);
    toPersistB.add(new MapBasedInputRow(1, dims, event3));
    toPersistB.add(new MapBasedInputRow(1, dims, event4));
    final File tmpDirA = temporaryFolder.newFolder();
    final File tmpDirB = temporaryFolder.newFolder();
    final File tmpDirMerged = temporaryFolder.newFolder();
    QueryableIndex indexA = closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersistA, tmpDirA, indexSpec, null)));
    QueryableIndex indexB = closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersistB, tmpDirB, indexSpec, null)));
    final QueryableIndex merged = closer.closeLater(
        indexIO.loadIndex(
            indexMerger.mergeQueryableIndex(
                Arrays.asList(indexA, indexB),
                true,
                new AggregatorFactory[] { new LongSumAggregatorFactory("sumCount", "sumCount") },
                tmpDirMerged,
                indexSpec,
                null,
                -1
            )
        )
    );
    final QueryableIndexIndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
    final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());
    Assert.assertEquals(ImmutableList.of("dimA", "dimMultiVal"), ImmutableList.copyOf(adapter.getDimensionNames()));
    Assert.assertEquals(3, rowList.size());
    Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "3", "5")), rowList.get(0).dimensionValues());
    Assert.assertEquals(1L, rowList.get(0).metricValues().get(0));
    Assert.assertEquals(Arrays.asList("leek", Arrays.asList("1", "2", "4")), rowList.get(1).dimensionValues());
    Assert.assertEquals(2L, rowList.get(1).metricValues().get(0));
    Assert.assertEquals(Arrays.asList("potato", Arrays.asList("0", "1", "4")), rowList.get(2).dimensionValues());
    Assert.assertEquals(1L, rowList.get(2).metricValues().get(0));
    checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimA", "leek"));
    checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimA", "potato"));
    checkBitmapIndex(Collections.singletonList(2), adapter.getBitmapIndex("dimMultiVal", "0"));
    checkBitmapIndex(Arrays.asList(0, 1, 2), adapter.getBitmapIndex("dimMultiVal", "1"));
    checkBitmapIndex(Arrays.asList(0, 1), adapter.getBitmapIndex("dimMultiVal", "2"));
    checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "3"));
    checkBitmapIndex(Arrays.asList(1, 2), adapter.getBitmapIndex("dimMultiVal", "4"));
    checkBitmapIndex(Collections.singletonList(0), adapter.getBitmapIndex("dimMultiVal", "5"));
}
Also used : IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex) HashMap (java.util.HashMap) LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory) StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema) DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec) MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow) File (java.io.File) IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema) IncrementalIndexTest (org.apache.druid.segment.data.IncrementalIndexTest) InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest) Test (org.junit.Test)

Aggregations

IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema): 19 usages
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 16 usages
OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex): 14 usages
File (java.io.File): 10 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 8 usages
Test (org.junit.Test): 8 usages
ArrayList (java.util.ArrayList): 7 usages
MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 7 usages
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 7 usages
IncrementalIndexTest (org.apache.druid.segment.data.IncrementalIndexTest): 6 usages
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 5 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 5 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 5 usages
StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema): 4 usages
IndexSizeExceededException (org.apache.druid.segment.incremental.IndexSizeExceededException): 4 usages
IOException (java.io.IOException): 3 usages
List (java.util.List): 3 usages
InputRow (org.apache.druid.data.input.InputRow): 3 usages
ISE (org.apache.druid.java.util.common.ISE): 3 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 3 usages