Search in sources:

Example 81 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From the class IndexMergerNullHandlingTest, method testStringColumnNullHandling.

/**
 * Persists every non-empty subset of a fixed collection of input rows for the string dimension "d"
 * and verifies how null-like values are stored in the resulting segment.
 *
 * <p>The inputs are split into "null flavors" (missing key, empty list, explicit null, list holding
 * a single null) and "non-null flavors". Empty strings, bare or wrapped in a singleton list, are
 * counted as null flavors only when {@code NullHandling.replaceWithDefault()} is true.
 *
 * <p>For each subset the test checks that the column is absent when every row is a null flavor, and
 * otherwise checks the dictionary contents, the multi-value flag, the cardinality, the stored rows
 * and the bitmap index entry for null.
 */
@Test
public void testStringColumnNullHandling() throws Exception {
    List<Map<String, Object>> nonNullFlavors = new ArrayList<>();
    nonNullFlavors.add(ImmutableMap.of("d", "a"));
    nonNullFlavors.add(ImmutableMap.of("d", ImmutableList.of("a", "b")));
    List<Map<String, Object>> nullFlavors = new ArrayList<>();
    Map<String, Object> mMissing = ImmutableMap.of();
    Map<String, Object> mEmptyList = ImmutableMap.of("d", Collections.emptyList());
    // ImmutableMap cannot hold null values, hence the plain HashMap here.
    Map<String, Object> mNull = new HashMap<>();
    mNull.put("d", null);
    Map<String, Object> mEmptyString = ImmutableMap.of("d", "");
    Map<String, Object> mListOfNull = ImmutableMap.of("d", Collections.singletonList(null));
    Map<String, Object> mListOfEmptyString = ImmutableMap.of("d", Collections.singletonList(""));
    nullFlavors.add(mMissing);
    nullFlavors.add(mEmptyList);
    nullFlavors.add(mNull);
    nullFlavors.add(mListOfNull);
    // Whether an empty string counts as null depends on the active null-handling mode.
    if (NullHandling.replaceWithDefault()) {
        nullFlavors.add(mEmptyString);
        nullFlavors.add(mListOfEmptyString);
    } else {
        nonNullFlavors.add(mEmptyString);
        nonNullFlavors.add(mListOfEmptyString);
    }
    Set<Map<String, Object>> allValues = new HashSet<>();
    allValues.addAll(nonNullFlavors);
    allValues.addAll(nullFlavors);
    for (Set<Map<String, Object>> subset : Sets.powerSet(allValues)) {
        if (subset.isEmpty()) {
            continue;
        }
        final List<Map<String, Object>> subsetList = new ArrayList<>(subset);
        IncrementalIndex toPersist = IncrementalIndexTest.createIndex(new AggregatorFactory[] {});
        for (Map<String, Object> m : subsetList) {
            toPersist.add(new MapBasedInputRow(0L, ImmutableList.of("d"), m));
        }
        final File tempDir = temporaryFolder.newFolder();
        try (QueryableIndex index = indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, indexSpec, null))) {
            final ColumnHolder columnHolder = index.getColumnHolder("d");
            if (nullFlavors.containsAll(subsetList)) {
                // all null -> should be missing
                Assert.assertNull(subsetList.toString(), columnHolder);
            } else {
                Assert.assertNotNull(subsetList.toString(), columnHolder);
                // The column has multiple values if there are any lists with > 1 element in the input set.
                final boolean hasMultipleValues = subsetList.stream()
                    .anyMatch(m -> m.get("d") instanceof List && (((List<?>) m.get("d")).size() > 1));
                // Compute all unique values, the same way that IndexMerger is expected to do it.
                final Set<String> uniqueValues = new HashSet<>();
                for (Map<String, Object> m : subsetList) {
                    final List<String> dValues = normalize(m.get("d"), hasMultipleValues);
                    uniqueValues.addAll(dValues);
                    if (nullFlavors.contains(m)) {
                        uniqueValues.add(null);
                    }
                }
                try (final DictionaryEncodedColumn<String> dictionaryColumn = (DictionaryEncodedColumn<String>) columnHolder.getColumn()) {
                    // Verify unique values against the dictionary.
                    Assert.assertEquals(
                        subsetList.toString(),
                        uniqueValues.stream().sorted(Comparators.naturalNullsFirst()).collect(Collectors.toList()),
                        IntStream.range(0, dictionaryColumn.getCardinality()).mapToObj(dictionaryColumn::lookupName).collect(Collectors.toList())
                    );
                    Assert.assertEquals(subsetList.toString(), hasMultipleValues, dictionaryColumn.hasMultipleValues());
                    Assert.assertEquals(subsetList.toString(), uniqueValues.size(), dictionaryColumn.getCardinality());
                    // Verify the expected set of rows was indexed, ignoring order.
                    Assert.assertEquals(
                        subsetList.toString(),
                        ImmutableMultiset.copyOf(subsetList.stream().map(m -> normalize(m.get("d"), hasMultipleValues)).distinct().collect(Collectors.toList())),
                        ImmutableMultiset.copyOf(IntStream.range(0, index.getNumRows()).mapToObj(rowNumber -> getRow(dictionaryColumn, rowNumber)).distinct().collect(Collectors.toList()))
                    );
                    // Verify that the bitmap index for null is correct.
                    final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();
                    // Read through the column to find all the rows that should match null.
                    final List<Integer> expectedNullRows = new ArrayList<>();
                    for (int i = 0; i < index.getNumRows(); i++) {
                        final List<String> row = getRow(dictionaryColumn, i);
                        if (row.isEmpty() || row.stream().anyMatch(NullHandling::isNullOrEquivalent)) {
                            expectedNullRows.add(i);
                        }
                    }
                    Assert.assertEquals(subsetList.toString(), !expectedNullRows.isEmpty(), bitmapIndex.hasNulls());
                    if (!expectedNullRows.isEmpty()) {
                        // Null is expected to occupy dictionary id 0 whenever it is present.
                        Assert.assertEquals(subsetList.toString(), 0, bitmapIndex.getIndex(null));
                        final ImmutableBitmap nullBitmap = bitmapIndex.getBitmap(bitmapIndex.getIndex(null));
                        final List<Integer> actualNullRows = new ArrayList<>();
                        final IntIterator iterator = nullBitmap.iterator();
                        while (iterator.hasNext()) {
                            actualNullRows.add(iterator.next());
                        }
                        Assert.assertEquals(subsetList.toString(), expectedNullRows, actualNullRows);
                    } else {
                        // No null rows: the lookup for null is expected to report -1.
                        Assert.assertEquals(subsetList.toString(), -1, bitmapIndex.getIndex(null));
                    }
                }
            }
        }
    }
}
Also used : IntIterator(org.roaringbitmap.IntIterator) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) BitmapIndex(org.apache.druid.segment.column.BitmapIndex) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) HashSet(java.util.HashSet) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) ImmutableBitmap(org.apache.druid.collections.bitmap.ImmutableBitmap) DictionaryEncodedColumn(org.apache.druid.segment.column.DictionaryEncodedColumn) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) File(java.io.File) IncrementalIndexTest(org.apache.druid.segment.data.IncrementalIndexTest) Test(org.junit.Test)

Example 82 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From the class IndexMergerTestBase, method testDimensionWithEmptyName.

/**
 * Adds a row whose first dimension has an empty-string name, persists and reloads the index, and
 * checks that the reloaded segment lists only "dim1" and "dim2" as dimensions, has three rows and
 * three columns, and carries the default combining aggregators and NONE query granularity.
 */
@Test
public void testDimensionWithEmptyName() throws Exception {
    final long now = System.currentTimeMillis();

    // Start from the standard populated test index, then append the row under test.
    final IncrementalIndex incrementalIndex = IncrementalIndexTest.createIndex(null);
    IncrementalIndexTest.populateIndex(now, incrementalIndex);
    final MapBasedInputRow rowWithEmptyDimName = new MapBasedInputRow(
        now,
        Arrays.asList("", "dim2"),
        ImmutableMap.of("", "1", "dim2", "2")
    );
    incrementalIndex.add(rowWithEmptyDimName);

    // Persist to a fresh folder and load the result back; the closer owns the loaded index.
    final File persistDir = temporaryFolder.newFolder();
    final File segmentDir = indexMerger.persist(incrementalIndex, persistDir, indexSpec, null);
    final QueryableIndex reloaded = closer.closeLater(indexIO.loadIndex(segmentDir));

    Assert.assertEquals(3, reloaded.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getLength());
    // The empty-named dimension must not show up among the available dimensions.
    Assert.assertEquals(
        Arrays.asList("dim1", "dim2"),
        Lists.newArrayList(reloaded.getAvailableDimensions())
    );
    Assert.assertEquals(3, reloaded.getColumnNames().size());
    assertDimCompression(reloaded, indexSpec.getDimensionCompression());
    Assert.assertArrayEquals(
        IncrementalIndexTest.getDefaultCombiningAggregatorFactories(),
        reloaded.getMetadata().getAggregators()
    );
    Assert.assertEquals(Granularities.NONE, reloaded.getMetadata().getQueryGranularity());
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) File(java.io.File) IncrementalIndexTest(org.apache.druid.segment.data.IncrementalIndexTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 83 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From the class IndexMergerTestBase, method testMergeNumericDims.

/**
 * Persists two indexes built by {@code getIndexWithNumericDims()}, merges them with a "count"
 * aggregator, and verifies the merged dimension names and the dimension and metric values of each
 * of the four resulting rows.
 *
 * <p>Each row's metric assertion is paired with that same row's dimension assertion, so all four
 * rows (indices 0 through 3) have their metrics checked.
 */
@Test
public void testMergeNumericDims() throws Exception {
    IncrementalIndex toPersist1 = getIndexWithNumericDims();
    IncrementalIndex toPersist2 = getIndexWithNumericDims();
    final File tmpDir = temporaryFolder.newFolder();
    final File tmpDir2 = temporaryFolder.newFolder();
    final File tmpDirMerged = temporaryFolder.newFolder();
    QueryableIndex index1 = closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersist1, tmpDir, indexSpec, null)));
    QueryableIndex index2 = closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersist2, tmpDir2, indexSpec, null)));
    final QueryableIndex merged = closer.closeLater(indexIO.loadIndex(indexMerger.mergeQueryableIndex(Arrays.asList(index1, index2), true, new AggregatorFactory[] { new CountAggregatorFactory("count") }, tmpDirMerged, indexSpec, null, -1)));
    final IndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
    final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());
    Assert.assertEquals(ImmutableList.of("dimA", "dimB", "dimC"), ImmutableList.copyOf(adapter.getDimensionNames()));
    Assert.assertEquals(4, rowList.size());
    // NOTE(review): a metric value of 2 is expected for every row, presumably because both inputs
    // contain the same rows and the merge combines them — confirm against getIndexWithNumericDims().
    Assert.assertEquals(Arrays.asList(NullHandling.defaultLongValue(), NullHandling.defaultFloatValue(), "Nully Row"), rowList.get(0).dimensionValues());
    Assert.assertEquals(Collections.singletonList(2L), rowList.get(0).metricValues());
    Assert.assertEquals(Arrays.asList(72L, 60000.789f, "World"), rowList.get(1).dimensionValues());
    Assert.assertEquals(Collections.singletonList(2L), rowList.get(1).metricValues());
    Assert.assertEquals(Arrays.asList(100L, 4000.567f, "Hello"), rowList.get(2).dimensionValues());
    Assert.assertEquals(Collections.singletonList(2L), rowList.get(2).metricValues());
    Assert.assertEquals(Arrays.asList(3001L, 1.2345f, "Foobar"), rowList.get(3).dimensionValues());
    Assert.assertEquals(Collections.singletonList(2L), rowList.get(3).metricValues());
}
Also used : CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) File(java.io.File) IncrementalIndexTest(org.apache.druid.segment.data.IncrementalIndexTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 84 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From the class SchemalessIndexTest, method makeRowPersistedIndexes.

/**
 * Builds one single-row index per entry of {@code EVENTS}, persists each into its own temporary
 * directory, loads it back and appends the loaded index to {@code ROW_PERSISTED_INDEXES}.
 *
 * <p>Runs while holding the monitor of {@code log}. {@code EVENTS} is filled via
 * {@code makeEvents()} first if it is empty. Any {@link IOException} is rethrown wrapped in a
 * {@link RuntimeException}.
 */
private void makeRowPersistedIndexes() {
    synchronized (log) {
        try {
            if (EVENTS.isEmpty()) {
                makeEvents();
            }
            for (final Map<String, Object> row : EVENTS) {
                final long rowMillis = new DateTime(row.get(TIMESTAMP), ISOChronology.getInstanceUTC()).getMillis();

                // Every key that is neither the timestamp nor a metric is treated as a dimension.
                final List<String> dimensionNames = new ArrayList<>();
                for (final String key : row.keySet()) {
                    if (key.equalsIgnoreCase(TIMESTAMP) || METRICS.contains(key)) {
                        continue;
                    }
                    dimensionNames.add(key);
                }

                final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
                    .withMinTimestamp(rowMillis)
                    .withQueryGranularity(Granularities.MINUTE)
                    .withMetrics(METRIC_AGGS)
                    .build();
                final IncrementalIndex singleRowIndex = new OnheapIncrementalIndex.Builder()
                    .setIndexSchema(schema)
                    .setMaxRowCount(1000)
                    .build();
                singleRowIndex.add(new MapBasedInputRow(rowMillis, dimensionNames, row));

                // Reserve a unique temp path, then swap the created file for a directory of that name.
                final File persistDir = File.createTempFile("billy", "yay");
                persistDir.delete();
                FileUtils.mkdirp(persistDir);
                persistDir.deleteOnExit();

                indexMerger.persist(singleRowIndex, persistDir, INDEX_SPEC, null);
                ROW_PERSISTED_INDEXES.add(indexIO.loadIndex(persistDir));
            }
        } catch (IOException ioe) {
            throw new RuntimeException(ioe);
        }
    }
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) ArrayList(java.util.ArrayList) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) IOException(java.io.IOException) DateTime(org.joda.time.DateTime) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) HashMap(java.util.HashMap) Map(java.util.Map) File(java.io.File) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema)

Example 85 with IncrementalIndex

Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From the class SchemalessIndexTest, method getIncrementalIndex.

/**
 * Returns a queryable index built from the events at positions {@code index1} and {@code index2}
 * of {@code EVENTS}, caching the result in {@code INCREMENTAL_INDEXES} keyed first by
 * {@code index1} and then by {@code index2}.
 *
 * <p>Runs while holding the monitor of {@code log}. {@code EVENTS} is filled via
 * {@code makeEvents()} first if it is empty.
 *
 * @param index1 position of the first event to include; also the outer cache key
 * @param index2 position of the second event to include; also the inner cache key
 * @return the cached index for this pair, or a newly persisted and loaded one
 */
public static QueryableIndex getIncrementalIndex(int index1, int index2) {
    synchronized (log) {
        if (EVENTS.isEmpty()) {
            makeEvents();
        }

        // Look up the per-index1 cache, creating it on first use; return early on a cache hit.
        Map<Integer, QueryableIndex> cachedForFirst = INCREMENTAL_INDEXES.get(index1);
        if (cachedForFirst == null) {
            cachedForFirst = new HashMap<>();
            INCREMENTAL_INDEXES.put(index1, cachedForFirst);
        } else {
            final QueryableIndex cached = cachedForFirst.get(index2);
            if (cached != null) {
                return cached;
            }
        }

        IncrementalIndex building = null;
        int position = -1;
        for (final Map<String, Object> row : EVENTS) {
            position++;
            if (position != index1 && position != index2) {
                continue;
            }

            final long rowMillis = new DateTime(row.get(TIMESTAMP), ISOChronology.getInstanceUTC()).getMillis();
            // The index is created lazily, using the timestamp of the first selected event as its minimum.
            if (building == null) {
                final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
                    .withMinTimestamp(rowMillis)
                    .withQueryGranularity(Granularities.MINUTE)
                    .withMetrics(METRIC_AGGS)
                    .build();
                building = new OnheapIncrementalIndex.Builder()
                    .setIndexSchema(schema)
                    .setMaxRowCount(1000)
                    .build();
            }

            // Every key that is neither the timestamp nor a metric is treated as a dimension.
            final List<String> dimensionNames = new ArrayList<>();
            for (final String key : row.keySet()) {
                if (key.equalsIgnoreCase(TIMESTAMP) || METRICS.contains(key)) {
                    continue;
                }
                dimensionNames.add(key);
            }

            try {
                building.add(new MapBasedInputRow(rowMillis, dimensionNames, row));
            } catch (IndexSizeExceededException sizeExceeded) {
                throw new RuntimeException(sizeExceeded);
            }
        }

        final QueryableIndex loaded = TestIndex.persistRealtimeAndLoadMMapped(building);
        cachedForFirst.put(index2, loaded);
        return loaded;
    }
}
Also used : IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) HashMap(java.util.HashMap) Map(java.util.Map) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException)

Aggregations

IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 109, OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex): 85, File (java.io.File): 59, Test (org.junit.Test): 51, MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 46, InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 46, CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 26, IncrementalIndexTest (org.apache.druid.segment.data.IncrementalIndexTest): 26, ArrayList (java.util.ArrayList): 25, IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema): 25, IndexSpec (org.apache.druid.segment.IndexSpec): 19, QueryableIndex (org.apache.druid.segment.QueryableIndex): 19, LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 17, InputRow (org.apache.druid.data.input.InputRow): 15, IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment): 14, AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 12, DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 11, IOException (java.io.IOException): 10, Before (org.junit.Before): 10, Interval (org.joda.time.Interval): 9