
Example 91 with MapBasedInputRow

Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.

In the class DataGenerator, the method nextRow:

public InputRow nextRow() {
    // Build the raw event map from the configured per-column value generators.
    Map<String, Object> event = new HashMap<>();
    for (ColumnValueGenerator generator : columnGenerators) {
        event.put(generator.getSchema().getName(), generator.generateRowValue());
    }
    // Wrap the event with the next synthetic timestamp and the declared dimensions.
    return new MapBasedInputRow(nextTimestamp(), dimensionNames, event);
}
Also used: HashMap (java.util.HashMap), MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)
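
The pattern above is the simplest use of MapBasedInputRow: an epoch-millis timestamp, the list of declared dimension names, and the raw event map. Below is a minimal standalone sketch of the same construction (the class name and sample fields are ours, not from DataGenerator), assuming the standard InputRow accessors getDimension() and getRaw():

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.druid.data.input.MapBasedInputRow;

public class NextRowSketch {
    public static void main(String[] args) {
        // The raw event: one dimension ("page") and one metric-like field ("count").
        Map<String, Object> event = new HashMap<>();
        event.put("page", "druid-wiki");
        event.put("count", 7L);
        // Timestamp in millis, declared dimension names, raw event map.
        MapBasedInputRow row = new MapBasedInputRow(System.currentTimeMillis(), List.of("page"), event);
        System.out.println(row.getDimension("page")); // [druid-wiki]
        System.out.println(row.getRaw("count")); // 7
    }
}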

Example 92 with MapBasedInputRow

Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.

In the class IndexMergerRollupTest, the method testStringFirstLastRollup:

private void testStringFirstLastRollup(AggregatorFactory[] aggregatorFactories) throws Exception {
    List<Map<String, Object>> eventsList = Arrays.asList(new HashMap<String, Object>() {

        {
            put("d", "d1");
            put("m", "m1");
        }
    }, new HashMap<String, Object>() {

        {
            put("d", "d1");
            put("m", "m2");
        }
    });
    final File tempDir = temporaryFolder.newFolder();
    List<QueryableIndex> indexes = new ArrayList<>();
    Instant time = Instant.now();
    for (Map<String, Object> events : eventsList) {
        IncrementalIndex toPersist = IncrementalIndexTest.createIndex(aggregatorFactories);
        // Every event gets the same timestamp and dimension value, so the
        // merged index should roll everything up into a single row.
        toPersist.add(new MapBasedInputRow(time.toEpochMilli(), ImmutableList.of("d"), events));
        indexes.add(indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, indexSpec, null)));
    }
    File indexFile = indexMerger.mergeQueryableIndex(indexes, true, aggregatorFactories, tempDir, indexSpec, null, -1);
    try (QueryableIndex mergedIndex = indexIO.loadIndex(indexFile)) {
        Assert.assertEquals("Number of rows should be 1", 1, mergedIndex.getNumRows());
    }
}
Also used: IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), Instant (java.time.Instant), ArrayList (java.util.ArrayList), MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow), HashMap (java.util.HashMap), Map (java.util.Map), File (java.io.File)
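
Both events carry the same timestamp and the same value for the only declared dimension "d", so they share a rollup key and the merged index collapses them into the single row the test asserts. A standalone sketch of that key comparison (the class and helper names are ours; only the MapBasedInputRow accessors are assumed):

import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.druid.data.input.MapBasedInputRow;

public class RollupKeySketch {
    // Two rows roll up when the (truncated) timestamp and every declared
    // dimension value match; metrics are then combined by the aggregators.
    static boolean sameRollupKey(MapBasedInputRow a, MapBasedInputRow b, List<String> dims) {
        if (a.getTimestampFromEpoch() != b.getTimestampFromEpoch()) {
            return false;
        }
        for (String dim : dims) {
            if (!Objects.equals(a.getDimension(dim), b.getDimension(dim))) {
                return false;
            }
        }
        return true;
    }

    public static void main(String[] args) {
        long now = System.currentTimeMillis();
        List<String> dims = List.of("d");
        MapBasedInputRow r1 = new MapBasedInputRow(now, dims, Map.of("d", "d1", "m", "m1"));
        MapBasedInputRow r2 = new MapBasedInputRow(now, dims, Map.of("d", "d1", "m", "m2"));
        System.out.println(sameRollupKey(r1, r2, dims)); // true -> one merged row
    }
}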

Example 93 with MapBasedInputRow

Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.

In the class IndexMergerNullHandlingTest, the method testStringColumnNullHandling:

@Test
public void testStringColumnNullHandling() throws Exception {
    List<Map<String, Object>> nonNullFlavors = new ArrayList<>();
    nonNullFlavors.add(ImmutableMap.of("d", "a"));
    nonNullFlavors.add(ImmutableMap.of("d", ImmutableList.of("a", "b")));
    List<Map<String, Object>> nullFlavors = new ArrayList<>();
    Map<String, Object> mMissing = ImmutableMap.of();
    Map<String, Object> mEmptyList = ImmutableMap.of("d", Collections.emptyList());
    Map<String, Object> mNull = new HashMap<>();
    mNull.put("d", null);
    Map<String, Object> mEmptyString = ImmutableMap.of("d", "");
    Map<String, Object> mListOfNull = ImmutableMap.of("d", Collections.singletonList(null));
    Map<String, Object> mListOfEmptyString = ImmutableMap.of("d", Collections.singletonList(""));
    nullFlavors.add(mMissing);
    nullFlavors.add(mEmptyList);
    nullFlavors.add(mNull);
    nullFlavors.add(mListOfNull);
    if (NullHandling.replaceWithDefault()) {
        nullFlavors.add(mEmptyString);
        nullFlavors.add(mListOfEmptyString);
    } else {
        nonNullFlavors.add(mEmptyString);
        nonNullFlavors.add(mListOfEmptyString);
    }
    Set<Map<String, Object>> allValues = new HashSet<>();
    allValues.addAll(nonNullFlavors);
    allValues.addAll(nullFlavors);
    for (Set<Map<String, Object>> subset : Sets.powerSet(allValues)) {
        if (subset.isEmpty()) {
            continue;
        }
        final List<Map<String, Object>> subsetList = new ArrayList<>(subset);
        IncrementalIndex toPersist = IncrementalIndexTest.createIndex(new AggregatorFactory[] {});
        for (Map<String, Object> m : subsetList) {
            toPersist.add(new MapBasedInputRow(0L, ImmutableList.of("d"), m));
        }
        final File tempDir = temporaryFolder.newFolder();
        try (QueryableIndex index = indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, indexSpec, null))) {
            final ColumnHolder columnHolder = index.getColumnHolder("d");
            if (nullFlavors.containsAll(subsetList)) {
                // all null -> should be missing
                Assert.assertNull(subsetList.toString(), columnHolder);
            } else {
                Assert.assertNotNull(subsetList.toString(), columnHolder);
                // The column has multiple values if there are any lists with > 1 element in the input set.
                final boolean hasMultipleValues = subsetList.stream().anyMatch(m -> m.get("d") instanceof List && (((List) m.get("d")).size() > 1));
                // Compute all unique values, the same way that IndexMerger is expected to do it.
                final Set<String> uniqueValues = new HashSet<>();
                for (Map<String, Object> m : subsetList) {
                    final List<String> dValues = normalize(m.get("d"), hasMultipleValues);
                    uniqueValues.addAll(dValues);
                    if (nullFlavors.contains(m)) {
                        uniqueValues.add(null);
                    }
                }
                try (final DictionaryEncodedColumn<String> dictionaryColumn = (DictionaryEncodedColumn<String>) columnHolder.getColumn()) {
                    // Verify unique values against the dictionary.
                    Assert.assertEquals(subsetList.toString(), uniqueValues.stream().sorted(Comparators.naturalNullsFirst()).collect(Collectors.toList()), IntStream.range(0, dictionaryColumn.getCardinality()).mapToObj(dictionaryColumn::lookupName).collect(Collectors.toList()));
                    Assert.assertEquals(subsetList.toString(), hasMultipleValues, dictionaryColumn.hasMultipleValues());
                    Assert.assertEquals(subsetList.toString(), uniqueValues.size(), dictionaryColumn.getCardinality());
                    // Verify the expected set of rows was indexed, ignoring order.
                    Assert.assertEquals(subsetList.toString(), ImmutableMultiset.copyOf(subsetList.stream().map(m -> normalize(m.get("d"), hasMultipleValues)).distinct().collect(Collectors.toList())), ImmutableMultiset.copyOf(IntStream.range(0, index.getNumRows()).mapToObj(rowNumber -> getRow(dictionaryColumn, rowNumber)).distinct().collect(Collectors.toList())));
                    // Verify that the bitmap index for null is correct.
                    final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();
                    // Read through the column to find all the rows that should match null.
                    final List<Integer> expectedNullRows = new ArrayList<>();
                    for (int i = 0; i < index.getNumRows(); i++) {
                        final List<String> row = getRow(dictionaryColumn, i);
                        if (row.isEmpty() || row.stream().anyMatch(NullHandling::isNullOrEquivalent)) {
                            expectedNullRows.add(i);
                        }
                    }
                    Assert.assertEquals(subsetList.toString(), expectedNullRows.size() > 0, bitmapIndex.hasNulls());
                    if (expectedNullRows.size() > 0) {
                        Assert.assertEquals(subsetList.toString(), 0, bitmapIndex.getIndex(null));
                        final ImmutableBitmap nullBitmap = bitmapIndex.getBitmap(bitmapIndex.getIndex(null));
                        final List<Integer> actualNullRows = new ArrayList<>();
                        final IntIterator iterator = nullBitmap.iterator();
                        while (iterator.hasNext()) {
                            actualNullRows.add(iterator.next());
                        }
                        Assert.assertEquals(subsetList.toString(), expectedNullRows, actualNullRows);
                    } else {
                        Assert.assertEquals(-1, bitmapIndex.getIndex(null));
                    }
                }
            }
        }
    }
}
Also used: IntIterator (org.roaringbitmap.IntIterator), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), BitmapIndex (org.apache.druid.segment.column.BitmapIndex), ImmutableList (com.google.common.collect.ImmutableList), List (java.util.List), MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow), HashSet (java.util.HashSet), ColumnHolder (org.apache.druid.segment.column.ColumnHolder), IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), ImmutableBitmap (org.apache.druid.collections.bitmap.ImmutableBitmap), DictionaryEncodedColumn (org.apache.druid.segment.column.DictionaryEncodedColumn), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), File (java.io.File), IncrementalIndexTest (org.apache.druid.segment.data.IncrementalIndexTest), Test (org.junit.Test)
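
The test's "null flavors" are the inputs expected to index as null: a missing key, an explicit null, an empty list, a list containing null, and, when NullHandling.replaceWithDefault() is true, also the empty string and a list containing the empty string. An illustrative predicate (ours, not Druid's NullHandling) that mirrors the per-row check near the end of the test:

import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class NullFlavorSketch {
    // Stand-in for NullHandling.replaceWithDefault() == true in this sketch.
    static final boolean REPLACE_WITH_DEFAULT = true;

    static boolean isNullFlavor(Object dValue) {
        if (dValue == null) {
            return true; // covers both a missing key and an explicit null
        }
        if (dValue instanceof List) {
            List<?> list = (List<?>) dValue;
            // Mirrors the test's row check: empty, or any null-equivalent element.
            return list.isEmpty()
                    || list.stream().anyMatch(v -> v == null || (REPLACE_WITH_DEFAULT && "".equals(v)));
        }
        return REPLACE_WITH_DEFAULT && "".equals(dValue);
    }

    public static void main(String[] args) {
        Map<String, Object> mNull = new HashMap<>();
        mNull.put("d", null);
        System.out.println(isNullFlavor(mNull.get("d"))); // true
        System.out.println(isNullFlavor(Collections.singletonList(""))); // true in default mode
        System.out.println(isNullFlavor("a")); // false
    }
}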

Example 94 with MapBasedInputRow

Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.

In the class IndexMergerTestBase, the method testDimensionWithEmptyName:

@Test
public void testDimensionWithEmptyName() throws Exception {
    final long timestamp = System.currentTimeMillis();
    IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
    IncrementalIndexTest.populateIndex(timestamp, toPersist);
    toPersist.add(new MapBasedInputRow(timestamp, Arrays.asList("", "dim2"), ImmutableMap.of("", "1", "dim2", "2")));
    final File tempDir = temporaryFolder.newFolder();
    QueryableIndex index = closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, indexSpec, null)));
    Assert.assertEquals(3, index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getLength());
    Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index.getAvailableDimensions()));
    Assert.assertEquals(3, index.getColumnNames().size());
    assertDimCompression(index, indexSpec.getDimensionCompression());
    Assert.assertArrayEquals(IncrementalIndexTest.getDefaultCombiningAggregatorFactories(), index.getMetadata().getAggregators());
    Assert.assertEquals(Granularities.NONE, index.getMetadata().getQueryGranularity());
}
Also used: IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex), MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow), File (java.io.File), IncrementalIndexTest (org.apache.druid.segment.data.IncrementalIndexTest), InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest), Test (org.junit.Test)
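
Although the extra row declares the dimension names ("", "dim2"), the merged index still exposes only dim1 and dim2: the empty-string dimension name is dropped during indexing. A small sketch of that filtering step (the helper is ours, not Druid's internal mechanism):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.druid.data.input.MapBasedInputRow;

public class EmptyDimNameSketch {
    // Drop empty dimension names before constructing the row, matching the
    // outcome the test asserts on the merged index.
    static MapBasedInputRow withoutEmptyDims(long timestamp, List<String> dims, Map<String, Object> event) {
        List<String> kept = dims.stream().filter(d -> !d.isEmpty()).collect(Collectors.toList());
        return new MapBasedInputRow(timestamp, kept, event);
    }

    public static void main(String[] args) {
        MapBasedInputRow row = withoutEmptyDims(
                System.currentTimeMillis(),
                List.of("", "dim2"),
                Map.of("", "1", "dim2", "2"));
        System.out.println(row.getDimensions()); // [dim2]
    }
}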

Example 95 with MapBasedInputRow

Use of org.apache.druid.data.input.MapBasedInputRow in project druid by druid-io.

In the class SchemalessIndexTest, the method makeRowPersistedIndexes:

private void makeRowPersistedIndexes() {
    synchronized (log) {
        try {
            if (EVENTS.isEmpty()) {
                makeEvents();
            }
            for (final Map<String, Object> event : EVENTS) {
                final long timestamp = new DateTime(event.get(TIMESTAMP), ISOChronology.getInstanceUTC()).getMillis();
                final List<String> dims = new ArrayList<>();
                for (Map.Entry<String, Object> entry : event.entrySet()) {
                    if (!entry.getKey().equalsIgnoreCase(TIMESTAMP) && !METRICS.contains(entry.getKey())) {
                        dims.add(entry.getKey());
                    }
                }
                final IncrementalIndex rowIndex = new OnheapIncrementalIndex.Builder().setIndexSchema(new IncrementalIndexSchema.Builder().withMinTimestamp(timestamp).withQueryGranularity(Granularities.MINUTE).withMetrics(METRIC_AGGS).build()).setMaxRowCount(1000).build();
                rowIndex.add(new MapBasedInputRow(timestamp, dims, event));
                File tmpFile = File.createTempFile("billy", "yay");
                // Reuse the temp file's unique path as a directory for the persisted segment.
                tmpFile.delete();
                FileUtils.mkdirp(tmpFile);
                tmpFile.deleteOnExit();
                indexMerger.persist(rowIndex, tmpFile, INDEX_SPEC, null);
                ROW_PERSISTED_INDEXES.add(indexIO.loadIndex(tmpFile));
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
Also used: IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex), ArrayList (java.util.ArrayList), IOException (java.io.IOException), DateTime (org.joda.time.DateTime), MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow), HashMap (java.util.HashMap), Map (java.util.Map), File (java.io.File), IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema)
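
This is the schemaless pattern: instead of a fixed dimension spec, each event's dimension list is derived per row by excluding the timestamp key and the known metric names before constructing the MapBasedInputRow. A compact standalone sketch (the TIMESTAMP and METRICS values here are stand-ins for the test's constants):

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.druid.data.input.MapBasedInputRow;

public class SchemalessRowSketch {
    static final String TIMESTAMP = "timestamp";
    static final Set<String> METRICS = Set.of("index");

    // Everything that is neither the timestamp nor a metric becomes a dimension.
    static MapBasedInputRow toRow(long timestamp, Map<String, Object> event) {
        List<String> dims = new ArrayList<>();
        for (String key : event.keySet()) {
            if (!key.equalsIgnoreCase(TIMESTAMP) && !METRICS.contains(key)) {
                dims.add(key);
            }
        }
        return new MapBasedInputRow(timestamp, dims, event);
    }

    public static void main(String[] args) {
        MapBasedInputRow row = toRow(
                System.currentTimeMillis(),
                Map.of("timestamp", "2011-01-12T00:00:00.000Z", "dim", "foo", "index", 100));
        System.out.println(row.getDimensions()); // [dim]
    }
}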

Aggregations

MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow): 114
Test (org.junit.Test): 77
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 46
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 42
OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex): 38
InputRow (org.apache.druid.data.input.InputRow): 31
File (java.io.File): 24
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 21
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 20
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 19
ArrayList (java.util.ArrayList): 17
HashMap (java.util.HashMap): 15
DateTime (org.joda.time.DateTime): 15
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 14
IncrementalIndexTest (org.apache.druid.segment.data.IncrementalIndexTest): 14
Interval (org.joda.time.Interval): 14
IOException (java.io.IOException): 13
DoubleDimensionSchema (org.apache.druid.data.input.impl.DoubleDimensionSchema): 13
IncrementalIndexSchema (org.apache.druid.segment.incremental.IncrementalIndexSchema): 12
ImmutableMap (com.google.common.collect.ImmutableMap): 11