Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
From the class IndexMergerNullHandlingTest, the method testStringColumnNullHandling:
@Test
public void testStringColumnNullHandling() throws Exception
{
  List<Map<String, Object>> nonNullFlavors = new ArrayList<>();
  nonNullFlavors.add(ImmutableMap.of("d", "a"));
  nonNullFlavors.add(ImmutableMap.of("d", ImmutableList.of("a", "b")));

  List<Map<String, Object>> nullFlavors = new ArrayList<>();
  Map<String, Object> mMissing = ImmutableMap.of();
  Map<String, Object> mEmptyList = ImmutableMap.of("d", Collections.emptyList());
  Map<String, Object> mNull = new HashMap<>();
  mNull.put("d", null);
  Map<String, Object> mEmptyString = ImmutableMap.of("d", "");
  Map<String, Object> mListOfNull = ImmutableMap.of("d", Collections.singletonList(null));
  Map<String, Object> mListOfEmptyString = ImmutableMap.of("d", Collections.singletonList(""));

  nullFlavors.add(mMissing);
  nullFlavors.add(mEmptyList);
  nullFlavors.add(mNull);
  nullFlavors.add(mListOfNull);

  // In default-value mode, empty strings count as nulls; in SQL-compatible mode they are real values.
  if (NullHandling.replaceWithDefault()) {
    nullFlavors.add(mEmptyString);
    nullFlavors.add(mListOfEmptyString);
  } else {
    nonNullFlavors.add(mEmptyString);
    nonNullFlavors.add(mListOfEmptyString);
  }

  Set<Map<String, Object>> allValues = new HashSet<>();
  allValues.addAll(nonNullFlavors);
  allValues.addAll(nullFlavors);

  // Persist and verify every non-empty subset of the null/non-null flavors.
  for (Set<Map<String, Object>> subset : Sets.powerSet(allValues)) {
    if (subset.isEmpty()) {
      continue;
    }
    final List<Map<String, Object>> subsetList = new ArrayList<>(subset);

    IncrementalIndex toPersist = IncrementalIndexTest.createIndex(new AggregatorFactory[]{});
    for (Map<String, Object> m : subsetList) {
      toPersist.add(new MapBasedInputRow(0L, ImmutableList.of("d"), m));
    }

    final File tempDir = temporaryFolder.newFolder();
    try (QueryableIndex index = indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, indexSpec, null))) {
      final ColumnHolder columnHolder = index.getColumnHolder("d");

      if (nullFlavors.containsAll(subsetList)) {
        // all null -> should be missing
        Assert.assertNull(subsetList.toString(), columnHolder);
      } else {
        Assert.assertNotNull(subsetList.toString(), columnHolder);

        // The column has multiple values if there are any lists with > 1 element in the input set.
        final boolean hasMultipleValues = subsetList.stream().anyMatch(
            m -> m.get("d") instanceof List && (((List) m.get("d")).size() > 1)
        );

        // Compute all unique values, the same way that IndexMerger is expected to do it.
        final Set<String> uniqueValues = new HashSet<>();
        for (Map<String, Object> m : subsetList) {
          final List<String> dValues = normalize(m.get("d"), hasMultipleValues);
          uniqueValues.addAll(dValues);
          if (nullFlavors.contains(m)) {
            uniqueValues.add(null);
          }
        }

        try (final DictionaryEncodedColumn<String> dictionaryColumn =
                 (DictionaryEncodedColumn<String>) columnHolder.getColumn()) {
          // Verify unique values against the dictionary.
          Assert.assertEquals(
              subsetList.toString(),
              uniqueValues.stream().sorted(Comparators.naturalNullsFirst()).collect(Collectors.toList()),
              IntStream.range(0, dictionaryColumn.getCardinality())
                       .mapToObj(dictionaryColumn::lookupName)
                       .collect(Collectors.toList())
          );
          Assert.assertEquals(subsetList.toString(), hasMultipleValues, dictionaryColumn.hasMultipleValues());
          Assert.assertEquals(subsetList.toString(), uniqueValues.size(), dictionaryColumn.getCardinality());

          // Verify the expected set of rows was indexed, ignoring order.
          Assert.assertEquals(
              subsetList.toString(),
              ImmutableMultiset.copyOf(
                  subsetList.stream()
                            .map(m -> normalize(m.get("d"), hasMultipleValues))
                            .distinct()
                            .collect(Collectors.toList())
              ),
              ImmutableMultiset.copyOf(
                  IntStream.range(0, index.getNumRows())
                           .mapToObj(rowNumber -> getRow(dictionaryColumn, rowNumber))
                           .distinct()
                           .collect(Collectors.toList())
              )
          );

          // Verify that the bitmap index for null is correct.
          final BitmapIndex bitmapIndex = columnHolder.getBitmapIndex();

          // Read through the column to find all the rows that should match null.
          final List<Integer> expectedNullRows = new ArrayList<>();
          for (int i = 0; i < index.getNumRows(); i++) {
            final List<String> row = getRow(dictionaryColumn, i);
            if (row.isEmpty() || row.stream().anyMatch(NullHandling::isNullOrEquivalent)) {
              expectedNullRows.add(i);
            }
          }

          Assert.assertEquals(subsetList.toString(), expectedNullRows.size() > 0, bitmapIndex.hasNulls());

          if (expectedNullRows.size() > 0) {
            // When null is present, it always gets dictionary id 0.
            Assert.assertEquals(subsetList.toString(), 0, bitmapIndex.getIndex(null));

            final ImmutableBitmap nullBitmap = bitmapIndex.getBitmap(bitmapIndex.getIndex(null));
            final List<Integer> actualNullRows = new ArrayList<>();
            final IntIterator iterator = nullBitmap.iterator();
            while (iterator.hasNext()) {
              actualNullRows.add(iterator.next());
            }
            Assert.assertEquals(subsetList.toString(), expectedNullRows, actualNullRows);
          } else {
            Assert.assertEquals(-1, bitmapIndex.getIndex(null));
          }
        }
      }
    }
  }
}
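The test relies on two private helpers that this page does not reproduce: normalize, which canonicalizes a raw "d" value into the list of strings the merged column should contain, and getRow, which reads a row back out of the dictionary-encoded column. Below is a plausible sketch of both, assuming only the behavior the assertions above depend on (missing, null, and empty-list inputs normalize to a single null entry); the real bodies live in IndexMergerNullHandlingTest and may differ in detail.

// Sketch: canonicalize an input value for "d" into the list of strings
// the merged column is expected to contain.
private List<String> normalize(final Object value, final boolean hasMultipleValues)
{
  final List<String> list = new ArrayList<>();
  if (value == null) {
    list.add(null);
  } else if (value instanceof String) {
    list.add((String) value);
  } else if (value instanceof List) {
    final List<String> values = (List<String>) value;
    if (values.isEmpty()) {
      // Empty lists come back as a single null.
      list.add(null);
    } else {
      list.addAll(values);
    }
  } else {
    throw new IllegalStateException("unexpected class: " + value.getClass());
  }
  return list;
}

// Sketch: read one row of the column back through the dictionary.
private List<String> getRow(final DictionaryEncodedColumn<String> column, final int rowNumber)
{
  final List<String> retVal = new ArrayList<>();
  if (column.hasMultipleValues()) {
    final IndexedInts row = column.getMultiValueRow(rowNumber);
    for (int i = 0; i < row.size(); i++) {
      retVal.add(column.lookupName(row.get(i)));
    }
  } else {
    retVal.add(column.lookupName(column.getSingleValueRow(rowNumber)));
  }
  return retVal;
}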
Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
From the class IndexMergerTestBase, the method testDimensionWithEmptyName:
@Test
public void testDimensionWithEmptyName() throws Exception
{
  final long timestamp = System.currentTimeMillis();
  IncrementalIndex toPersist = IncrementalIndexTest.createIndex(null);
  IncrementalIndexTest.populateIndex(timestamp, toPersist);
  toPersist.add(new MapBasedInputRow(timestamp, Arrays.asList("", "dim2"), ImmutableMap.of("", "1", "dim2", "2")));

  final File tempDir = temporaryFolder.newFolder();
  QueryableIndex index = closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersist, tempDir, indexSpec, null)));

  // The empty-named dimension is dropped: only dim1 and dim2 survive,
  // and the three columns are __time, dim1, and dim2.
  Assert.assertEquals(3, index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getLength());
  Assert.assertEquals(Arrays.asList("dim1", "dim2"), Lists.newArrayList(index.getAvailableDimensions()));
  Assert.assertEquals(3, index.getColumnNames().size());

  assertDimCompression(index, indexSpec.getDimensionCompression());
  Assert.assertArrayEquals(IncrementalIndexTest.getDefaultCombiningAggregatorFactories(), index.getMetadata().getAggregators());
  Assert.assertEquals(Granularities.NONE, index.getMetadata().getQueryGranularity());
}
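The row and dimension counts asserted above depend on IncrementalIndexTest.populateIndex, which is not shown here. A minimal sketch, assuming it adds two rows carrying dim1 and dim2 (which would account for the three total rows and the two surviving dimensions once the empty-named one is dropped):

// Assumed shape of the helper; the real version lives in IncrementalIndexTest.
public static void populateIndex(long timestamp, IncrementalIndex index) throws IndexSizeExceededException
{
  index.add(new MapBasedInputRow(timestamp, Arrays.asList("dim1", "dim2"), ImmutableMap.of("dim1", "1", "dim2", "2")));
  index.add(new MapBasedInputRow(timestamp, Arrays.asList("dim1", "dim2"), ImmutableMap.of("dim1", "3", "dim2", "4")));
}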
Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
From the class IndexMergerTestBase, the method testMergeNumericDims:
@Test
public void testMergeNumericDims() throws Exception
{
  IncrementalIndex toPersist1 = getIndexWithNumericDims();
  IncrementalIndex toPersist2 = getIndexWithNumericDims();

  final File tmpDir = temporaryFolder.newFolder();
  final File tmpDir2 = temporaryFolder.newFolder();
  final File tmpDirMerged = temporaryFolder.newFolder();

  QueryableIndex index1 = closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersist1, tmpDir, indexSpec, null)));
  QueryableIndex index2 = closer.closeLater(indexIO.loadIndex(indexMerger.persist(toPersist2, tmpDir2, indexSpec, null)));

  final QueryableIndex merged = closer.closeLater(
      indexIO.loadIndex(
          indexMerger.mergeQueryableIndex(
              Arrays.asList(index1, index2),
              true,
              new AggregatorFactory[]{new CountAggregatorFactory("count")},
              tmpDirMerged,
              indexSpec,
              null,
              -1
          )
      )
  );

  final IndexableAdapter adapter = new QueryableIndexIndexableAdapter(merged);
  final List<DebugRow> rowList = RowIteratorHelper.toList(adapter.getRows());

  Assert.assertEquals(ImmutableList.of("dimA", "dimB", "dimC"), ImmutableList.copyOf(adapter.getDimensionNames()));
  Assert.assertEquals(4, rowList.size());

  // Both source indexes contain the same four rows, so after rollup each merged row has count == 2.
  Assert.assertEquals(Arrays.asList(NullHandling.defaultLongValue(), NullHandling.defaultFloatValue(), "Nully Row"), rowList.get(0).dimensionValues());
  Assert.assertEquals(Collections.singletonList(2L), rowList.get(0).metricValues());

  Assert.assertEquals(Arrays.asList(72L, 60000.789f, "World"), rowList.get(1).dimensionValues());
  Assert.assertEquals(Collections.singletonList(2L), rowList.get(1).metricValues());

  Assert.assertEquals(Arrays.asList(100L, 4000.567f, "Hello"), rowList.get(2).dimensionValues());
  Assert.assertEquals(Collections.singletonList(2L), rowList.get(2).metricValues());

  Assert.assertEquals(Arrays.asList(3001L, 1.2345f, "Foobar"), rowList.get(3).dimensionValues());
  Assert.assertEquals(Collections.singletonList(2L), rowList.get(3).metricValues());
}
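The expected values come from getIndexWithNumericDims, which this page omits. Here is a hedged sketch consistent with the assertions: dimA is a long dimension, dimB a float, dimC a string, and the same four rows go into each source index. The schema-building details are assumptions, not the verbatim helper.

// Sketch of the helper referenced above; the dimension schema declarations
// are inferred from the asserted value types.
private IncrementalIndex getIndexWithNumericDims() throws Exception
{
  IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(
          new IncrementalIndexSchema.Builder()
              .withDimensionsSpec(new DimensionsSpec(Arrays.asList(
                  new LongDimensionSchema("dimA"),
                  new FloatDimensionSchema("dimB"),
                  new StringDimensionSchema("dimC")
              )))
              .withMetrics(new CountAggregatorFactory("count"))
              .build()
      )
      .setMaxRowCount(1000)
      .build();

  final List<String> dims = Arrays.asList("dimA", "dimB", "dimC");
  index.add(new MapBasedInputRow(1L, dims, ImmutableMap.of("dimA", 100L, "dimB", 4000.567f, "dimC", "Hello")));
  index.add(new MapBasedInputRow(1L, dims, ImmutableMap.of("dimA", 72L, "dimB", 60000.789f, "dimC", "World")));
  index.add(new MapBasedInputRow(1L, dims, ImmutableMap.of("dimA", 3001L, "dimB", 1.2345f, "dimC", "Foobar")));
  // Row with only dimC set; dimA and dimB surface as the configured null defaults.
  index.add(new MapBasedInputRow(1L, dims, ImmutableMap.of("dimC", "Nully Row")));
  return index;
}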
Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
From the class SchemalessIndexTest, the method makeRowPersistedIndexes:
private void makeRowPersistedIndexes()
{
  synchronized (log) {
    try {
      if (EVENTS.isEmpty()) {
        makeEvents();
      }

      for (final Map<String, Object> event : EVENTS) {
        final long timestamp = new DateTime(event.get(TIMESTAMP), ISOChronology.getInstanceUTC()).getMillis();

        // Every key that is neither the timestamp nor a known metric is a dimension.
        final List<String> dims = new ArrayList<>();
        for (Map.Entry<String, Object> entry : event.entrySet()) {
          if (!entry.getKey().equalsIgnoreCase(TIMESTAMP) && !METRICS.contains(entry.getKey())) {
            dims.add(entry.getKey());
          }
        }

        // One single-row incremental index per event.
        final IncrementalIndex rowIndex = new OnheapIncrementalIndex.Builder()
            .setIndexSchema(
                new IncrementalIndexSchema.Builder()
                    .withMinTimestamp(timestamp)
                    .withQueryGranularity(Granularities.MINUTE)
                    .withMetrics(METRIC_AGGS)
                    .build()
            )
            .setMaxRowCount(1000)
            .build();
        rowIndex.add(new MapBasedInputRow(timestamp, dims, event));

        // createTempFile yields a unique path; replace the file with a directory of the same name.
        File tmpFile = File.createTempFile("billy", "yay");
        tmpFile.delete();
        FileUtils.mkdirp(tmpFile);
        tmpFile.deleteOnExit();

        indexMerger.persist(rowIndex, tmpFile, INDEX_SPEC, null);
        ROW_PERSISTED_INDEXES.add(indexIO.loadIndex(tmpFile));
      }
    }
    catch (IOException e) {
      throw new RuntimeException(e);
    }
  }
}
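makeEvents() populates the static EVENTS list (in the real test, from JSON fixtures) and is not shown. Purely for orientation, a hypothetical event of the shape this loop expects; the keys and values below are illustrative, not the actual fixture data:

// Hypothetical example event; the real data ships with the test resources.
Map<String, Object> event = ImmutableMap.of(
    TIMESTAMP, "2011-01-12T00:00:00.000Z", // parsed above via DateTime + ISOChronology
    "dim", "foo",                          // not the timestamp and not in METRICS -> dimension
    "met", 1.0                             // listed in METRICS -> aggregated, not a dimension
);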
Use of org.apache.druid.segment.incremental.IncrementalIndex in project druid by druid-io.
From the class SchemalessIndexTest, the method getIncrementalIndex:
public static QueryableIndex getIncrementalIndex(int index1, int index2)
{
  synchronized (log) {
    if (EVENTS.isEmpty()) {
      makeEvents();
    }

    // Results are memoized per (index1, index2) pair.
    Map<Integer, QueryableIndex> entry = INCREMENTAL_INDEXES.get(index1);
    if (entry != null) {
      QueryableIndex index = entry.get(index2);
      if (index != null) {
        return index;
      }
    } else {
      entry = new HashMap<>();
      INCREMENTAL_INDEXES.put(index1, entry);
    }

    IncrementalIndex theIndex = null;

    int count = 0;
    for (final Map<String, Object> event : EVENTS) {
      // Only the events at positions index1 and index2 are indexed.
      if (count != index1 && count != index2) {
        count++;
        continue;
      }

      final long timestamp = new DateTime(event.get(TIMESTAMP), ISOChronology.getInstanceUTC()).getMillis();

      if (theIndex == null) {
        theIndex = new OnheapIncrementalIndex.Builder()
            .setIndexSchema(
                new IncrementalIndexSchema.Builder()
                    .withMinTimestamp(timestamp)
                    .withQueryGranularity(Granularities.MINUTE)
                    .withMetrics(METRIC_AGGS)
                    .build()
            )
            .setMaxRowCount(1000)
            .build();
      }

      final List<String> dims = new ArrayList<>();
      for (final Map.Entry<String, Object> val : event.entrySet()) {
        if (!val.getKey().equalsIgnoreCase(TIMESTAMP) && !METRICS.contains(val.getKey())) {
          dims.add(val.getKey());
        }
      }

      try {
        theIndex.add(new MapBasedInputRow(timestamp, dims, event));
      }
      catch (IndexSizeExceededException e) {
        throw new RuntimeException(e);
      }

      count++;
    }

    QueryableIndex retVal = TestIndex.persistRealtimeAndLoadMMapped(theIndex);
    entry.put(index2, retVal);
    return retVal;
  }
}
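The final step delegates to TestIndex.persistRealtimeAndLoadMMapped, which is not shown on this page. A plausible sketch, assuming it reuses the same unique-temp-directory trick as makeRowPersistedIndexes above:

// Sketch only; the real helper lives in org.apache.druid.segment.TestIndex.
public static QueryableIndex persistRealtimeAndLoadMMapped(IncrementalIndex index)
{
  try {
    File tmpFile = File.createTempFile("billy", "yay");
    tmpFile.delete();
    FileUtils.mkdirp(tmpFile);
    tmpFile.deleteOnExit();

    indexMerger.persist(index, tmpFile, INDEX_SPEC, null);
    return indexIO.loadIndex(tmpFile);
  }
  catch (IOException e) {
    throw new RuntimeException(e);
  }
}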