Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class DoubleStorageTest, method buildIndex.
private static QueryableIndex buildIndex(String storeDoubleAsFloat) throws IOException {
  // Temporarily override the double-storage property, remembering the previous value so it can be restored.
  String oldValue = System.getProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY);
  System.setProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY, storeDoubleAsFloat);

  final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(DateTimes.of("2011-01-13T00:00:00.000Z").getMillis())
      .withDimensionsSpec(ROW_PARSER)
      .withMetrics(new DoubleSumAggregatorFactory(DIM_FLOAT_NAME, DIM_FLOAT_NAME))
      .build();

  final IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(schema)
      .setMaxRowCount(MAX_ROWS)
      .build();

  getStreamOfEvents().forEach(o -> {
    try {
      index.add(ROW_PARSER.parseBatch((Map<String, Object>) o).get(0));
    } catch (IndexSizeExceededException e) {
      throw new RuntimeException(e);
    }
  });

  // Restore the original property so other tests are unaffected.
  if (oldValue == null) {
    System.clearProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY);
  } else {
    System.setProperty(ColumnHolder.DOUBLE_STORAGE_TYPE_PROPERTY, oldValue);
  }

  // Persist the in-memory index to a temporary directory, then load it back as a QueryableIndex.
  File someTmpFile = File.createTempFile("billy", "yay");
  someTmpFile.delete();
  FileUtils.mkdirp(someTmpFile);
  INDEX_MERGER_V9.persist(index, someTmpFile, new IndexSpec(), null);
  // File.delete() is a no-op on the now non-empty segment directory.
  someTmpFile.delete();
  return INDEX_IO.loadIndex(someTmpFile);
}
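Stripped of the test-specific system-property juggling, the pattern above is: build an IncrementalIndexSchema, fill an OnheapIncrementalIndex with rows, then persist and reload. A minimal sketch of that core pattern, using only the builder methods shown above; the column names, timestamps, and the 1,000-row cap are illustrative, not taken from the test:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
import org.apache.druid.segment.incremental.IndexSizeExceededException;
import org.apache.druid.segment.incremental.OnheapIncrementalIndex;

static IncrementalIndex buildSmallIndex() throws IndexSizeExceededException {
  // Schema: earliest allowed timestamp plus one double-sum metric; dimensions are discovered from the rows.
  IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
      .withMinTimestamp(DateTimes.of("2011-01-13T00:00:00.000Z").getMillis())
      .withMetrics(new DoubleSumAggregatorFactory("metric", "metric"))
      .build();

  // On-heap incremental index capped at 1,000 rows.
  IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(schema)
      .setMaxRowCount(1000)
      .build();

  // One row with an illustrative dimension "dim" and a metric value.
  index.add(new MapBasedInputRow(
      DateTimes.of("2011-01-13T01:00:00.000Z").getMillis(),
      ImmutableList.of("dim"),
      ImmutableMap.of("dim", "a", "metric", 1.0)
  ));
  return index;
}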
Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class GroupByQueryHelper, method createIndexAccumulatorPair.
public static <T> Pair<IncrementalIndex, Accumulator<IncrementalIndex, T>> createIndexAccumulatorPair(
    final GroupByQuery query,
    @Nullable final GroupByQuery subquery,
    final GroupByQueryConfig config
) {
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final Granularity gran = query.getGranularity();
  final long timeStart = query.getIntervals().get(0).getStartMillis();
  final boolean combine = subquery == null;

  long granTimeStart = timeStart;
  if (!(Granularities.ALL.equals(gran))) {
    granTimeStart = gran.bucketStart(timeStart);
  }

  final List<AggregatorFactory> aggs;
  if (combine) {
    aggs = Lists.transform(query.getAggregatorSpecs(), new Function<AggregatorFactory, AggregatorFactory>() {
      @Override
      public AggregatorFactory apply(AggregatorFactory input) {
        return input.getCombiningFactory();
      }
    });
  } else {
    aggs = query.getAggregatorSpecs();
  }

  final List<String> dimensions = Lists.transform(query.getDimensions(), new Function<DimensionSpec, String>() {
    @Override
    public String apply(DimensionSpec input) {
      return input.getOutputName();
    }
  });

  final IncrementalIndex index;
  final boolean sortResults = query.getContextValue(CTX_KEY_SORT_RESULTS, true);

  // All groupBy dimensions are strings, for now.
  final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
  for (DimensionSpec dimension : query.getDimensions()) {
    dimensionSchemas.add(new StringDimensionSchema(dimension.getOutputName()));
  }

  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(new DimensionsSpec(dimensionSchemas))
      .withMetrics(aggs.toArray(new AggregatorFactory[0]))
      .withQueryGranularity(gran)
      .withMinTimestamp(granTimeStart)
      .build();

  final AppendableIndexBuilder indexBuilder;
  if (query.getContextValue("useOffheap", false)) {
    throw new UnsupportedOperationException(
        "The 'useOffheap' option is no longer available for groupBy v1. Please move to the newer groupBy engine, "
        + "which always operates off-heap, by removing any custom 'druid.query.groupBy.defaultStrategy' runtime "
        + "properties and 'groupByStrategy' query context parameters that you have set."
    );
  } else {
    indexBuilder = new OnheapIncrementalIndex.Builder();
  }

  index = indexBuilder
      .setIndexSchema(indexSchema)
      .setDeserializeComplexMetrics(false)
      .setConcurrentEventAdd(true)
      .setSortFacts(sortResults)
      .setMaxRowCount(querySpecificConfig.getMaxResults())
      .build();

  Accumulator<IncrementalIndex, T> accumulator = new Accumulator<IncrementalIndex, T>() {
    @Override
    public IncrementalIndex accumulate(IncrementalIndex accumulated, T in) {
      final MapBasedRow mapBasedRow;
      if (in instanceof MapBasedRow) {
        mapBasedRow = (MapBasedRow) in;
      } else if (in instanceof ResultRow) {
        final ResultRow row = (ResultRow) in;
        mapBasedRow = row.toMapBasedRow(combine ? query : subquery);
      } else {
        throw new ISE("Unable to accumulate something of type [%s]", in.getClass());
      }
      try {
        accumulated.add(new MapBasedInputRow(mapBasedRow.getTimestamp(), dimensions, mapBasedRow.getEvent()));
      } catch (IndexSizeExceededException e) {
        throw new ResourceLimitExceededException(e.getMessage());
      }
      return accumulated;
    }
  };

  return new Pair<>(index, accumulator);
}
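In the groupBy v1 path, the returned pair is consumed by accumulating a sequence of result rows into the index. A hedged usage sketch; the query, config, and resultRows variables are assumed inputs and are not part of the listing above:

// Assumed inputs: a GroupByQuery named query, its GroupByQueryConfig named config,
// and a Sequence<ResultRow> named resultRows holding partial results for the same query.
Pair<IncrementalIndex, Accumulator<IncrementalIndex, ResultRow>> pair =
    GroupByQueryHelper.createIndexAccumulatorPair(query, null, config);

// Sequence.accumulate drives the Accumulator: every ResultRow is converted to a
// MapBasedInputRow and added to the incremental index.
IncrementalIndex index = resultRows.accumulate(pair.lhs, pair.rhs);

// ... read results back out of the index, then release its resources.
index.close();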
Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class SegmentGenerator, method generate.
public QueryableIndex generate(final DataSegment dataSegment, final GeneratorSchemaInfo schemaInfo, final Granularity granularity, final int numRows) {
  // In case we need to generate hyperUniques.
  ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());

  final String dataHash = Hashing.sha256()
      .newHasher()
      .putString(dataSegment.getId().toString(), StandardCharsets.UTF_8)
      .putString(schemaInfo.toString(), StandardCharsets.UTF_8)
      .putString(granularity.toString(), StandardCharsets.UTF_8)
      .putInt(numRows)
      .hash()
      .toString();

  final File outDir = new File(getSegmentDir(dataSegment.getId(), dataHash), "merged");
  if (outDir.exists()) {
    try {
      log.info("Found segment with hash[%s] cached in directory[%s].", dataHash, outDir);
      return TestHelper.getTestIndexIO().loadIndex(outDir);
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  log.info("Writing segment with hash[%s] to directory[%s].", dataHash, outDir);

  final DataGenerator dataGenerator = new DataGenerator(
      schemaInfo.getColumnSchemas(),
      dataSegment.getId().hashCode(), /* Use segment identifier hashCode as seed */
      schemaInfo.getDataInterval(),
      numRows
  );

  final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema.Builder()
      .withDimensionsSpec(schemaInfo.getDimensionsSpec())
      .withMetrics(schemaInfo.getAggsArray())
      .withRollup(schemaInfo.isWithRollup())
      .withQueryGranularity(granularity)
      .build();

  final List<InputRow> rows = new ArrayList<>();
  final List<QueryableIndex> indexes = new ArrayList<>();
  for (int i = 0; i < numRows; i++) {
    final InputRow row = dataGenerator.nextRow();
    rows.add(row);
    if ((i + 1) % 20000 == 0) {
      log.info("%,d/%,d rows generated for[%s].", i + 1, numRows, dataSegment);
    }
    if (rows.size() % MAX_ROWS_IN_MEMORY == 0) {
      indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema));
      rows.clear();
    }
  }
  log.info("%,d/%,d rows generated for[%s].", numRows, numRows, dataSegment);

  if (rows.size() > 0) {
    indexes.add(makeIndex(dataSegment.getId(), dataHash, indexes.size(), rows, indexSchema));
    rows.clear();
  }

  final QueryableIndex retVal;
  if (indexes.isEmpty()) {
    throw new ISE("No rows to index?");
  } else {
    try {
      final IndexSpec indexSpec = new IndexSpec(new RoaringBitmapSerdeFactory(true), null, null, null);
      retVal = TestHelper.getTestIndexIO().loadIndex(
          TestHelper.getTestIndexMergerV9(OffHeapMemorySegmentWriteOutMediumFactory.instance())
                    .mergeQueryableIndex(
                        indexes,
                        false,
                        schemaInfo.getAggs().stream().map(AggregatorFactory::getCombiningFactory).toArray(AggregatorFactory[]::new),
                        null,
                        outDir,
                        indexSpec,
                        indexSpec,
                        new BaseProgressIndicator(),
                        null,
                        -1
                    )
      );
      for (QueryableIndex index : indexes) {
        index.close();
      }
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  log.info("Finished writing segment[%s] to[%s]", dataSegment, outDir);
  return retVal;
}
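A hedged sketch of how this generator is typically driven, modeled on the Druid benchmark setup; the "basic" schema key, the segment coordinates, and the row count are assumptions, not taken from the method above:

import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.segment.QueryableIndex;
import org.apache.druid.segment.generator.GeneratorBasicSchemas;
import org.apache.druid.segment.generator.GeneratorSchemaInfo;
import org.apache.druid.segment.generator.SegmentGenerator;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.LinearShardSpec;

// A predefined generator schema and a segment identifier to attach the generated data to.
GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get("basic");
DataSegment dataSegment = DataSegment.builder()
    .dataSource("foo")
    .interval(schemaInfo.getDataInterval())
    .version("1")
    .shardSpec(new LinearShardSpec(0))
    .size(0)
    .build();

// SegmentGenerator is closeable; try-with-resources lets it clean up after itself.
try (SegmentGenerator segmentGenerator = new SegmentGenerator()) {
  // Generates (or reuses a cached copy of) a segment with 100,000 rows at hourly granularity.
  QueryableIndex index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.HOUR, 100_000);
  // ... run queries against the index ...
}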
Use of org.apache.druid.segment.incremental.IncrementalIndexSchema in project druid by druid-io.
From the class ExpressionSelectorsTest, method test_incrementalIndexStringSelector.
@Test
public void test_incrementalIndexStringSelector() throws IndexSizeExceededException {
  // This test covers a regression caused by ColumnCapabilities.isDictionaryEncoded not matching the value of
  // DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
  // exception trying to make an optimized string expression selector that was not appropriate to use for the
  // underlying dimension selector.
  // This occurred during schemaless ingestion with sparse dimension values and no explicit null rows, so the
  // conditions are replicated by this test. See https://github.com/apache/druid/pull/10248 for details.
  IncrementalIndexSchema schema = new IncrementalIndexSchema(
      0,
      new TimestampSpec("time", "millis", DateTimes.nowUtc()),
      Granularities.NONE,
      VirtualColumns.EMPTY,
      DimensionsSpec.EMPTY,
      new AggregatorFactory[]{new CountAggregatorFactory("count")},
      true
  );

  IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setMaxRowCount(100)
      .setIndexSchema(schema)
      .build();

  // Two rows with disjoint dimension sets: the first has only "x", the second only "y".
  index.add(new MapBasedInputRow(DateTimes.nowUtc().getMillis(), ImmutableList.of("x"), ImmutableMap.of("x", "foo")));
  index.add(new MapBasedInputRow(DateTimes.nowUtc().plusMillis(1000).getMillis(), ImmutableList.of("y"), ImmutableMap.of("y", "foo")));

  IncrementalIndexStorageAdapter adapter = new IncrementalIndexStorageAdapter(index);
  Sequence<Cursor> cursors = adapter.makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null);

  int rowsProcessed = cursors.map(cursor -> {
    DimensionSelector xExprSelector = ExpressionSelectors.makeDimensionSelector(
        cursor.getColumnSelectorFactory(),
        Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()),
        null
    );
    DimensionSelector yExprSelector = ExpressionSelectors.makeDimensionSelector(
        cursor.getColumnSelectorFactory(),
        Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()),
        null
    );
    int rowCount = 0;
    while (!cursor.isDone()) {
      Object x = xExprSelector.getObject();
      Object y = yExprSelector.getObject();
      List<String> expectedFoo = Collections.singletonList("foofoo");
      List<String> expectedNull = NullHandling.replaceWithDefault()
          ? Collections.singletonList("foo")
          : Collections.singletonList(null);
      if (rowCount == 0) {
        Assert.assertEquals(expectedFoo, x);
        Assert.assertEquals(expectedNull, y);
      } else {
        Assert.assertEquals(expectedNull, x);
        Assert.assertEquals(expectedFoo, y);
      }
      rowCount++;
      cursor.advance();
    }
    return rowCount;
  }).accumulate(0, (in, acc) -> in + acc);

  Assert.assertEquals(2, rowsProcessed);
}
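For comparison with the other examples on this page, the seven positional constructor arguments above (minTimestamp, timestampSpec, queryGranularity, virtualColumns, dimensionsSpec, metrics, rollup) correspond to the Builder setters. A sketch of the equivalent builder form, assuming the Builder exposes withTimestampSpec and withVirtualColumns setters for those arguments:

IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
    .withMinTimestamp(0)
    .withTimestampSpec(new TimestampSpec("time", "millis", DateTimes.nowUtc()))
    .withQueryGranularity(Granularities.NONE)
    .withVirtualColumns(VirtualColumns.EMPTY)
    .withDimensionsSpec(DimensionsSpec.EMPTY)
    .withMetrics(new CountAggregatorFactory("count"))
    .withRollup(true)
    .build();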