Use of org.joda.time.Interval in project druid by druid-io.
In the class SegmentMetadataQueryQueryToolChest, the method filterSegments:
@Override
public <T extends LogicalSegment> List<T> filterSegments(SegmentMetadataQuery query, List<T> segments) {
if (!query.isUsingDefaultInterval()) {
return segments;
}
if (segments.size() <= 1) {
return segments;
}
final T max = segments.get(segments.size() - 1);
DateTime targetEnd = max.getInterval().getEnd();
final Interval targetInterval = new Interval(config.getDefaultHistory(), targetEnd);
return Lists.newArrayList(Iterables.filter(segments, new Predicate<T>() {
@Override
public boolean apply(T input) {
return (input.getInterval().overlaps(targetInterval));
}
}));
}
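For context, here is a minimal standalone sketch (not Druid code) of the same overlap-based filtering using only Joda-Time: it keeps any interval that overlaps a trailing window of defaultHistory length, anchored at the end of the last interval in the (assumed time-ordered) list. The defaultHistory period and sample intervals are assumptions for illustration.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.joda.time.Period;

// Standalone sketch (not Druid code): keep only the intervals that overlap a
// trailing window of `defaultHistory` length, ending at the last interval's end.
public class DefaultHistoryFilterSketch
{
  public static List<Interval> filterByHistory(List<Interval> segments, Period defaultHistory)
  {
    if (segments.size() <= 1) {
      return segments;
    }
    DateTime targetEnd = segments.get(segments.size() - 1).getEnd();
    // Interval(ReadablePeriod, ReadableInstant) builds the window ending at targetEnd.
    Interval target = new Interval(defaultHistory, targetEnd);
    List<Interval> kept = new ArrayList<>();
    for (Interval segment : segments) {
      if (segment.overlaps(target)) {
        kept.add(segment);
      }
    }
    return kept;
  }

  public static void main(String[] args)
  {
    List<Interval> segments = Arrays.asList(
        Interval.parse("2016-01-01/2016-01-02"),
        Interval.parse("2016-06-01/2016-06-02"),
        Interval.parse("2016-06-02/2016-06-03")
    );
    // With a one-week default history, only the June intervals remain.
    System.out.println(filterByHistory(segments, Period.weeks(1)));
  }
}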
Use of org.joda.time.Interval in project druid by druid-io.
In the class IndexMerger, the method makeIndexFiles:
protected File makeIndexFiles(final List<IndexableAdapter> indexes, final AggregatorFactory[] metricAggs, final File outDir, final ProgressIndicator progress, final List<String> mergedDimensions, final List<String> mergedMetrics, final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn, final IndexSpec indexSpec) throws IOException {
List<Metadata> metadataList = Lists.transform(indexes, new Function<IndexableAdapter, Metadata>() {
@Nullable
@Override
public Metadata apply(IndexableAdapter input) {
return input.getMetadata();
}
});
Metadata segmentMetadata = null;
if (metricAggs != null) {
AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
for (int i = 0; i < metricAggs.length; i++) {
combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
}
segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
} else {
segmentMetadata = Metadata.merge(metadataList, null);
}
final Map<String, ValueType> valueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
final Map<String, ColumnCapabilitiesImpl> columnCapabilities = Maps.newHashMap();
final List<ColumnCapabilitiesImpl> dimCapabilities = new ArrayList<>();
for (IndexableAdapter adapter : indexes) {
for (String dimension : adapter.getDimensionNames()) {
ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(dimension);
ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
if (mergedCapabilities == null) {
mergedCapabilities = new ColumnCapabilitiesImpl();
}
columnCapabilities.put(dimension, mergedCapabilities.merge(capabilities));
}
for (String metric : adapter.getMetricNames()) {
ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(metric);
ColumnCapabilities capabilities = adapter.getCapabilities(metric);
if (mergedCapabilities == null) {
mergedCapabilities = new ColumnCapabilitiesImpl();
}
columnCapabilities.put(metric, mergedCapabilities.merge(capabilities));
valueTypes.put(metric, capabilities.getType());
metricTypeNames.put(metric, adapter.getMetricType(metric));
}
}
for (String dimension : mergedDimensions) {
dimCapabilities.add(columnCapabilities.get(dimension));
}
Closer closer = Closer.create();
try {
final Interval dataInterval;
final File v8OutDir = new File(outDir, "v8-tmp");
FileUtils.forceMkdir(v8OutDir);
registerDeleteDirectory(closer, v8OutDir);
File tmpPeonFilesDir = new File(v8OutDir, "tmpPeonFiles");
FileUtils.forceMkdir(tmpPeonFilesDir);
registerDeleteDirectory(closer, tmpPeonFilesDir);
final IOPeon ioPeon = new TmpFileIOPeon(tmpPeonFilesDir, true);
closer.register(ioPeon);
/************* Main index.drd file **************/
progress.progress();
long startTime = System.currentTimeMillis();
File indexFile = new File(v8OutDir, "index.drd");
try (FileOutputStream fileOutputStream = new FileOutputStream(indexFile);
FileChannel channel = fileOutputStream.getChannel()) {
channel.write(ByteBuffer.wrap(new byte[] { IndexIO.V8_VERSION }));
GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
DateTime minTime = new DateTime(JodaUtils.MAX_INSTANT);
DateTime maxTime = new DateTime(JodaUtils.MIN_INSTANT);
for (IndexableAdapter index : indexes) {
minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
}
dataInterval = new Interval(minTime, maxTime);
serializerUtils.writeString(channel, String.format("%s/%s", minTime, maxTime));
serializerUtils.writeString(channel, mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory()));
}
IndexIO.checkFileSize(indexFile);
log.info("outDir[%s] completed index.drd in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
/************* Setup Dim Conversions **************/
progress.progress();
startTime = System.currentTimeMillis();
final ArrayList<FileOutputSupplier> dimOuts = Lists.newArrayListWithCapacity(mergedDimensions.size());
final DimensionHandler[] handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
final List<DimensionMerger> mergers = new ArrayList<>();
for (int i = 0; i < mergedDimensions.size(); i++) {
DimensionMergerLegacy merger = handlers[i].makeLegacyMerger(indexSpec, v8OutDir, ioPeon, dimCapabilities.get(i), progress);
mergers.add(merger);
merger.writeMergedValueMetadata(indexes);
FileOutputSupplier dimOut = new FileOutputSupplier(merger.makeDimFile(), true);
merger.writeValueMetadataToFile(dimOut);
dimOuts.add(dimOut);
}
log.info("outDir[%s] completed dim conversions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
/************* Walk through data sets and merge them *************/
progress.progress();
startTime = System.currentTimeMillis();
Iterable<Rowboat> theRows = makeRowIterable(indexes, mergedDimensions, mergedMetrics, rowMergerFn, dimCapabilities, handlers, mergers);
LongSupplierSerializer timeWriter = CompressionFactory.getLongSerializer(ioPeon, "little_end_time", IndexIO.BYTE_ORDER, indexSpec.getLongEncoding(), CompressedObjectStrategy.DEFAULT_COMPRESSION_STRATEGY);
timeWriter.open();
ArrayList<MetricColumnSerializer> metWriters = Lists.newArrayListWithCapacity(mergedMetrics.size());
final CompressedObjectStrategy.CompressionStrategy metCompression = indexSpec.getMetricCompression();
final CompressionFactory.LongEncodingStrategy longEncoding = indexSpec.getLongEncoding();
for (String metric : mergedMetrics) {
ValueType type = valueTypes.get(metric);
switch(type) {
case LONG:
metWriters.add(new LongMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression, longEncoding));
break;
case FLOAT:
metWriters.add(new FloatMetricColumnSerializer(metric, v8OutDir, ioPeon, metCompression));
break;
case COMPLEX:
final String typeName = metricTypeNames.get(metric);
ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
if (serde == null) {
throw new ISE("Unknown type[%s]", typeName);
}
metWriters.add(new ComplexMetricColumnSerializer(metric, v8OutDir, ioPeon, serde));
break;
default:
throw new ISE("Unknown type[%s]", type);
}
}
for (MetricColumnSerializer metWriter : metWriters) {
metWriter.open();
}
int rowCount = 0;
long time = System.currentTimeMillis();
List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(indexes.size());
for (IndexableAdapter index : indexes) {
int[] arr = new int[index.getNumRows()];
Arrays.fill(arr, INVALID_ROW);
rowNumConversions.add(IntBuffer.wrap(arr));
}
for (Rowboat theRow : theRows) {
progress.progress();
timeWriter.add(theRow.getTimestamp());
final Object[] metrics = theRow.getMetrics();
for (int i = 0; i < metrics.length; ++i) {
metWriters.get(i).serialize(metrics[i]);
}
Object[] dims = theRow.getDims();
for (int i = 0; i < dims.length; ++i) {
mergers.get(i).processMergedRow(dims[i]);
}
for (Map.Entry<Integer, TreeSet<Integer>> comprisedRow : theRow.getComprisedRows().entrySet()) {
final IntBuffer conversionBuffer = rowNumConversions.get(comprisedRow.getKey());
for (Integer rowNum : comprisedRow.getValue()) {
while (conversionBuffer.position() < rowNum) {
conversionBuffer.put(INVALID_ROW);
}
conversionBuffer.put(rowCount);
}
}
if ((++rowCount % 500000) == 0) {
log.info("outDir[%s] walked 500,000/%,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - time);
time = System.currentTimeMillis();
}
}
for (IntBuffer rowNumConversion : rowNumConversions) {
rowNumConversion.rewind();
}
final File timeFile = IndexIO.makeTimeFile(v8OutDir, IndexIO.BYTE_ORDER);
timeFile.delete();
ByteSink out = Files.asByteSink(timeFile, FileWriteMode.APPEND);
timeWriter.closeAndConsolidate(out);
IndexIO.checkFileSize(timeFile);
for (MetricColumnSerializer metWriter : metWriters) {
metWriter.close();
}
log.info("outDir[%s] completed walk through of %,d rows in %,d millis.", v8OutDir, rowCount, System.currentTimeMillis() - startTime);
/************ Create Inverted Indexes and Finalize Columns *************/
startTime = System.currentTimeMillis();
final File invertedFile = new File(v8OutDir, "inverted.drd");
Files.touch(invertedFile);
out = Files.asByteSink(invertedFile, FileWriteMode.APPEND);
final File geoFile = new File(v8OutDir, "spatial.drd");
Files.touch(geoFile);
OutputSupplier<FileOutputStream> spatialOut = Files.newOutputStreamSupplier(geoFile, true);
for (int i = 0; i < mergedDimensions.size(); i++) {
DimensionMergerLegacy legacyMerger = (DimensionMergerLegacy) mergers.get(i);
legacyMerger.writeIndexes(rowNumConversions, closer);
legacyMerger.writeIndexesToFiles(out, spatialOut);
legacyMerger.writeRowValuesToFile(dimOuts.get(i));
}
log.info("outDir[%s] completed inverted.drd and wrote dimensions in %,d millis.", v8OutDir, System.currentTimeMillis() - startTime);
final Function<String, String> dimFilenameFunction = new Function<String, String>() {
@Override
public String apply(@Nullable String input) {
String formatString;
if (columnCapabilities.get(input).isDictionaryEncoded()) {
formatString = "dim_%s.drd";
} else {
formatString = String.format("numeric_dim_%%s_%s.drd", IndexIO.BYTE_ORDER);
}
return GuavaUtils.formatFunction(formatString).apply(input);
}
};
final ArrayList<String> expectedFiles = Lists.newArrayList(Iterables.concat(Arrays.asList("index.drd", "inverted.drd", "spatial.drd", String.format("time_%s.drd", IndexIO.BYTE_ORDER)), Iterables.transform(mergedDimensions, dimFilenameFunction), Iterables.transform(mergedMetrics, GuavaUtils.formatFunction(String.format("met_%%s_%s.drd", IndexIO.BYTE_ORDER)))));
if (segmentMetadata != null) {
writeMetadataToFile(new File(v8OutDir, "metadata.drd"), segmentMetadata);
log.info("wrote metadata.drd in outDir[%s].", v8OutDir);
expectedFiles.add("metadata.drd");
}
Map<String, File> files = Maps.newLinkedHashMap();
for (String fileName : expectedFiles) {
files.put(fileName, new File(v8OutDir, fileName));
}
File smooshDir = new File(v8OutDir, "smoosher");
FileUtils.forceMkdir(smooshDir);
for (Map.Entry<String, File> entry : Smoosh.smoosh(v8OutDir, smooshDir, files).entrySet()) {
entry.getValue().delete();
}
for (File file : smooshDir.listFiles()) {
Files.move(file, new File(v8OutDir, file.getName()));
}
if (!smooshDir.delete()) {
log.info("Unable to delete temporary dir[%s], contains[%s]", smooshDir, Arrays.asList(smooshDir.listFiles()));
throw new IOException(String.format("Unable to delete temporary dir[%s]", smooshDir));
}
createIndexDrdFile(IndexIO.V8_VERSION, v8OutDir, GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY), GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY), dataInterval, indexSpec.getBitmapSerdeFactory());
indexIO.getDefaultIndexIOHandler().convertV8toV9(v8OutDir, outDir, indexSpec);
return outDir;
} catch (Throwable t) {
throw closer.rethrow(t);
} finally {
closer.close();
}
}
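The only Interval-specific work in this long method is computing the umbrella data interval written to index.drd: the earliest start and latest end across the per-index data intervals, serialized in "start/end" form. Below is a standalone sketch of just that step (not Druid code; the sample intervals are made up).

import java.util.Arrays;
import java.util.List;

import org.joda.time.DateTime;
import org.joda.time.Interval;

// Standalone sketch (not Druid code): compute the umbrella interval covering a set
// of per-index data intervals, the way makeIndexFiles derives dataInterval.
public class UmbrellaIntervalSketch
{
  public static Interval umbrella(List<Interval> dataIntervals)
  {
    DateTime minStart = dataIntervals.get(0).getStart();
    DateTime maxEnd = dataIntervals.get(0).getEnd();
    for (Interval interval : dataIntervals) {
      if (interval.getStart().isBefore(minStart)) {
        minStart = interval.getStart();
      }
      if (interval.getEnd().isAfter(maxEnd)) {
        maxEnd = interval.getEnd();
      }
    }
    return new Interval(minStart, maxEnd);
  }

  public static void main(String[] args)
  {
    List<Interval> perIndex = Arrays.asList(
        Interval.parse("2016-01-01/2016-01-02"),
        Interval.parse("2016-01-03/2016-01-05")
    );
    Interval dataInterval = umbrella(perIndex);
    // Interval.toString() already produces an ISO "start/end" string, equivalent to
    // String.format("%s/%s", start, end) on the two DateTimes (zone-dependent).
    System.out.println(dataInterval);
  }
}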
Use of org.joda.time.Interval in project druid by druid-io.
In the class QueryableIndexStorageAdapter, the method makeCursors:
@Override
public Sequence<Cursor> makeCursors(Filter filter, Interval interval, VirtualColumns virtualColumns, Granularity gran, boolean descending) {
Interval actualInterval = interval;
long minDataTimestamp = getMinTime().getMillis();
long maxDataTimestamp = getMaxTime().getMillis();
final Interval dataInterval = new Interval(minDataTimestamp, gran.bucketEnd(getMaxTime()).getMillis());
if (!actualInterval.overlaps(dataInterval)) {
return Sequences.empty();
}
if (actualInterval.getStart().isBefore(dataInterval.getStart())) {
actualInterval = actualInterval.withStart(dataInterval.getStart());
}
if (actualInterval.getEnd().isAfter(dataInterval.getEnd())) {
actualInterval = actualInterval.withEnd(dataInterval.getEnd());
}
final ColumnSelectorBitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector(index.getBitmapFactoryForDimensions(), virtualColumns, index);
/**
* Filters can be applied in two stages:
* pre-filtering: Use bitmap indexes to prune the set of rows to be scanned.
* post-filtering: Iterate through rows and apply the filter to the row values
*
* The pre-filter and post-filter step have an implicit AND relationship. (i.e., final rows are those that
* were not pruned AND those that matched the filter during row scanning)
*
* An AND filter can have its subfilters partitioned across the two steps. The subfilters that can be
* processed entirely with bitmap indexes (subfilter returns true for supportsBitmapIndex())
* will be moved to the pre-filtering stage.
*
* Any subfilters that cannot be processed entirely with bitmap indexes will be moved to the post-filtering stage.
*/
final Offset offset;
final List<Filter> postFilters = new ArrayList<>();
if (filter == null) {
offset = new NoFilterOffset(0, index.getNumRows(), descending);
} else {
final List<Filter> preFilters = new ArrayList<>();
if (filter instanceof AndFilter) {
// If we get an AndFilter, we can split the subfilters across both filtering stages
for (Filter subfilter : ((AndFilter) filter).getFilters()) {
if (subfilter.supportsBitmapIndex(selector)) {
preFilters.add(subfilter);
} else {
postFilters.add(subfilter);
}
}
} else {
// If we get an OrFilter or a single filter, handle the filter in one stage
if (filter.supportsBitmapIndex(selector)) {
preFilters.add(filter);
} else {
postFilters.add(filter);
}
}
if (preFilters.size() == 0) {
offset = new NoFilterOffset(0, index.getNumRows(), descending);
} else {
// Use AndFilter.getBitmapIndex to intersect the preFilters to get its short-circuiting behavior.
offset = BitmapOffset.of(AndFilter.getBitmapIndex(selector, preFilters), descending, (long) getNumRows());
}
}
final Filter postFilter;
if (postFilters.size() == 0) {
postFilter = null;
} else if (postFilters.size() == 1) {
postFilter = postFilters.get(0);
} else {
postFilter = new AndFilter(postFilters);
}
return Sequences.filter(new CursorSequenceBuilder(this, actualInterval, virtualColumns, gran, offset, minDataTimestamp, maxDataTimestamp, descending, postFilter, selector).build(), Predicates.<Cursor>notNull());
}
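A standalone sketch (not Druid code) of the interval-clipping step at the top of makeCursors: reject requests that do not overlap the data interval, otherwise narrow the requested interval to it. Joda's Interval.overlap() computes the same intersection in one call; the sample intervals are made up.

import org.joda.time.Interval;

// Standalone sketch (not Druid code): clip a requested query interval to the
// segment's data interval, mirroring the withStart/withEnd logic above.
public class IntervalClipSketch
{
  // Returns null when the intervals do not overlap (the caller returns an empty Sequence).
  public static Interval clip(Interval requested, Interval data)
  {
    if (!requested.overlaps(data)) {
      return null;
    }
    Interval actual = requested;
    if (actual.getStart().isBefore(data.getStart())) {
      actual = actual.withStart(data.getStart());
    }
    if (actual.getEnd().isAfter(data.getEnd())) {
      actual = actual.withEnd(data.getEnd());
    }
    return actual;
  }

  public static void main(String[] args)
  {
    Interval data = Interval.parse("2016-01-01/2016-02-01");
    Interval requested = Interval.parse("2015-12-15/2016-01-10");
    // Both lines print the 2016-01-01/2016-01-10 intersection.
    System.out.println(clip(requested, data));
    System.out.println(requested.overlap(data));
  }
}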
Use of org.joda.time.Interval in project druid by druid-io.
In the class TopNQueryEngine, the method query:
public Sequence<Result<TopNResultValue>> query(final TopNQuery query, final StorageAdapter adapter) {
if (adapter == null) {
throw new SegmentMissingException("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
}
final List<Interval> queryIntervals = query.getQuerySegmentSpec().getIntervals();
final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
final Granularity granularity = query.getGranularity();
final Function<Cursor, Result<TopNResultValue>> mapFn = getMapFn(query, adapter);
Preconditions.checkArgument(queryIntervals.size() == 1, "Can only handle a single interval, got[%s]", queryIntervals);
return Sequences.filter(Sequences.map(adapter.makeCursors(filter, queryIntervals.get(0), query.getVirtualColumns(), granularity, query.isDescending()), new Function<Cursor, Result<TopNResultValue>>() {
@Override
public Result<TopNResultValue> apply(Cursor input) {
log.debug("Running over cursor[%s]", adapter.getInterval(), input.getTime());
return mapFn.apply(input);
}
}), Predicates.<Result<TopNResultValue>>notNull());
}
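Here TopN only touches the Interval API to enforce the single-interval contract before handing queryIntervals.get(0) to makeCursors. A standalone sketch of that guard (not Druid code; the helper name is made up), using the same Guava Preconditions idiom:

import java.util.List;

import com.google.common.base.Preconditions;
import org.joda.time.Interval;

// Standalone sketch (not Druid code): extract the single interval a per-segment
// engine is allowed to receive, failing fast the way the check above does.
public class SingleIntervalSketch
{
  public static Interval onlyInterval(List<Interval> queryIntervals)
  {
    Preconditions.checkArgument(
        queryIntervals.size() == 1,
        "Can only handle a single interval, got[%s]",
        queryIntervals
    );
    return queryIntervals.get(0);
  }
}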
Use of org.joda.time.Interval in project druid by druid-io.
In the class SelectQueryEngine, the method process:
public Sequence<Result<SelectResultValue>> process(final SelectQuery query, final Segment segment) {
final StorageAdapter adapter = segment.asStorageAdapter();
if (adapter == null) {
throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
}
// at the point where this code is called, only one datasource should exist.
String dataSource = Iterables.getOnlyElement(query.getDataSource().getNames());
final Iterable<DimensionSpec> dims;
if (query.getDimensions() == null || query.getDimensions().isEmpty()) {
dims = DefaultDimensionSpec.toSpec(adapter.getAvailableDimensions());
} else {
dims = query.getDimensions();
}
final Iterable<String> metrics;
if (query.getMetrics() == null || query.getMetrics().isEmpty()) {
metrics = adapter.getAvailableMetrics();
} else {
metrics = query.getMetrics();
}
List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
// should be rewritten with given interval
final String segmentId = DataSegmentUtils.withInterval(dataSource, segment.getIdentifier(), intervals.get(0));
final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getDimensionsFilter()));
return QueryRunnerHelper.makeCursorBasedQuery(adapter, query.getQuerySegmentSpec().getIntervals(), filter, query.getVirtualColumns(), query.isDescending(), query.getGranularity(), new Function<Cursor, Result<SelectResultValue>>() {
@Override
public Result<SelectResultValue> apply(Cursor cursor) {
final SelectResultValueBuilder builder = new SelectResultValueBuilder(cursor.getTime(), query.getPagingSpec(), query.isDescending());
final LongColumnSelector timestampColumnSelector = cursor.makeLongColumnSelector(Column.TIME_COLUMN_NAME);
final List<ColumnSelectorPlus<SelectColumnSelectorStrategy>> selectorPlusList = Arrays.asList(DimensionHandlerUtils.createColumnSelectorPluses(STRATEGY_FACTORY, Lists.newArrayList(dims), cursor));
for (DimensionSpec dimSpec : dims) {
builder.addDimension(dimSpec.getOutputName());
}
final Map<String, ObjectColumnSelector> metSelectors = Maps.newHashMap();
for (String metric : metrics) {
final ObjectColumnSelector metricSelector = cursor.makeObjectColumnSelector(metric);
metSelectors.put(metric, metricSelector);
builder.addMetric(metric);
}
final PagingOffset offset = query.getPagingOffset(segmentId);
cursor.advanceTo(offset.startDelta());
int lastOffset = offset.startOffset();
for (; !cursor.isDone() && offset.hasNext(); cursor.advance(), offset.next()) {
final Map<String, Object> theEvent = singleEvent(EventHolder.timestampKey, timestampColumnSelector, selectorPlusList, metSelectors);
builder.addEntry(new EventHolder(segmentId, lastOffset = offset.current(), theEvent));
}
builder.finished(segmentId, lastOffset);
return builder.build();
}
});
}
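The Interval-specific step here is rewriting the segment identifier so it carries the single query interval instead of the segment's own interval; the actual grammar lives in DataSegmentUtils.withInterval. As a rough illustration only, the sketch below rewrites a simplified, hypothetical identifier of the form "dataSource_start_end_version" — the real Druid identifier format, partition suffixes, and edge cases may differ.

import org.joda.time.Interval;

// Rough illustration only (not DataSegmentUtils): rewrite the interval portion of a
// simplified, hypothetical identifier of the form "dataSource_start_end_version".
// Trailing chunks such as partition numbers are ignored in this simplification.
public class IdentifierIntervalSketch
{
  public static String withInterval(String dataSource, String identifier, Interval newInterval)
  {
    // Strip "dataSource_" from the front, then split the remainder on underscores.
    String rest = identifier.substring(dataSource.length() + 1);
    String[] parts = rest.split("_");
    if (parts.length < 3) {
      // Unrecognized shape: fall back to the original identifier.
      return identifier;
    }
    String version = parts[2];
    return String.format(
        "%s_%s_%s_%s",
        dataSource,
        newInterval.getStart(),
        newInterval.getEnd(),
        version
    );
  }

  public static void main(String[] args)
  {
    String id = "wikipedia_2016-01-01T00:00:00.000Z_2016-01-02T00:00:00.000Z_v1";
    System.out.println(withInterval("wikipedia", id, Interval.parse("2016-01-01T06:00:00Z/2016-01-01T12:00:00Z")));
  }
}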