Use of org.apache.druid.segment.Segment in project druid by druid-io.
From class BroadcastJoinableMMappedQueryableSegmentizerFactoryTest, method testSegmentizer. The test persists an IncrementalIndex with a broadcast-joinable segmentizer factory, checks that the factory round-trips through the segment's factory.json, and loads the persisted segment as an IndexedTable:
@Test
public void testSegmentizer() throws IOException, SegmentLoadingException
{
  final ObjectMapper mapper = new DefaultObjectMapper();
  mapper.registerModule(new SegmentizerModule());
  final IndexIO indexIO = new IndexIO(mapper, () -> 0);
  mapper.setInjectableValues(
      new InjectableValues.Std()
          .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE)
          .addValue(ObjectMapper.class.getName(), mapper)
          .addValue(IndexIO.class, indexIO)
          .addValue(DataSegment.PruneSpecsHolder.class, DataSegment.PruneSpecsHolder.DEFAULT)
  );
  IndexMerger indexMerger = new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance());
  SegmentizerFactory expectedFactory = new BroadcastJoinableMMappedQueryableSegmentizerFactory(indexIO, KEY_COLUMNS);
  Interval testInterval = Intervals.of("2011-01-12T00:00:00.000Z/2011-05-01T00:00:00.000Z");
  IncrementalIndex data = TestIndex.makeRealtimeIndex("druid.sample.numeric.tsv");
  List<String> columnNames = data.getColumnNames();

  // Persisting with a custom segmentizer factory in the IndexSpec writes a factory.json
  // alongside the segment files.
  File segment = new File(temporaryFolder.newFolder(), "segment");
  File persistedSegmentRoot = indexMerger.persist(
      data, testInterval, segment, new IndexSpec(null, null, null, null, expectedFactory), null
  );
  File factoryJson = new File(persistedSegmentRoot, "factory.json");
  Assert.assertTrue(factoryJson.exists());
  SegmentizerFactory factory = mapper.readValue(factoryJson, SegmentizerFactory.class);
  Assert.assertTrue(factory instanceof BroadcastJoinableMMappedQueryableSegmentizerFactory);
  Assert.assertEquals(expectedFactory, factory);

  // load a segment
  final DataSegment dataSegment = new DataSegment(
      TABLE_NAME, testInterval, DateTimes.nowUtc().toString(), ImmutableMap.of(), columnNames,
      ImmutableList.of(), null, null, persistedSegmentRoot.getTotalSpace()
  );
  final Segment loaded = factory.factorize(dataSegment, persistedSegmentRoot, false, SegmentLazyLoadFailCallback.NOOP);
  final BroadcastSegmentIndexedTable table = (BroadcastSegmentIndexedTable) loaded.as(IndexedTable.class);
  Assert.assertNotNull(table);
}
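The loaded Segment is viewable as an IndexedTable because the factory wraps the mapped segment in a BroadcastSegmentIndexedTable. As a minimal sketch of inspecting it further (not in the original test; it assumes IndexedTable's keyColumns() and numRows() accessors):

// Hedged sketch: further inspection of the broadcast table (assumed accessors).
Assert.assertEquals(ImmutableSet.copyOf(KEY_COLUMNS), table.keyColumns());
Assert.assertTrue(table.numRows() > 0);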
Use of org.apache.druid.segment.Segment in project druid by druid-io.
From class SegmentManagerTest, method testDropSegment. The test loads every segment in SEGMENTS, drops two of them concurrently on an executor, and asserts that only the remaining three are still served:
@Test
public void testDropSegment() throws SegmentLoadingException, ExecutionException, InterruptedException
{
  for (DataSegment eachSegment : SEGMENTS) {
    Assert.assertTrue(segmentManager.loadSegment(eachSegment, false, SegmentLazyLoadFailCallback.NOOP));
  }
  final List<Future<Void>> futures = ImmutableList.of(SEGMENTS.get(0), SEGMENTS.get(2))
      .stream()
      .map(segment -> executor.submit(() -> {
        segmentManager.dropSegment(segment);
        return (Void) null;
      }))
      .collect(Collectors.toList());
  for (Future<Void> eachFuture : futures) {
    eachFuture.get();
  }
  assertResult(ImmutableList.of(SEGMENTS.get(1), SEGMENTS.get(3), SEGMENTS.get(4)));
}
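The segmentManager, executor, and assertResult members come from the elided test class. A plausible executor setup is a small thread pool; the following is a sketch under that assumption, not the test's actual wiring:

// Hypothetical setup for the executor field used above; the real test class
// defines its own executor and segmentManager in its setup method.
ExecutorService executor = Executors.newFixedThreadPool(SEGMENTS.size());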
Use of org.apache.druid.segment.Segment in project druid by druid-io.
From class DistinctCountGroupByQueryTest, method testGroupByWithDistinctCountAgg. The test builds an on-heap IncrementalIndex, adds three rows, and runs a group-by over client_type with an exact distinct count of visitor_id:
@Test
public void testGroupByWithDistinctCountAgg() throws Exception
{
  IncrementalIndex index = new OnheapIncrementalIndex.Builder()
      .setIndexSchema(
          new IncrementalIndexSchema.Builder()
              .withQueryGranularity(Granularities.SECOND)
              .withMetrics(new CountAggregatorFactory("cnt"))
              .build()
      )
      .setConcurrentEventAdd(true)
      .setMaxRowCount(1000)
      .build();
  String visitor_id = "visitor_id";
  String client_type = "client_type";
  long timestamp = DateTimes.of("2010-01-01").getMillis();

  // Three rows: visitor_ids "0" and "1" on iphone, "2" on android.
  index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "0", client_type, "iphone")));
  index.add(new MapBasedInputRow(timestamp + 1, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "1", client_type, "iphone")));
  index.add(new MapBasedInputRow(timestamp + 2, Lists.newArrayList(visitor_id, client_type), ImmutableMap.of(visitor_id, "2", client_type, "android")));

  GroupByQuery query = new GroupByQuery.Builder()
      .setDataSource(QueryRunnerTestHelper.DATA_SOURCE)
      .setGranularity(QueryRunnerTestHelper.ALL_GRAN)
      .setDimensions(new DefaultDimensionSpec(client_type, client_type))
      .setInterval(QueryRunnerTestHelper.FULL_ON_INTERVAL_SPEC)
      .setLimitSpec(new DefaultLimitSpec(Collections.singletonList(new OrderByColumnSpec(client_type, OrderByColumnSpec.Direction.DESCENDING)), 10))
      .setAggregatorSpecs(QueryRunnerTestHelper.ROWS_COUNT, new DistinctCountAggregatorFactory("UV", visitor_id, null))
      .build();
  final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
  Iterable<ResultRow> results = GroupByQueryRunnerTestHelper.runQuery(factory, factory.createRunner(incrementalIndexSegment), query);

  List<ResultRow> expectedResults = Arrays.asList(
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970-01-01T00:00:00.000Z", client_type, "iphone", "UV", 2L, "rows", 2L),
      GroupByQueryRunnerTestHelper.createExpectedRow(query, "1970-01-01T00:00:00.000Z", client_type, "android", "UV", 1L, "rows", 1L)
  );
  TestHelper.assertExpectedObjects(expectedResults, results, "distinct-count");
}
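The expected rows follow directly from the inputs: visitor_ids "0" and "1" share client_type "iphone" (UV = 2), while "2" is the only "android" row (UV = 1), and the 1970-01-01 timestamp is a consequence of ALL_GRAN bucketing everything together. DistinctCountAggregatorFactory comes from Druid's distinct-count contrib extension and computes exact, bitmap-based distinct counts. Standing alone it looks like this (a sketch; treating the third constructor argument as a bitmap serde factory, where null selects the default, is an assumption):

// (name, fieldName, bitmap serde factory) -- null falls back to the default serde.
AggregatorFactory uv = new DistinctCountAggregatorFactory("UV", "visitor_id", null);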
Use of org.apache.druid.segment.Segment in project druid by druid-io.
From class ScanQueryEngine, method process. The engine turns a ScanQuery plus a Segment into a Sequence of ScanResultValue batches, honoring row limits, timeouts, and the legacy timestamp column:
public Sequence<ScanResultValue> process(
    final ScanQuery query,
    final Segment segment,
    final ResponseContext responseContext
)
{
  // "legacy" should be non-null due to toolChest.mergeResults
  final boolean legacy = Preconditions.checkNotNull(query.isLegacy(), "Expected non-null 'legacy' parameter");

  // Short-circuit if an unordered query has already hit its row limit on a previous segment.
  final Long numScannedRows = responseContext.getRowScanCount();
  if (numScannedRows != null
      && numScannedRows >= query.getScanRowsLimit()
      && query.getTimeOrder().equals(ScanQuery.Order.NONE)) {
    return Sequences.empty();
  }

  final boolean hasTimeout = QueryContexts.hasTimeout(query);
  final Long timeoutAt = responseContext.getTimeoutTime();
  final long start = System.currentTimeMillis();

  final StorageAdapter adapter = segment.asStorageAdapter();
  if (adapter == null) {
    throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
  }

  final List<String> allColumns = new ArrayList<>();
  if (query.getColumns() != null && !query.getColumns().isEmpty()) {
    if (legacy && !query.getColumns().contains(LEGACY_TIMESTAMP_KEY)) {
      allColumns.add(LEGACY_TIMESTAMP_KEY);
    }
    // Unless we're in legacy mode, allColumns equals query.getColumns() exactly. This is nice since it makes
    // the compactedList form easier to use.
    allColumns.addAll(query.getColumns());
  } else {
    // No explicit column list: scan everything (time column, virtual columns, dimensions, metrics).
    final Set<String> availableColumns = Sets.newLinkedHashSet(
        Iterables.concat(
            Collections.singleton(legacy ? LEGACY_TIMESTAMP_KEY : ColumnHolder.TIME_COLUMN_NAME),
            Iterables.transform(
                Arrays.asList(query.getVirtualColumns().getVirtualColumns()),
                VirtualColumn::getOutputName
            ),
            adapter.getAvailableDimensions(),
            adapter.getAvailableMetrics()
        )
    );
    allColumns.addAll(availableColumns);
    if (legacy) {
      allColumns.remove(ColumnHolder.TIME_COLUMN_NAME);
    }
  }

  final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
  Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);

  final SegmentId segmentId = segment.getId();
  final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter()));

  // Initialize the row scan count in the response context to 0 if it is not already set
  // (adding 0 is otherwise a no-op).
  responseContext.addRowScanCount(0);
  final long limit = calculateRemainingScanRowsLimit(query, responseContext);

  return Sequences.concat(
      adapter
          .makeCursors(
              filter,
              intervals.get(0),
              query.getVirtualColumns(),
              Granularities.ALL,
              query.getTimeOrder().equals(ScanQuery.Order.DESCENDING)
              || (query.getTimeOrder().equals(ScanQuery.Order.NONE) && query.isDescending()),
              null
          )
          .map(cursor -> new BaseSequence<>(
              new BaseSequence.IteratorMaker<ScanResultValue, Iterator<ScanResultValue>>()
              {
                @Override
                public Iterator<ScanResultValue> make()
                {
                  // One value selector per output column; the legacy timestamp column reads __time.
                  final List<BaseObjectColumnValueSelector> columnSelectors = new ArrayList<>(allColumns.size());
                  for (String column : allColumns) {
                    final BaseObjectColumnValueSelector selector;
                    if (legacy && LEGACY_TIMESTAMP_KEY.equals(column)) {
                      selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME);
                    } else {
                      selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(column);
                    }
                    columnSelectors.add(selector);
                  }

                  final int batchSize = query.getBatchSize();

                  return new Iterator<ScanResultValue>()
                  {
                    private long offset = 0;

                    @Override
                    public boolean hasNext()
                    {
                      return !cursor.isDone() && offset < limit;
                    }

                    @Override
                    public ScanResultValue next()
                    {
                      if (!hasNext()) {
                        throw new NoSuchElementException();
                      }
                      if (hasTimeout && System.currentTimeMillis() >= timeoutAt) {
                        throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query [%s] timed out", query.getId()));
                      }
                      final long lastOffset = offset;
                      final Object events;
                      final ScanQuery.ResultFormat resultFormat = query.getResultFormat();
                      if (ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
                        events = rowsToCompactedList();
                      } else if (ScanQuery.ResultFormat.RESULT_FORMAT_LIST.equals(resultFormat)) {
                        events = rowsToList();
                      } else {
                        throw new UOE("resultFormat[%s] is not supported", resultFormat.toString());
                      }
                      responseContext.addRowScanCount(offset - lastOffset);
                      if (hasTimeout) {
                        // Subtract the time spent in this segment from the remaining timeout budget.
                        responseContext.putTimeoutTime(timeoutAt - (System.currentTimeMillis() - start));
                      }
                      return new ScanResultValue(segmentId.toString(), allColumns, events);
                    }

                    @Override
                    public void remove()
                    {
                      throw new UnsupportedOperationException();
                    }

                    private List<List<Object>> rowsToCompactedList()
                    {
                      final List<List<Object>> events = new ArrayList<>(batchSize);
                      final long iterLimit = Math.min(limit, offset + batchSize);
                      for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final List<Object> theEvent = new ArrayList<>(allColumns.size());
                        for (int j = 0; j < allColumns.size(); j++) {
                          theEvent.add(getColumnValue(j));
                        }
                        events.add(theEvent);
                      }
                      return events;
                    }

                    private List<Map<String, Object>> rowsToList()
                    {
                      List<Map<String, Object>> events = Lists.newArrayListWithCapacity(batchSize);
                      final long iterLimit = Math.min(limit, offset + batchSize);
                      for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final Map<String, Object> theEvent = new LinkedHashMap<>();
                        for (int j = 0; j < allColumns.size(); j++) {
                          theEvent.put(allColumns.get(j), getColumnValue(j));
                        }
                        events.add(theEvent);
                      }
                      return events;
                    }

                    private Object getColumnValue(int i)
                    {
                      final BaseObjectColumnValueSelector selector = columnSelectors.get(i);
                      final Object value;
                      if (legacy && allColumns.get(i).equals(LEGACY_TIMESTAMP_KEY)) {
                        value = DateTimes.utc((long) selector.getObject());
                      } else {
                        value = selector == null ? null : selector.getObject();
                      }
                      return value;
                    }
                  };
                }

                @Override
                public void cleanup(Iterator<ScanResultValue> iterFromMake)
                {
                }
              }
          ))
  );
}
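For context, a query that exercises the compactedList path above can be assembled with Druids.newScanQueryBuilder(). This is a minimal sketch; the datasource, interval, and column names are illustrative, not from the original source:

// Hedged sketch of a ScanQuery this engine could process.
final ScanQuery scanQuery = Druids.newScanQueryBuilder()
    .dataSource("example_datasource") // illustrative name
    .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-01-12/2011-05-01"))))
    .columns("page", "added") // illustrative columns
    .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
    .limit(100)
    .build();
// engine.process(scanQuery, segment, ResponseContext.createEmpty()) would then yield
// batches of up to query.getBatchSize() rows per ScanResultValue.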
Use of org.apache.druid.segment.Segment in project druid by druid-io.
From class QueryRunnerTestHelper, method makeFilteringQueryRunner. The helper wraps a timeline-backed QueryRunner so that toolChest.filterSegments decides which segments actually run, then merges the per-segment sequences:
public static <T> QueryRunner<T> makeFilteringQueryRunner(
    final VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline,
    final QueryRunnerFactory<T, Query<T>> factory
)
{
  final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
  return new FluentQueryRunnerBuilder<T>(toolChest)
      .create(
          new QueryRunner<T>()
          {
            @Override
            public Sequence<T> run(QueryPlus<T> queryPlus, ResponseContext responseContext)
            {
              Query<T> query = queryPlus.getQuery();
              List<TimelineObjectHolder> segments = new ArrayList<>();
              for (Interval interval : query.getIntervals()) {
                segments.addAll(timeline.lookup(interval));
              }
              List<Sequence<T>> sequences = new ArrayList<>();
              for (TimelineObjectHolder<String, ReferenceCountingSegment> holder : toolChest.filterSegments(query, segments)) {
                Segment segment = holder.getObject().getChunk(0).getObject();
                QueryPlus queryPlusRunning = queryPlus.withQuery(
                    queryPlus.getQuery().withQuerySegmentSpec(
                        new SpecificSegmentSpec(new SegmentDescriptor(holder.getInterval(), holder.getVersion(), 0))
                    )
                );
                sequences.add(factory.createRunner(segment).run(queryPlusRunning, responseContext));
              }
              return new MergeSequence<>(query.getResultOrdering(), Sequences.simple(sequences));
            }
          }
      )
      .applyPreMergeDecoration()
      .mergeResults()
      .applyPostMergeDecoration();
}
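To feed this helper, the timeline must be populated with reference-counted segments. A hedged sketch, assuming ReferenceCountingSegment.wrapRootGenerationSegment and SingleElementPartitionChunk as they exist in druid-io (the version string "v1" is arbitrary):

// Hedged sketch of building the timeline argument from a plain Segment.
VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline =
    new VersionedIntervalTimeline<>(Ordering.natural());
ReferenceCountingSegment refSegment = ReferenceCountingSegment.wrapRootGenerationSegment(segment);
timeline.add(segment.getDataInterval(), "v1", new SingleElementPartitionChunk<>(refSegment));
// timeline can now be passed to makeFilteringQueryRunner together with a QueryRunnerFactory.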