Use of org.apache.druid.segment.join.table.ReferenceCountingIndexedTable in project druid by druid-io.
The class SegmentManager, method loadSegment.
/**
 * Load a single segment.
 *
 * @param segment segment to load
 * @param lazy whether to lazily load column metadata
 * @param loadFailed callback to execute if the segment's lazy load fails
 *
 * @return true if the segment was newly loaded, false if it was already loaded
 *
 * @throws SegmentLoadingException if the segment cannot be loaded
 */
public boolean loadSegment(final DataSegment segment, boolean lazy, SegmentLazyLoadFailCallback loadFailed)
    throws SegmentLoadingException
{
  final ReferenceCountingSegment adapter = getSegmentReference(segment, lazy, loadFailed);
  final SettableSupplier<Boolean> resultSupplier = new SettableSupplier<>();

  // compute() is used to ensure that the operation for a data source is executed atomically
  dataSources.compute(
      segment.getDataSource(),
      (k, v) -> {
        final DataSourceState dataSourceState = v == null ? new DataSourceState() : v;
        final VersionedIntervalTimeline<String, ReferenceCountingSegment> loadedIntervals =
            dataSourceState.getTimeline();
        final PartitionChunk<ReferenceCountingSegment> entry = loadedIntervals.findChunk(
            segment.getInterval(),
            segment.getVersion(),
            segment.getShardSpec().getPartitionNum()
        );

        if (entry != null) {
          log.warn("Told to load an adapter for segment[%s] that already exists", segment.getId());
          resultSupplier.set(false);
        } else {
          IndexedTable table = adapter.as(IndexedTable.class);
          if (table != null) {
            if (dataSourceState.isEmpty() || dataSourceState.numSegments == dataSourceState.tablesLookup.size()) {
              dataSourceState.tablesLookup.put(segment.getId(), new ReferenceCountingIndexedTable(table));
            } else {
              log.error("Cannot load segment[%s] with IndexedTable, no existing segments are joinable", segment.getId());
            }
          } else if (dataSourceState.tablesLookup.size() > 0) {
            log.error("Cannot load segment[%s] without IndexedTable, all existing segments are joinable", segment.getId());
          }
          loadedIntervals.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(adapter));
          dataSourceState.addSegment(segment);
          resultSupplier.set(true);
        }
        return dataSourceState;
      }
  );
  return resultSupplier.get();
}
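SegmentLazyLoadFailCallback is a functional interface, so the failure handler can be passed as a lambda. Below is a minimal caller sketch, not taken from the Druid codebase: the segmentManager, segment, and log variables are assumed to exist in scope.

// Hypothetical caller sketch: segmentManager, segment, and log are assumptions.
boolean newlyLoaded = segmentManager.loadSegment(
    segment,
    true, // lazy: defer loading column metadata until first use
    () -> log.warn("Lazy load failed for segment[%s]", segment.getId()) // runs only if the lazy load fails
);
if (!newlyLoaded) {
  log.info("Segment[%s] was already loaded", segment.getId());
}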
Use of org.apache.druid.segment.join.table.ReferenceCountingIndexedTable in project druid by druid-io.
The class BroadcastTableJoinableFactory, method getOnlyIndexedTable.
private Optional<ReferenceCountingIndexedTable> getOnlyIndexedTable(DataSource dataSource)
{
  GlobalTableDataSource broadcastDataSource = (GlobalTableDataSource) dataSource;
  DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(dataSource);
  return segmentManager.getIndexedTables(analysis).flatMap(tables -> {
    Iterator<ReferenceCountingIndexedTable> tableIterator = tables.iterator();
    if (!tableIterator.hasNext()) {
      return Optional.empty();
    }
    try {
      return Optional.of(Iterators.getOnlyElement(tableIterator));
    }
    catch (IllegalArgumentException iae) {
      throw new ISE(
          "Currently only single segment datasources are supported for broadcast joins, dataSource[%s] has"
          + " multiple segments. Reingest the data so that it is entirely contained within a single segment to"
          + " use in JOIN queries.",
          broadcastDataSource.getName()
      );
    }
  });
}
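For context, the factory's build() method is the typical consumer of this helper. The sketch below is a simplified assumption of how that wrapping might look (the real implementation also handles reference acquisition); it relies on Druid's existing Joinable, JoinConditionAnalysis, and IndexedTableJoinable types.

@Override
public Optional<Joinable> build(DataSource dataSource, JoinConditionAnalysis condition)
{
  if (!condition.canHashJoin()) {
    // IndexedTable-backed joins are hash joins; bail out for other join conditions
    return Optional.empty();
  }
  // Simplified sketch: wrap the single broadcast table in a Joinable
  return getOnlyIndexedTable(dataSource).map(IndexedTableJoinable::new);
}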
Use of org.apache.druid.segment.join.table.ReferenceCountingIndexedTable in project druid by druid-io.
The class SegmentManager, method dropSegment.
public void dropSegment(final DataSegment segment)
{
  final String dataSource = segment.getDataSource();

  // compute() is used to ensure that the operation for a data source is executed atomically
  dataSources.compute(
      dataSource,
      (dataSourceName, dataSourceState) -> {
        if (dataSourceState == null) {
          log.info("Told to delete a queryable for a dataSource[%s] that doesn't exist.", dataSourceName);
          return null;
        } else {
          final VersionedIntervalTimeline<String, ReferenceCountingSegment> loadedIntervals =
              dataSourceState.getTimeline();
          final ShardSpec shardSpec = segment.getShardSpec();
          final PartitionChunk<ReferenceCountingSegment> removed = loadedIntervals.remove(
              segment.getInterval(),
              segment.getVersion(),
              // remove() internally searches for a partitionChunk to remove which is *equal* to the given
              // partitionChunk. Note that partitionChunk.equals() checks only the partitionNum, but not the object.
              segment.getShardSpec().createChunk(ReferenceCountingSegment.wrapSegment(null, shardSpec))
          );
          final ReferenceCountingSegment oldQueryable = (removed == null) ? null : removed.getObject();

          if (oldQueryable != null) {
            try (final Closer closer = Closer.create()) {
              dataSourceState.removeSegment(segment);
              closer.register(oldQueryable);
              log.info("Attempting to close segment %s", segment.getId());
              final ReferenceCountingIndexedTable oldTable = dataSourceState.tablesLookup.remove(segment.getId());
              if (oldTable != null) {
                closer.register(oldTable);
              }
            }
            catch (IOException e) {
              throw new RuntimeException(e);
            }
          } else {
            log.info(
                "Told to delete a queryable on dataSource[%s] for interval[%s] and version[%s] that I don't have.",
                dataSourceName,
                segment.getInterval(),
                segment.getVersion()
            );
          }

          // Returning null removes the entry of dataSource from the map
          return dataSourceState.isEmpty() ? null : dataSourceState;
        }
      }
  );
  segmentLoader.cleanup(segment);
}
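The Closer block above guarantees that both the segment and its IndexedTable are closed even if one close() throws. A standalone sketch of the idiom, with hypothetical closeables oldSegment and oldTable:

// Sketch of the Closer idiom; oldSegment and oldTable are hypothetical Closeables.
try (Closer closer = Closer.create()) {
  closer.register(oldSegment); // registered first, closed last
  closer.register(oldTable);   // registered last, closed first
  // closer.close() runs at the end of the try block and closes in reverse registration order
}
catch (IOException e) {
  throw new RuntimeException(e);
}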
Use of org.apache.druid.segment.join.table.ReferenceCountingIndexedTable in project druid by druid-io.
The class SegmentManager, method getIndexedTables.
/**
* Returns the collection of {@link IndexedTable} for the entire timeline (since join conditions do not currently
* consider the query's intervals), if a timeline exists and each of its segments is joinable.
*/
public Optional<Stream<ReferenceCountingIndexedTable>> getIndexedTables(DataSourceAnalysis analysis)
{
  return getTimeline(analysis).map(timeline -> {
    // join doesn't currently consider intervals, so just consider all segments
    final Stream<ReferenceCountingSegment> segments =
        timeline.lookup(Intervals.ETERNITY)
                .stream()
                .flatMap(x -> StreamSupport.stream(x.getObject().payloads().spliterator(), false));
    final TableDataSource tableDataSource = getTableDataSource(analysis);
    ConcurrentHashMap<SegmentId, ReferenceCountingIndexedTable> tables =
        Optional.ofNullable(dataSources.get(tableDataSource.getName()))
                .map(DataSourceState::getTablesLookup)
                .orElseThrow(() -> new ISE("Datasource %s does not have IndexedTables", tableDataSource.getName()));
    return segments.map(segment -> tables.get(segment.getId())).filter(Objects::nonNull);
  });
}
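Callers receive Optional.empty() when no timeline exists for the analyzed datasource; otherwise they get a stream of reference-counted tables. A hypothetical consumer sketch, assuming segmentManager, analysis, and log exist in scope:

// Hypothetical consumer: segmentManager, analysis, and log are assumptions.
segmentManager.getIndexedTables(analysis).ifPresent(tables ->
    tables.forEach(table ->
        // IndexedTable exposes basic shape information such as version() and numRows()
        log.info("Found joinable table version[%s] with %d rows", table.version(), table.numRows())
    )
);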