
Example 1 with ReferenceCountingIndexedTable

Use of org.apache.druid.segment.join.table.ReferenceCountingIndexedTable in project druid by druid-io.

The class SegmentManager, method loadSegment.

/**
 * Load a single segment.
 *
 * @param segment segment to load
 * @param lazy    whether to lazily load column metadata
 * @param loadFailed callback to execute if the segment fails to load lazily
 *
 * @return true if the segment was newly loaded, false if it was already loaded
 *
 * @throws SegmentLoadingException if the segment cannot be loaded
 */
public boolean loadSegment(final DataSegment segment, boolean lazy, SegmentLazyLoadFailCallback loadFailed) throws SegmentLoadingException {
    final ReferenceCountingSegment adapter = getSegmentReference(segment, lazy, loadFailed);
    final SettableSupplier<Boolean> resultSupplier = new SettableSupplier<>();
    // compute() is used to ensure that the operation for a data source is executed atomically
    dataSources.compute(segment.getDataSource(), (k, v) -> {
        final DataSourceState dataSourceState = v == null ? new DataSourceState() : v;
        final VersionedIntervalTimeline<String, ReferenceCountingSegment> loadedIntervals = dataSourceState.getTimeline();
        final PartitionChunk<ReferenceCountingSegment> entry = loadedIntervals.findChunk(segment.getInterval(), segment.getVersion(), segment.getShardSpec().getPartitionNum());
        if (entry != null) {
            log.warn("Told to load an adapter for segment[%s] that already exists", segment.getId());
            resultSupplier.set(false);
        } else {
            IndexedTable table = adapter.as(IndexedTable.class);
            if (table != null) {
                if (dataSourceState.isEmpty() || dataSourceState.numSegments == dataSourceState.tablesLookup.size()) {
                    dataSourceState.tablesLookup.put(segment.getId(), new ReferenceCountingIndexedTable(table));
                } else {
                    log.error("Cannot load segment[%s] with IndexedTable, no existing segments are joinable", segment.getId());
                }
            } else if (dataSourceState.tablesLookup.size() > 0) {
                log.error("Cannot load segment[%s] without IndexedTable, all existing segments are joinable", segment.getId());
            }
            loadedIntervals.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(adapter));
            dataSourceState.addSegment(segment);
            resultSupplier.set(true);
        }
        return dataSourceState;
    });
    return resultSupplier.get();
}
Also used : ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) ReferenceCountingIndexedTable(org.apache.druid.segment.join.table.ReferenceCountingIndexedTable) IndexedTable(org.apache.druid.segment.join.table.IndexedTable)
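For orientation, here is a minimal sketch of how a caller might invoke loadSegment. The segmentManager and segment values are assumed to come from elsewhere, the SegmentManager import path is assumed, and the zero-argument lambda stands in for a SegmentLazyLoadFailCallback (treated here as a functional interface); this is an illustrative sketch, not code from the Druid project.

import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.server.SegmentManager; // package assumed for this sketch
import org.apache.druid.timeline.DataSegment;

// Hypothetical caller: segmentManager and segment are obtained elsewhere
// (for example via injection and a coordinator load request).
static boolean loadExample(SegmentManager segmentManager, DataSegment segment) {
    try {
        // lazy=false loads column metadata eagerly; the callback only fires if a lazy load fails,
        // so a no-op lambda is enough for this sketch.
        final boolean newlyLoaded = segmentManager.loadSegment(segment, false, () -> {});
        // false means the timeline already contained a chunk for this segment and nothing was added.
        return newlyLoaded;
    } catch (SegmentLoadingException e) {
        throw new RuntimeException(e);
    }
}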

Example 2 with ReferenceCountingIndexedTable

Use of org.apache.druid.segment.join.table.ReferenceCountingIndexedTable in project druid by druid-io.

The class BroadcastTableJoinableFactory, method getOnlyIndexedTable.

private Optional<ReferenceCountingIndexedTable> getOnlyIndexedTable(DataSource dataSource) {
    GlobalTableDataSource broadcastDataSource = (GlobalTableDataSource) dataSource;
    DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(dataSource);
    return segmentManager.getIndexedTables(analysis).flatMap(tables -> {
        Iterator<ReferenceCountingIndexedTable> tableIterator = tables.iterator();
        if (!tableIterator.hasNext()) {
            return Optional.empty();
        }
        try {
            return Optional.of(Iterators.getOnlyElement(tableIterator));
        } catch (IllegalArgumentException iae) {
            throw new ISE("Currently only single segment datasources are supported for broadcast joins, dataSource[%s] has multiple segments. Reingest the data so that it is entirely contained within a single segment to use in JOIN queries.", broadcastDataSource.getName());
        }
    });
}
Also used : ReferenceCountingIndexedTable(org.apache.druid.segment.join.table.ReferenceCountingIndexedTable) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) ISE(org.apache.druid.java.util.common.ISE) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis)
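The heart of this method is an "exactly one table or fail" check built on Guava's Iterators.getOnlyElement. Below is a standalone sketch of that pattern, independent of the Druid types; the class and method names are illustrative only.

import com.google.common.collect.Iterators;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;

final class OnlyElementSketch {
    // Empty input -> Optional.empty(); exactly one element -> that element;
    // more than one -> fail loudly, mirroring getOnlyIndexedTable above.
    static <T> Optional<T> onlyElementOrEmpty(Iterable<T> items) {
        final Iterator<T> iterator = items.iterator();
        if (!iterator.hasNext()) {
            return Optional.empty();
        }
        try {
            return Optional.of(Iterators.getOnlyElement(iterator));
        } catch (IllegalArgumentException tooMany) {
            throw new IllegalStateException("Expected exactly one element but found several", tooMany);
        }
    }

    public static void main(String[] args) {
        System.out.println(onlyElementOrEmpty(List.of()));      // Optional.empty
        System.out.println(onlyElementOrEmpty(List.of("a")));   // Optional[a]
        // onlyElementOrEmpty(List.of("a", "b")) would throw IllegalStateException.
    }
}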

Example 3 with ReferenceCountingIndexedTable

Use of org.apache.druid.segment.join.table.ReferenceCountingIndexedTable in project druid by druid-io.

The class SegmentManager, method dropSegment.

public void dropSegment(final DataSegment segment) {
    final String dataSource = segment.getDataSource();
    // compute() is used to ensure that the operation for a data source is executed atomically
    dataSources.compute(dataSource, (dataSourceName, dataSourceState) -> {
        if (dataSourceState == null) {
            log.info("Told to delete a queryable for a dataSource[%s] that doesn't exist.", dataSourceName);
            return null;
        } else {
            final VersionedIntervalTimeline<String, ReferenceCountingSegment> loadedIntervals = dataSourceState.getTimeline();
            final ShardSpec shardSpec = segment.getShardSpec();
            final PartitionChunk<ReferenceCountingSegment> removed = loadedIntervals.remove(
                    segment.getInterval(),
                    segment.getVersion(),
                    // remove() searches for a partitionChunk equal to the given one. Note that
                    // partitionChunk.equals() checks only the partitionNum, but not the object.
                    segment.getShardSpec().createChunk(ReferenceCountingSegment.wrapSegment(null, shardSpec)));
            final ReferenceCountingSegment oldQueryable = (removed == null) ? null : removed.getObject();
            if (oldQueryable != null) {
                try (final Closer closer = Closer.create()) {
                    dataSourceState.removeSegment(segment);
                    closer.register(oldQueryable);
                    log.info("Attempting to close segment %s", segment.getId());
                    final ReferenceCountingIndexedTable oldTable = dataSourceState.tablesLookup.remove(segment.getId());
                    if (oldTable != null) {
                        closer.register(oldTable);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            } else {
                log.info("Told to delete a queryable on dataSource[%s] for interval[%s] and version[%s] that I don't have.", dataSourceName, segment.getInterval(), segment.getVersion());
            }
            // Returning null removes the entry of dataSource from the map
            return dataSourceState.isEmpty() ? null : dataSourceState;
        }
    });
    segmentLoader.cleanup(segment);
}
Also used : ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) Closer(org.apache.druid.java.util.common.io.Closer) ReferenceCountingIndexedTable(org.apache.druid.segment.join.table.ReferenceCountingIndexedTable) IOException(java.io.IOException) ShardSpec(org.apache.druid.timeline.partition.ShardSpec)
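The cleanup above relies on Druid's Closer so that both the segment and its ReferenceCountingIndexedTable are released even if one close() throws. Here is a standalone sketch of that pattern; the DummyResource class is an illustrative stand-in for those closeables, not part of Druid.

import java.io.Closeable;
import java.io.IOException;
import org.apache.druid.java.util.common.io.Closer;

final class CloserSketch {
    // Illustrative stand-in for the segment / indexed table being released.
    static final class DummyResource implements Closeable {
        private final String name;
        DummyResource(String name) { this.name = name; }
        @Override
        public void close() {
            System.out.println("closed " + name);
        }
    }

    public static void main(String[] args) {
        // Everything registered with the Closer is closed when the try block exits,
        // so intermediate bookkeeping cannot leak either resource.
        try (Closer closer = Closer.create()) {
            closer.register(new DummyResource("segment"));
            closer.register(new DummyResource("indexed table"));
            // ... bookkeeping between registrations, as in dropSegment above ...
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}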

Example 4 with ReferenceCountingIndexedTable

Use of org.apache.druid.segment.join.table.ReferenceCountingIndexedTable in project druid by druid-io.

The class SegmentManager, method getIndexedTables.

/**
 * Returns the collection of {@link IndexedTable} for each joinable segment across the entire timeline (join
 * conditions do not currently consider the query's intervals), provided the timeline exists for the analyzed
 * data source.
 */
public Optional<Stream<ReferenceCountingIndexedTable>> getIndexedTables(DataSourceAnalysis analysis) {
    return getTimeline(analysis).map(timeline -> {
        // join doesn't currently consider intervals, so just consider all segments
        final Stream<ReferenceCountingSegment> segments = timeline.lookup(Intervals.ETERNITY).stream().flatMap(x -> StreamSupport.stream(x.getObject().payloads().spliterator(), false));
        final TableDataSource tableDataSource = getTableDataSource(analysis);
        ConcurrentHashMap<SegmentId, ReferenceCountingIndexedTable> tables = Optional.ofNullable(dataSources.get(tableDataSource.getName())).map(DataSourceState::getTablesLookup).orElseThrow(() -> new ISE("Datasource %s does not have IndexedTables", tableDataSource.getName()));
        return segments.map(segment -> tables.get(segment.getId())).filter(Objects::nonNull);
    });
}
Also used : ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) ReferenceCountingIndexedTable(org.apache.druid.segment.join.table.ReferenceCountingIndexedTable) DataSourceAnalysis(org.apache.druid.query.planning.DataSourceAnalysis) Intervals(org.apache.druid.java.util.common.Intervals) Inject(com.google.inject.Inject) SegmentLazyLoadFailCallback(org.apache.druid.segment.SegmentLazyLoadFailCallback) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) CollectionUtils(org.apache.druid.utils.CollectionUtils) SegmentLoader(org.apache.druid.segment.loading.SegmentLoader) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) Map(java.util.Map) StreamSupport(java.util.stream.StreamSupport) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) Closer(org.apache.druid.java.util.common.io.Closer) IndexedTable(org.apache.druid.segment.join.table.IndexedTable) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) IOException(java.io.IOException) TableDataSource(org.apache.druid.query.TableDataSource) Objects(java.util.Objects) Stream(java.util.stream.Stream) Ordering(com.google.common.collect.Ordering) DataSegment(org.apache.druid.timeline.DataSegment) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) SegmentId(org.apache.druid.timeline.SegmentId)
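A hedged sketch of how a caller might consume the Optional<Stream<ReferenceCountingIndexedTable>> returned by getIndexedTables; the segmentManager and analysis parameters, the SegmentManager import path, and the counting performed here are assumptions for illustration only.

import java.util.stream.Stream;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.server.SegmentManager; // package assumed for this sketch

// Hypothetical caller: counts the joinable segments of the analyzed data source.
static long countJoinableSegments(SegmentManager segmentManager, DataSourceAnalysis analysis) {
    // An empty Optional means no timeline exists for the data source; an ISE is thrown
    // inside getIndexedTables if the data source has no IndexedTables at all.
    return segmentManager.getIndexedTables(analysis)
                         .map(Stream::count)
                         .orElse(0L);
}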

Aggregations

ReferenceCountingIndexedTable (org.apache.druid.segment.join.table.ReferenceCountingIndexedTable): 4
ReferenceCountingSegment (org.apache.druid.segment.ReferenceCountingSegment): 3
IOException (java.io.IOException): 2
SettableSupplier (org.apache.druid.common.guava.SettableSupplier): 2
ISE (org.apache.druid.java.util.common.ISE): 2
Closer (org.apache.druid.java.util.common.io.Closer): 2
DataSourceAnalysis (org.apache.druid.query.planning.DataSourceAnalysis): 2
IndexedTable (org.apache.druid.segment.join.table.IndexedTable): 2
ShardSpec (org.apache.druid.timeline.partition.ShardSpec): 2
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 1
Ordering (com.google.common.collect.Ordering): 1
Inject (com.google.inject.Inject): 1
Map (java.util.Map): 1
Objects (java.util.Objects): 1
Optional (java.util.Optional): 1
Set (java.util.Set): 1
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1
Stream (java.util.stream.Stream): 1
StreamSupport (java.util.stream.StreamSupport): 1
Intervals (org.apache.druid.java.util.common.Intervals): 1