Search in sources :

Example 6 with RowSignature

use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.

the class DruidUnionDataSourceRel method toDruidQuery.

/**
 * Builds a query over a {@link UnionDataSource} assembled from the inputs of {@code unionRel}.
 *
 * <p>Every input must be a scan-or-mapping rel whose query reads from a {@link TableDataSource}, and all inputs
 * must produce the same list of output column names. When an input violates one of these rules a planning error
 * is recorded on the planner context and a {@link CannotBuildQueryException} is thrown.
 *
 * @param finalizeAggregations passed through to {@code partialQuery.build}; the per-input queries are always
 *                             built with {@code false}
 * @return the query built by {@code partialQuery} on top of the union of all input tables
 * @throws CannotBuildQueryException if an input is unsupported, the inputs disagree on column names, there are no
 *                                   inputs, or the column names differ from {@code unionColumnNames}
 */
@Override
public DruidQuery toDruidQuery(final boolean finalizeAggregations) {
    final List<TableDataSource> unionMembers = new ArrayList<>();
    // Signature of the first input; every later input is compared against it.
    RowSignature commonSignature = null;

    for (final RelNode input : unionRel.getInputs()) {
        final DruidRel<?> inputRel = (DruidRel<?>) input;

        if (!DruidRels.isScanOrMapping(inputRel, false)) {
            getPlannerContext().setPlanningError("SQL requires union between inputs that are not simple table scans " + "and involve a filter or aliasing");
            throw new CannotBuildQueryException(inputRel);
        }

        final DruidQuery inputQuery = inputRel.toDruidQuery(false);
        final DataSource inputDataSource = inputQuery.getDataSource();

        if (!(inputDataSource instanceof TableDataSource)) {
            getPlannerContext().setPlanningError("SQL requires union with input of '%s' type that is not supported." + " Union operation is only supported between regular tables. ", inputDataSource.getClass().getSimpleName());
            throw new CannotBuildQueryException(inputRel);
        }

        if (commonSignature == null) {
            commonSignature = inputQuery.getOutputRowSignature();
        }

        // Only column names are compared here.
        if (!commonSignature.getColumnNames().equals(inputQuery.getOutputRowSignature().getColumnNames())) {
            getPlannerContext().setPlanningError("There is a mismatch between the output row signature of input tables and the row signature of union output.");
            throw new CannotBuildQueryException(inputRel);
        }

        unionMembers.add((TableDataSource) inputDataSource);
    }

    if (commonSignature == null) {
        // No inputs.
        throw new CannotBuildQueryException(unionRel);
    }

    // Sanity check: the columns we are building on must equal "unionColumnNames"
    // (NOTE(review): presumably registered when this rel was created -- confirm).
    if (!commonSignature.getColumnNames().equals(unionColumnNames)) {
        throw new CannotBuildQueryException(unionRel);
    }

    return partialQuery.build(
        new UnionDataSource(unionMembers),
        commonSignature,
        getPlannerContext(),
        getCluster().getRexBuilder(),
        finalizeAggregations
    );
}
Also used : TableDataSource(org.apache.druid.query.TableDataSource) RelNode(org.apache.calcite.rel.RelNode) ArrayList(java.util.ArrayList) RowSignature(org.apache.druid.segment.column.RowSignature) UnionDataSource(org.apache.druid.query.UnionDataSource) DataSource(org.apache.druid.query.DataSource) TableDataSource(org.apache.druid.query.TableDataSource) UnionDataSource(org.apache.druid.query.UnionDataSource)

Example 7 with RowSignature

use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.

the class DruidSchema method refreshSegmentsForDataSource.

/**
 * Attempt to refresh "segmentSignatures" for a set of segments for a particular dataSource. Returns the set of
 * segments actually refreshed, which may be a subset of the asked-for set.
 *
 * <p>At most {@code MAX_SEGMENTS_PER_QUERY} of the given segments are passed to the segment metadata query. A
 * segment counts as refreshed only if an analysis came back for it and it is still present in
 * {@code segmentMetadataInfo} at the time the result is applied.
 *
 * @param dataSource the datasource every entry of {@code segments} must belong to
 * @param segments   the segments to refresh
 * @return the ids of the segments whose metadata was updated
 * @throws ISE         if any segment does not belong to {@code dataSource}
 * @throws IOException declared by this method; closing the yielder is the visible candidate source
 */
private Set<SegmentId> refreshSegmentsForDataSource(final String dataSource, final Set<SegmentId> segments) throws IOException {
    // Sanity check. We definitely expect this to pass.
    final boolean allBelongToDataSource =
        segments.stream().allMatch(segmentId -> segmentId.getDataSource().equals(dataSource));
    if (!allBelongToDataSource) {
        throw new ISE("'segments' must all match 'dataSource'!");
    }

    log.debug("Refreshing metadata for dataSource[%s].", dataSource);

    final long refreshStartMillis = System.currentTimeMillis();

    // Segment id string -> SegmentId object.
    final Map<String, SegmentId> idsByString = Maps.uniqueIndex(segments, SegmentId::toString);
    final Set<SegmentId> refreshed = new HashSet<>();

    final Sequence<SegmentAnalysis> analyses = runSegmentMetadataQuery(Iterables.limit(segments, MAX_SEGMENTS_PER_QUERY));
    Yielder<SegmentAnalysis> yielder = Yielders.each(analyses);

    try {
        while (!yielder.isDone()) {
            final SegmentAnalysis analysis = yielder.get();
            final SegmentId segmentId = idsByString.get(analysis.getId());

            if (segmentId != null) {
                final RowSignature rowSignature = analysisToRowSignature(analysis);
                log.debug("Segment[%s] has signature[%s].", segmentId, rowSignature);

                segmentMetadataInfo.compute(dataSource, (dataSourceName, knownSegments) -> {
                    if (knownSegments == null) {
                        // Datasource may have been removed or become unavailable while this refresh was ongoing.
                        log.warn("No segment map found with datasource[%s], skipping refresh of segment[%s]", dataSourceName, segmentId);
                        return null;
                    }

                    knownSegments.compute(segmentId, (ignoredKey, currentMetadata) -> {
                        if (currentMetadata == null) {
                            log.warn("No segment[%s] found, skipping refresh", segmentId);
                            return null;
                        }
                        final AvailableSegmentMetadata refreshedMetadata = AvailableSegmentMetadata
                            .from(currentMetadata)
                            .withRowSignature(rowSignature)
                            .withNumRows(analysis.getNumRows())
                            .build();
                        refreshed.add(segmentId);
                        return refreshedMetadata;
                    });

                    // Returning null from compute removes the datasource entry when no segments remain.
                    return knownSegments.isEmpty() ? null : knownSegments;
                });
            } else {
                log.warn("Got analysis for segment[%s] we didn't ask for, ignoring.", analysis.getId());
            }

            yielder = yielder.next(null);
        }
    } finally {
        yielder.close();
    }

    log.debug("Refreshed metadata for dataSource[%s] in %,d ms (%d segments queried, %d segments left).", dataSource, System.currentTimeMillis() - refreshStartMillis, refreshed.size(), segments.size() - refreshed.size());

    return refreshed;
}
Also used : SegmentManager(org.apache.druid.server.SegmentManager) Inject(com.google.inject.Inject) LifecycleStart(org.apache.druid.java.util.common.lifecycle.LifecycleStart) AllColumnIncluderator(org.apache.druid.query.metadata.metadata.AllColumnIncluderator) FluentIterable(com.google.common.collect.FluentIterable) Map(java.util.Map) ServerType(org.apache.druid.server.coordination.ServerType) ManageLifecycle(org.apache.druid.guice.ManageLifecycle) TimelineServerView(org.apache.druid.client.TimelineServerView) EnumSet(java.util.EnumSet) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) ImmutableSet(com.google.common.collect.ImmutableSet) Execs(org.apache.druid.java.util.common.concurrent.Execs) ImmutableMap(com.google.common.collect.ImmutableMap) GuardedBy(com.google.errorprone.annotations.concurrent.GuardedBy) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Table(org.apache.calcite.schema.Table) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Interner(com.google.common.collect.Interner) PlannerConfig(org.apache.druid.sql.calcite.planner.PlannerConfig) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) CountDownLatch(java.util.concurrent.CountDownLatch) SegmentMetadataQuery(org.apache.druid.query.metadata.metadata.SegmentMetadataQuery) BrokerInternalQueryConfig(org.apache.druid.client.BrokerInternalQueryConfig) DataSegment(org.apache.druid.timeline.DataSegment) Optional(java.util.Optional) SegmentId(org.apache.druid.timeline.SegmentId) QueryLifecycleFactory(org.apache.druid.server.QueryLifecycleFactory) Iterables(com.google.common.collect.Iterables) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) JoinableFactory(org.apache.druid.segment.join.JoinableFactory) Function(java.util.function.Function) TreeSet(java.util.TreeSet) ConcurrentMap(java.util.concurrent.ConcurrentMap) ServerView(org.apache.druid.client.ServerView) 
Yielders(org.apache.druid.java.util.common.guava.Yielders) MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec) HashSet(java.util.HashSet) LifecycleStop(org.apache.druid.java.util.common.lifecycle.LifecycleStop) DruidServerMetadata(org.apache.druid.server.coordination.DruidServerMetadata) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) Predicates(com.google.common.base.Predicates) AbstractSchema(org.apache.calcite.schema.impl.AbstractSchema) StreamSupport(java.util.stream.StreamSupport) Yielder(org.apache.druid.java.util.common.guava.Yielder) ExecutorService(java.util.concurrent.ExecutorService) Access(org.apache.druid.server.security.Access) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Interners(com.google.common.collect.Interners) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) TableDataSource(org.apache.druid.query.TableDataSource) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) TreeMap(java.util.TreeMap) DruidTable(org.apache.druid.sql.calcite.table.DruidTable) RowSignature(org.apache.druid.segment.column.RowSignature) ColumnType(org.apache.druid.segment.column.ColumnType) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Escalator(org.apache.druid.server.security.Escalator) Comparator(java.util.Comparator) SegmentId(org.apache.druid.timeline.SegmentId) ISE(org.apache.druid.java.util.common.ISE) SegmentAnalysis(org.apache.druid.query.metadata.metadata.SegmentAnalysis) RowSignature(org.apache.druid.segment.column.RowSignature) HashSet(java.util.HashSet)

Example 8 with RowSignature

use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.

the class DruidSchema method buildDruidTable.

/**
 * Builds the {@link DruidTable} for a datasource by merging the row signatures of all of its segments currently
 * held in {@code segmentMetadataInfo}.
 *
 * <p>Columns are collected into a {@link TreeMap}, so the resulting signature lists them in sorted name order.
 * If the datasource has no entry in {@code segmentMetadataInfo}, the table gets an empty signature.
 *
 * @param dataSource name of the datasource to build a table for
 * @return a table backed by a {@link GlobalTableDataSource} when the datasource is both broadcast and joinable,
 *         otherwise by a plain {@link TableDataSource}
 * @throws ISE if a segment's row signature reports no type for one of its columns
 */
@VisibleForTesting
DruidTable buildDruidTable(final String dataSource) {
    final ConcurrentSkipListMap<SegmentId, AvailableSegmentMetadata> segmentsOfDataSource = segmentMetadataInfo.get(dataSource);
    final Map<String, ColumnType> mergedColumnTypes = new TreeMap<>();

    if (segmentsOfDataSource != null) {
        for (AvailableSegmentMetadata segmentMetadata : segmentsOfDataSource.values()) {
            final RowSignature segmentSignature = segmentMetadata.getRowSignature();
            if (segmentSignature == null) {
                continue;
            }
            for (String column : segmentSignature.getColumnNames()) {
                final ColumnType columnType = segmentSignature
                    .getColumnType(column)
                    .orElseThrow(() -> new ISE("Encountered null type for column[%s]", column));
                // Newer column types should override older ones. The first type seen for a column wins, so this
                // relies on the iteration order of the map (NOTE(review): presumably newest segment first --
                // confirm against the map's comparator).
                mergedColumnTypes.putIfAbsent(column, columnType);
            }
        }
    }

    final RowSignature.Builder signatureBuilder = RowSignature.builder();
    mergedColumnTypes.forEach(signatureBuilder::add);

    // to be a GlobalTableDataSource instead of a TableDataSource, it must appear on all servers (inferred by existing
    // in the segment cache, which in this case belongs to the broker meaning only broadcast segments live here)
    // to be joinable, it must be possibly joinable according to the factory. we only consider broadcast datasources
    // at this time, and isGlobal is currently strongly coupled with joinable, so only make a global table datasource
    // if also joinable
    final GlobalTableDataSource globalCandidate = new GlobalTableDataSource(dataSource);
    final boolean isJoinable = joinableFactory.isDirectlyJoinable(globalCandidate);
    final boolean isBroadcast = segmentManager.getDataSourceNames().contains(dataSource);

    final TableDataSource tableDataSource =
        (isBroadcast && isJoinable) ? globalCandidate : new TableDataSource(dataSource);

    return new DruidTable(tableDataSource, signatureBuilder.build(), null, isJoinable, isBroadcast);
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) SegmentId(org.apache.druid.timeline.SegmentId) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) DruidTable(org.apache.druid.sql.calcite.table.DruidTable) TreeMap(java.util.TreeMap) GlobalTableDataSource(org.apache.druid.query.GlobalTableDataSource) TableDataSource(org.apache.druid.query.TableDataSource) ISE(org.apache.druid.java.util.common.ISE) RowSignature(org.apache.druid.segment.column.RowSignature) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 9 with RowSignature

use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.

the class DruidSchema method analysisToRowSignature.

/**
 * Converts the per-column results of a {@link SegmentAnalysis} into an interned {@link RowSignature}.
 *
 * <p>Columns whose analysis is flagged as an error are left out. For the remaining columns the type signature
 * from the analysis is used; when it is missing, the string returned by {@code getType()} is parsed instead, and
 * if parsing throws {@link IllegalArgumentException} the column is recorded as {@code ColumnType.UNKNOWN_COMPLEX}.
 *
 * @param analysis the segment analysis to convert
 * @return the row signature, passed through {@code ROW_SIGNATURE_INTERNER}
 */
@VisibleForTesting
static RowSignature analysisToRowSignature(final SegmentAnalysis analysis) {
    final RowSignature.Builder signature = RowSignature.builder();

    for (Map.Entry<String, ColumnAnalysis> column : analysis.getColumns().entrySet()) {
        final ColumnAnalysis columnAnalysis = column.getValue();

        // Columns with analysis errors are not added to the signature.
        if (!columnAnalysis.isError()) {
            ColumnType resolvedType = columnAnalysis.getTypeSignature();

            if (resolvedType == null) {
                // No type signature available: fall back to the type string, and if that cannot be parsed
                // either, treat the column as some flavor of COMPLEX.
                // (Original note: the likelihood of upgrading from some version lower than 0.23 is low.)
                try {
                    resolvedType = ColumnType.fromString(columnAnalysis.getType());
                } catch (IllegalArgumentException ignored) {
                    // Deliberately ignored: an unparseable type string maps to the unknown complex type.
                    resolvedType = ColumnType.UNKNOWN_COMPLEX;
                }
            }

            signature.add(column.getKey(), resolvedType);
        }
    }

    return ROW_SIGNATURE_INTERNER.intern(signature.build());
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) ColumnAnalysis(org.apache.druid.query.metadata.metadata.ColumnAnalysis) RowSignature(org.apache.druid.segment.column.RowSignature) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) TreeMap(java.util.TreeMap) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 10 with RowSignature

use of org.apache.druid.segment.column.RowSignature in project druid by druid-io.

the class DruidUnionDataSourceRule method getColumnNamesIfTableOrUnion.

/**
 * Returns the output column names of {@code druidRel} if it can take part in a union datasource.
 *
 * <ul>
 *   <li>Leaf rel over a table backed by a {@link TableDataSource} that is a scan or mapping: the table's column
 *       names, run through the select-project mapping when the partial query is at the SELECT_PROJECT stage.</li>
 *   <li>No such table, but the rel is a {@link DruidUnionDataSourceRel}: that rel's union column names.</li>
 *   <li>Anything else: empty, after recording a planning error on {@code plannerContext} if one was given.</li>
 * </ul>
 *
 * @param druidRel       the rel to inspect
 * @param plannerContext receives a planning error when the rel is rejected; may be null
 * @return the column names, or empty when the rel cannot be unioned
 * @throws ISE if the partial query is in an unexpected stage, or the mapping's source count does not match the
 *             table signature size
 */
static Optional<List<String>> getColumnNamesIfTableOrUnion(final DruidRel<?> druidRel, @Nullable PlannerContext plannerContext) {
    final PartialDruidQuery partialQuery = druidRel.getPartialDruidQuery();
    final Optional<DruidTable> druidTable = DruidRels.druidTableIfLeafRel(druidRel).filter(table -> table.getDataSource() instanceof TableDataSource);

    if (druidTable.isPresent()) {
        if (!DruidRels.isScanOrMapping(druidRel, false)) {
            // A regular table, but with something other than a plain scan or column mapping on top of it.
            if (plannerContext != null) {
                plannerContext.setPlanningError("SQL requires union between inputs that are not simple table scans " + "and involve a filter or aliasing. Or column types of tables being unioned are not of same type.");
            }
            return Optional.empty();
        }

        if (partialQuery.stage() == PartialDruidQuery.Stage.SCAN) {
            return Optional.of(druidTable.get().getRowSignature().getColumnNames());
        }

        // Sanity check. Expected to be true due to the "scan or mapping" check.
        if (partialQuery.stage() != PartialDruidQuery.Stage.SELECT_PROJECT) {
            throw new ISE("Expected stage %s but got %s", PartialDruidQuery.Stage.SELECT_PROJECT, partialQuery.stage());
        }

        // Apply the mapping (with additional sanity checks).
        final RowSignature tableSignature = druidTable.get().getRowSignature();
        final Mappings.TargetMapping mapping = partialQuery.getSelectProject().getMapping();

        if (mapping.getSourceCount() != tableSignature.size()) {
            throw new ISE("Expected mapping with %d columns but got %d columns", tableSignature.size(), mapping.getSourceCount());
        }

        final List<String> mappedNames = new ArrayList<>();
        for (int target = 0; target < mapping.getTargetCount(); target++) {
            final int sourceField = mapping.getSourceOpt(target);
            mappedNames.add(tableSignature.getColumnName(sourceField));
        }
        return Optional.of(mappedNames);
    }

    // No regular table underneath: the only other accepted input is an existing union rel.
    if (druidRel instanceof DruidUnionDataSourceRel) {
        return Optional.of(((DruidUnionDataSourceRel) druidRel).getUnionColumnNames());
    }

    if (plannerContext != null) {
        plannerContext.setPlanningError("SQL requires union with input of a datasource type that is not supported." + " Union operation is only supported between regular tables. ");
    }
    return Optional.empty();
}
Also used : ArrayList(java.util.ArrayList) DruidTable(org.apache.druid.sql.calcite.table.DruidTable) DruidUnionDataSourceRel(org.apache.druid.sql.calcite.rel.DruidUnionDataSourceRel) TableDataSource(org.apache.druid.query.TableDataSource) Mappings(org.apache.calcite.util.mapping.Mappings) ISE(org.apache.druid.java.util.common.ISE) PartialDruidQuery(org.apache.druid.sql.calcite.rel.PartialDruidQuery) RowSignature(org.apache.druid.segment.column.RowSignature)

Aggregations

RowSignature (org.apache.druid.segment.column.RowSignature)46 ColumnType (org.apache.druid.segment.column.ColumnType)17 List (java.util.List)14 Test (org.junit.Test)13 Collectors (java.util.stream.Collectors)12 Nullable (javax.annotation.Nullable)11 PlannerContext (org.apache.druid.sql.calcite.planner.PlannerContext)11 DruidExpression (org.apache.druid.sql.calcite.expression.DruidExpression)10 Expressions (org.apache.druid.sql.calcite.expression.Expressions)10 Project (org.apache.calcite.rel.core.Project)9 RexLiteral (org.apache.calcite.rex.RexLiteral)9 RexNode (org.apache.calcite.rex.RexNode)9 SqlKind (org.apache.calcite.sql.SqlKind)9 ISE (org.apache.druid.java.util.common.ISE)9 Aggregation (org.apache.druid.sql.calcite.aggregation.Aggregation)9 ArrayList (java.util.ArrayList)8 StringUtils (org.apache.druid.java.util.common.StringUtils)8 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)8 ImmutableList (com.google.common.collect.ImmutableList)6 ImmutableSet (com.google.common.collect.ImmutableSet)6