Example 16 with Pair

Use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openLooKeng.

In class StatementAnalyzer, method validateCreateIndex:

private void validateCreateIndex(Table table, Optional<Scope> scope) {
    CreateIndex createIndex = (CreateIndex) analysis.getOriginalStatement();
    QualifiedObjectName tableFullName = createQualifiedObjectName(session, createIndex, createIndex.getTableName());
    accessControl.checkCanCreateIndex(session.getRequiredTransactionId(), session.getIdentity(), tableFullName);
    String tableName = tableFullName.toString();
    // check whether the catalog supports index creation
    if (!metadata.isHeuristicIndexSupported(session, tableFullName)) {
        throw new SemanticException(NOT_SUPPORTED, createIndex, "CREATE INDEX is not supported in catalog '%s'", tableFullName.getCatalogName());
    }
    List<String> partitions = new ArrayList<>();
    String partitionColumn = null;
    if (createIndex.getExpression().isPresent()) {
        partitions = HeuristicIndexUtils.extractPartitions(createIndex.getExpression().get());
        // validate the partition names in the predicate, e.g. CREATE INDEX ... WHERE pt_d = xxx;
        // pt_d must be a partition column
        Set<String> partitionColumns = partitions.stream().map(k -> k.substring(0, k.indexOf("="))).collect(Collectors.toSet());
        if (partitionColumns.size() > 1) {
            // currently only one partition column is supported
            throw new IllegalArgumentException("Heuristic index only supports predicates on one column");
        }
        // The only entry in the set is the partition column name
        partitionColumn = partitionColumns.iterator().next();
    }
    Optional<TableHandle> tableHandle = metadata.getTableHandle(session, tableFullName);
    if (tableHandle.isPresent()) {
        if (!tableHandle.get().getConnectorHandle().isHeuristicIndexSupported()) {
            throw new SemanticException(NOT_SUPPORTED, table, "The catalog is supported, but the table storage format is not supported by heuristic index");
        }
        TableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle.get());
        List<String> availableColumns = tableMetadata.getColumns().stream().map(ColumnMetadata::getName).collect(Collectors.toList());
        for (Identifier column : createIndex.getColumnAliases()) {
            if (!availableColumns.contains(column.getValue().toLowerCase(Locale.ROOT))) {
                throw new SemanticException(MISSING_ATTRIBUTE, table, "Column '%s' cannot be resolved", column.getValue());
            }
        }
        if (partitionColumn != null && !tableHandle.get().getConnectorHandle().isPartitionColumn(partitionColumn)) {
            throw new SemanticException(NOT_SUPPORTED, table, "Heuristic index creation is only supported for predicates on partition columns");
        }
    } else {
        throw new SemanticException(MISSING_ATTRIBUTE, table, "Table '%s' is invalid", tableFullName);
    }
    List<Pair<String, Type>> indexColumns = new LinkedList<>();
    for (Identifier i : createIndex.getColumnAliases()) {
        indexColumns.add(new Pair<>(i.toString(), UNKNOWN));
    }
    // For now, creating index for multiple columns is not supported
    if (indexColumns.size() > 1) {
        throw new SemanticException(NOT_SUPPORTED, table, "Multi-column indexes are currently not supported");
    }
    try {
        // Use this placeholder to check for an existing index and to reserve the name
        Properties properties = new Properties();
        properties.setProperty(INPROGRESS_PROPERTY_KEY, "TRUE");
        CreateIndexMetadata placeHolder = new CreateIndexMetadata(createIndex.getIndexName().toString(), tableName, createIndex.getIndexType(), 0L, indexColumns, partitions, properties, session.getUser(), UNDEFINED);
        synchronized (StatementAnalyzer.class) {
            IndexClient.RecordStatus recordStatus = heuristicIndexerManager.getIndexClient().lookUpIndexRecord(placeHolder);
            switch(recordStatus) {
                case SAME_NAME:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, createIndex, "Index '%s' already exists", createIndex.getIndexName().toString());
                case SAME_CONTENT:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, createIndex, "Index with same (table,column,indexType) already exists");
                case SAME_INDEX_PART_CONFLICT:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, createIndex, "Index with same (table,column,indexType) already exists and partition(s) contain conflicts");
                case IN_PROGRESS_SAME_NAME:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, createIndex, "Index '%s' is being created by another user. Check running queries for details. If there is no running query for this index, " + "the index may be in an unexpected error state and should be dropped using 'DROP INDEX %s'", createIndex.getIndexName().toString(), createIndex.getIndexName().toString());
                case IN_PROGRESS_SAME_CONTENT:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, createIndex, "Index with same (table,column,indexType) is being created by another user. Check running queries for details. " + "If there is no running query for this index, the index may be in an unexpected error state and should be dropped using 'DROP INDEX'");
                case IN_PROGRESS_SAME_INDEX_PART_CONFLICT:
                    if (partitions.isEmpty()) {
                        throw new SemanticException(INDEX_ALREADY_EXISTS, createIndex, "Index with same (table,column,indexType) is being created by another user. Check running queries for details. " + "If there is no running query for this index, the index may be in an unexpected error state and should be dropped using 'DROP INDEX %s'", createIndex.getIndexName().toString());
                    }
                // allow different queries to run with explicitly the same partitions
                case SAME_INDEX_PART_CAN_MERGE:
                case IN_PROGRESS_SAME_INDEX_PART_CAN_MERGE:
                    break;
                case NOT_FOUND:
                    heuristicIndexerManager.getIndexClient().addIndexRecord(placeHolder);
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
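
The Pair type used throughout these examples is a simple two-element holder from the heuristic-index SPI. Below is a minimal standalone sketch of how the index-column list above is assembled, assuming only the Pair members visible in these examples (the constructor, getFirst() and getSecond()) and UNKNOWN as the placeholder type used during analysis:

import io.prestosql.spi.heuristicindex.Pair;
import io.prestosql.spi.type.Type;

import java.util.LinkedList;
import java.util.List;

import static io.prestosql.spi.type.UnknownType.UNKNOWN;

public class IndexColumnsSketch {
    public static void main(String[] args) {
        // Column types are not resolved at analysis time, so UNKNOWN is used as a placeholder
        List<Pair<String, Type>> indexColumns = new LinkedList<>();
        indexColumns.add(new Pair<>("pt_d", UNKNOWN));
        for (Pair<String, Type> column : indexColumns) {
            System.out.println(column.getFirst() + " -> " + column.getSecond());
        }
    }
}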

Example 17 with Pair

Use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openLooKeng.

In class SourcePartitionedScheduler, method schedule:

@Override
public synchronized ScheduleResult schedule(int maxSplitGroup) {
    dropListenersFromWhenFinishedOrNewLifespansAdded();
    int overallSplitAssignmentCount = 0;
    ImmutableSet.Builder<RemoteTask> overallNewTasks = ImmutableSet.builder();
    List<ListenableFuture<?>> overallBlockedFutures = new ArrayList<>();
    boolean anyBlockedOnPlacements = false;
    boolean anyBlockedOnNextSplitBatch = false;
    boolean anyNotBlocked = false;
    boolean applyFilter = isHeuristicIndexFilterEnabled(session) && SplitFiltering.isSplitFilterApplicable(stage);
    boolean initialMarker = false;
    for (Entry<Lifespan, ScheduleGroup> entry : scheduleGroups.entrySet()) {
        Lifespan lifespan = entry.getKey();
        ScheduleGroup scheduleGroup = entry.getValue();
        Set<Split> pendingSplits = scheduleGroup.pendingSplits;
        if (scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS || scheduleGroup.state == ScheduleGroupState.DONE) {
            verify(scheduleGroup.nextSplitBatchFuture == null);
        } else if (pendingSplits.isEmpty()) {
            // try to get the next batch
            if (scheduleGroup.nextSplitBatchFuture == null) {
                scheduleGroup.nextSplitBatchFuture = splitSource.getNextBatch(scheduleGroup.partitionHandle, lifespan, splitBatchSize - pendingSplits.size());
                long start = System.nanoTime();
                addSuccessCallback(scheduleGroup.nextSplitBatchFuture, () -> stage.recordGetSplitTime(start));
            }
            if (scheduleGroup.nextSplitBatchFuture.isDone()) {
                SplitBatch nextSplits = getFutureValue(scheduleGroup.nextSplitBatchFuture);
                scheduleGroup.nextSplitBatchFuture = null;
                // apply the split filter to drop splits that contain no valid rows
                Pair<Optional<RowExpression>, Map<Symbol, ColumnHandle>> pair = SplitFiltering.getExpression(stage);
                if (SystemSessionProperties.isSnapshotEnabled(session)) {
                    List<Split> batchSplits = nextSplits.getSplits();
                    // Don't apply filter to MarkerSplit
                    if (batchSplits.size() == 1 && batchSplits.get(0).getConnectorSplit() instanceof MarkerSplit) {
                        applyFilter = false;
                    }
                }
                List<Split> filteredSplit = applyFilter ? SplitFiltering.getFilteredSplit(pair.getFirst(), SplitFiltering.getFullyQualifiedName(stage), pair.getSecond(), nextSplits, heuristicIndexerManager) : nextSplits.getSplits();
                // for ORC, small files/splits are grouped together
                List<Split> groupedSmallFilesList = splitSource.groupSmallSplits(filteredSplit, lifespan, maxSplitGroup);
                filteredSplit = groupedSmallFilesList;
                pendingSplits.addAll(filteredSplit);
                if (nextSplits.isLastBatch()) {
                    if (scheduleGroup.state == ScheduleGroupState.INITIALIZED && pendingSplits.isEmpty()) {
                        // Add an empty split in case no splits have been produced for the source.
                        // For source operators, they never take input, but they may produce output.
                        // This is well handled by Presto execution engine.
                        // However, there are certain non-source operators that may produce output without any input,
                        // for example, 1) an AggregationOperator, 2) a HashAggregationOperator where one of the grouping sets is ().
                        // Scheduling an empty split kicks off necessary driver instantiation to make this work.
                        pendingSplits.add(new Split(splitSource.getCatalogName(), new EmptySplit(splitSource.getCatalogName()), lifespan));
                    }
                    scheduleGroup.state = ScheduleGroupState.NO_MORE_SPLITS;
                }
            } else {
                overallBlockedFutures.add(scheduleGroup.nextSplitBatchFuture);
                anyBlockedOnNextSplitBatch = true;
                continue;
            }
        }
        Multimap<InternalNode, Split> splitAssignment = ImmutableMultimap.of();
        if (!pendingSplits.isEmpty()) {
            if (!scheduleGroup.placementFuture.isDone()) {
                anyBlockedOnPlacements = true;
                continue;
            }
            if (scheduleGroup.state == ScheduleGroupState.INITIALIZED) {
                scheduleGroup.state = ScheduleGroupState.SPLITS_ADDED;
            }
            if (state == State.INITIALIZED) {
                state = State.SPLITS_ADDED;
            }
            // calculate placements for splits
            SplitPlacementResult splitPlacementResult;
            if (stage.isThrottledSchedule()) {
                // If a partial schedule was requested due to limited resources, schedule only 10% of the splits.
                // The 10% is computed from the initial number of splits and reused on subsequent schedules.
                // If 10% of the currently pending splits later exceeds the earlier value, the larger count is scheduled.
                // If throttledSplitsCount exceeds the number of pending splits, all of them are scheduled.
                throttledSplitsCount = Math.max((int) Math.ceil(pendingSplits.size() * ALLOWED_PERCENT_LIMIT), throttledSplitsCount);
                splitPlacementResult = splitPlacementPolicy.computeAssignments(ImmutableSet.copyOf(Iterables.limit(pendingSplits, throttledSplitsCount)), this.stage);
            } else {
                splitPlacementResult = splitPlacementPolicy.computeAssignments(new HashSet<>(pendingSplits), this.stage);
            }
            splitAssignment = splitPlacementResult.getAssignments();
            if (SystemSessionProperties.isSnapshotEnabled(session)) {
                Split firstSplit = pendingSplits.iterator().next();
                if (pendingSplits.size() == 1 && firstSplit.getConnectorSplit() instanceof MarkerSplit) {
                    // We'll create a new assignment, but still need to call computeAssignments above, and cannot modify the returned assignment map directly
                    splitAssignment = HashMultimap.create(splitAssignment);
                    splitAssignment.values().remove(firstSplit);
                    // get all internal nodes and assign the marker split to each of them
                    List<InternalNode> allNodes = splitPlacementPolicy.allNodes();
                    for (InternalNode node : allNodes) {
                        splitAssignment.put(node, firstSplit);
                    }
                    MarkerSplit markerSplit = (MarkerSplit) firstSplit.getConnectorSplit();
                    // a resuming marker or the initial snapshot marker sets the flag below, so the stage enters the SCHEDULING_SPLITS state
                    if (markerSplit.isResuming() || markerSplit.getSnapshotId() == 0) {
                        initialMarker = true;
                    }
                } else {
                    // MarkerSplit should be in its own batch.
                    verify(pendingSplits.stream().noneMatch(split -> split.getConnectorSplit() instanceof MarkerSplit));
                }
            }
            // remove splits with successful placements
            // AbstractSet.removeAll performs terribly here.
            splitAssignment.values().forEach(pendingSplits::remove);
            overallSplitAssignmentCount += splitAssignment.size();
            // if not all splits were placed, mark the scheduleGroup as blocked on placement
            if (!pendingSplits.isEmpty()) {
                scheduleGroup.placementFuture = splitPlacementResult.getBlocked();
                overallBlockedFutures.add(scheduleGroup.placementFuture);
                anyBlockedOnPlacements = true;
            }
        }
        // if no new splits will be assigned, update state and attach completion event
        Multimap<InternalNode, Lifespan> noMoreSplitsNotification = ImmutableMultimap.of();
        if (pendingSplits.isEmpty() && scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS) {
            scheduleGroup.state = ScheduleGroupState.DONE;
            if (!lifespan.isTaskWide()) {
                InternalNode node = ((BucketedSplitPlacementPolicy) splitPlacementPolicy).getNodeForBucket(lifespan.getId());
                noMoreSplitsNotification = ImmutableMultimap.of(node, lifespan);
            }
        }
        // assign the splits with successful placements
        overallNewTasks.addAll(assignSplits(splitAssignment, noMoreSplitsNotification));
        // To avoid busy loops (the placement future may complete while splits are still pending), we check pendingSplits.isEmpty() instead of placementFuture.isDone() here.
        if (scheduleGroup.nextSplitBatchFuture == null && scheduleGroup.pendingSplits.isEmpty() && scheduleGroup.state != ScheduleGroupState.DONE) {
            anyNotBlocked = true;
        }
    }
    // The next getNextBatch invocation would detect that the source is finished, but it will fail if we tear down splitSource now.
    if ((state == State.NO_MORE_SPLITS || state == State.FINISHED) || (noMoreScheduleGroups && scheduleGroups.isEmpty() && splitSource.isFinished())) {
        switch(state) {
            case INITIALIZED:
                // INITIALIZED means no split was ever scheduled, which shouldn't be possible; see the usage of EmptySplit in this method.
                throw new IllegalStateException("At least 1 split should have been scheduled for this plan node");
            case SPLITS_ADDED:
                state = State.NO_MORE_SPLITS;
                splitSource.close();
            // fall through
            case NO_MORE_SPLITS:
                state = State.FINISHED;
                whenFinishedOrNewLifespanAdded.set(null);
            // fall through
            case FINISHED:
                return new ScheduleResult(true, overallNewTasks.build(), overallSplitAssignmentCount);
            default:
                throw new IllegalStateException("Unknown state");
        }
    }
    if (anyNotBlocked) {
        if (initialMarker) {
            stage.transitionToSchedulingSplits();
        }
        return new ScheduleResult(false, overallNewTasks.build(), overallSplitAssignmentCount);
    }
    if (anyBlockedOnPlacements || groupedExecution) {
        // In a broadcast join, output buffers of the tasks in build source stage have to
        // hold onto all data produced before probe side task scheduling finishes,
        // even if the data is acknowledged by all known consumers. This is because
        // new consumers may be added until the probe side task scheduling finishes.
        // 
        // As a result, the following line is necessary to prevent deadlock
        // due to neither build nor probe can make any progress.
        // The build side blocks due to a full output buffer.
        // In the meantime the probe side split cannot be consumed since
        // builder side hash table construction has not finished.
        overallNewTasks.addAll(finalizeTaskCreationIfNecessary());
    }
    ScheduleResult.BlockedReason blockedReason;
    if (anyBlockedOnNextSplitBatch) {
        blockedReason = anyBlockedOnPlacements ? MIXED_SPLIT_QUEUES_FULL_AND_WAITING_FOR_SOURCE : WAITING_FOR_SOURCE;
    } else {
        blockedReason = anyBlockedOnPlacements ? SPLIT_QUEUES_FULL : NO_ACTIVE_DRIVER_GROUP;
    }
    overallBlockedFutures.add(whenFinishedOrNewLifespanAdded);
    return new ScheduleResult(false, overallNewTasks.build(), nonCancellationPropagating(whenAnyComplete(overallBlockedFutures)), blockedReason, overallSplitAssignmentCount);
}
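
Here Pair carries two related results out of a single call: the optional filter predicate and its symbol-to-column mapping. The following is a hedged, standalone sketch of the same pattern; SplitFiltering.getExpression is Presto-internal, so plain String stand-ins replace RowExpression, Symbol and ColumnHandle:

import io.prestosql.spi.heuristicindex.Pair;

import java.util.Collections;
import java.util.Map;
import java.util.Optional;

public class FilterPairSketch {
    // Hypothetical stand-in for SplitFiltering.getExpression(stage): one call yields both results
    static Pair<Optional<String>, Map<String, String>> getExpression() {
        return new Pair<>(Optional.of("pt_d = '20240101'"), Collections.singletonMap("pt_d", "column_handle"));
    }

    public static void main(String[] args) {
        Pair<Optional<String>, Map<String, String>> pair = getExpression();
        pair.getFirst().ifPresent(predicate ->
                System.out.println("filter: " + predicate + ", columns: " + pair.getSecond()));
    }
}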

Example 18 with Pair

Use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openLooKeng.

In class StatementAnalyzer, method validateUpdateIndex:

private void validateUpdateIndex(Table table, Optional<Scope> scope) {
    UpdateIndex updateIndex = (UpdateIndex) analysis.getOriginalStatement();
    IndexRecord indexRecord;
    try {
        indexRecord = heuristicIndexerManager.getIndexClient().lookUpIndexRecord(updateIndex.getIndexName().toString());
    } catch (IOException e) {
        throw new UncheckedIOException("Error reading index records", e);
    }
    QualifiedObjectName tableFullName = QualifiedObjectName.valueOf(indexRecord.qualifiedTable);
    accessControl.checkCanCreateIndex(session.getRequiredTransactionId(), session.getIdentity(), tableFullName);
    String tableName = tableFullName.toString();
    Optional<TableHandle> tableHandle = metadata.getTableHandle(session, tableFullName);
    if (!tableHandle.isPresent()) {
        throw new SemanticException(MISSING_ATTRIBUTE, table, "Unable to update index. " + "Index table '%s' may have been dropped from outside OLK. Index should also be dropped.", tableFullName);
    }
    List<Pair<String, Type>> indexColumns = new LinkedList<>();
    for (String i : indexRecord.columns) {
        indexColumns.add(new Pair<>(i, UNKNOWN));
    }
    try {
        // Use this placeholder to check for an existing index and to reserve the name
        Properties properties = new Properties();
        properties.setProperty(INPROGRESS_PROPERTY_KEY, "TRUE");
        CreateIndexMetadata placeHolder = new CreateIndexMetadata(updateIndex.getIndexName().toString(), tableName, indexRecord.indexType, 0L, indexColumns, indexRecord.partitions, properties, session.getUser(), UNDEFINED);
        synchronized (StatementAnalyzer.class) {
            IndexClient.RecordStatus recordStatus = heuristicIndexerManager.getIndexClient().lookUpIndexRecord(placeHolder);
            switch(recordStatus) {
                case IN_PROGRESS_SAME_NAME:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, updateIndex, "Index '%s' is being created by another user. Check running queries for details. If there is no running query for this index, " + "the index may be in an unexpected error state and should be dropped using 'DROP INDEX %s'", updateIndex.getIndexName().toString(), updateIndex.getIndexName().toString());
                case IN_PROGRESS_SAME_CONTENT:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, updateIndex, "Index with same (table,column,indexType) is being created by another user. Check running queries for details. " + "If there is no running query for this index, the index may be in an unexpected error state and should be dropped using 'DROP INDEX'");
                case IN_PROGRESS_SAME_INDEX_PART_CONFLICT:
                    if (indexRecord.partitions.isEmpty()) {
                        throw new SemanticException(INDEX_ALREADY_EXISTS, updateIndex, "Index with same (table,column,indexType) is being created by another user. Check running queries for details. " + "If there is no running query for this index, the index may be in an unexpected error state and should be dropped using 'DROP INDEX %s'", updateIndex.getIndexName().toString());
                    }
                // allow different queries to run with explicitly the same partitions
                case NOT_FOUND:
                    throw new SemanticException(MISSING_INDEX, updateIndex, "Index with name '%s' does not exist", updateIndex.getIndexName().toString());
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
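
The placeholder record written inside the synchronized (StatementAnalyzer.class) block acts as a lock: a concurrent CREATE or UPDATE INDEX statement for the same index sees the in-progress record and fails fast. A reduced sketch of that reserve-with-placeholder idea, using a plain map instead of the IndexClient (names here are hypothetical):

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class IndexReservationSketch {
    private final ConcurrentMap<String, String> records = new ConcurrentHashMap<>();

    // Write a placeholder first; a second writer for the same index name
    // observes it and rejects the statement instead of racing the first one
    public void reserve(String indexName) {
        if (records.putIfAbsent(indexName, "IN_PROGRESS") != null) {
            throw new IllegalStateException("Index '" + indexName + "' already exists or is being created");
        }
    }
}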

Example 19 with Pair

Use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openLooKeng.

In class BitmapIndex, method addValues:

@Override
public boolean addValues(List<Pair<String, List<Object>>> values) throws IOException {
    checkClosed();
    // values can only be added once
    if (!updateAllowed.getAndSet(false)) {
        throw new UnsupportedOperationException("Unable to update index. " + "An existing bitmap index cannot be updated because all values must be added together, since the " + "position of the values is important.");
    }
    if (values.size() != 1) {
        throw new UnsupportedOperationException("Only single column is supported.");
    }
    List<Object> columnValues = values.get(0).getSecond();
    Map<Object, ArrayList<Integer>> positions = new HashMap<>();
    for (int i = 0; i < columnValues.size(); i++) {
        Object value = columnValues.get(i);
        if (value != null) {
            positions.computeIfAbsent(value, k -> new ArrayList<>()).add(i);
        }
    }
    if (positions.isEmpty()) {
        return true;
    }
    List<kotlin.Pair> bitmaps = new ArrayList<>(positions.size());
    for (Map.Entry<Object, ArrayList<Integer>> e : positions.entrySet()) {
        int[] valuePositions = ArrayUtils.toPrimitive(e.getValue().toArray(new Integer[0]));
        RoaringBitmap rr = RoaringBitmap.bitmapOf(valuePositions);
        rr.runOptimize();
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        DataOutputStream dos = new DataOutputStream(bos);
        rr.serialize(dos);
        dos.close();
        Object value = convertToSupportedType(e.getKey());
        bitmaps.add(new kotlin.Pair(value, bos.toByteArray()));
    }
    Collections.sort(bitmaps, (o1, o2) -> ((Comparable) o1.component1()).compareTo(o2.component1()));
    getBtreeWriteOptimized(bitmaps.iterator().next().component1(), bitmaps.iterator());
    return true;
}
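
Each distinct column value maps to a RoaringBitmap of row positions, which is serialized to bytes before being written to the B-tree. Below is a small round-trip sketch of that serialization, using the RoaringBitmap calls seen above plus their standard deserialize counterpart:

import org.roaringbitmap.RoaringBitmap;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class BitmapRoundTrip {
    public static void main(String[] args) throws IOException {
        // positions of one value within a stripe, as collected by addValues above
        RoaringBitmap positions = RoaringBitmap.bitmapOf(3, 7, 42);
        positions.runOptimize(); // compact runs before serializing
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (DataOutputStream dos = new DataOutputStream(bos)) {
            positions.serialize(dos);
        }
        // reading the bitmap back, e.g. when the index is consulted at query time
        RoaringBitmap restored = new RoaringBitmap();
        restored.deserialize(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
        System.out.println(restored.contains(7)); // true
    }
}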

Example 20 with Pair

Use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openLooKeng.

In class FileIndexWriter, method addData:

/**
 * This method IS thread-safe. Multiple operators can add data to one writer in parallel.
 *
 * @param values            values to be indexed
 * @param connectorMetadata metadata for the index
 */
@Override
public void addData(Map<String, List<Object>> values, Properties connectorMetadata) throws IOException {
    long stripeOffset = Long.parseLong(connectorMetadata.getProperty(DATASOURCE_STRIPE_OFFSET));
    // Add values first
    indexPages.computeIfAbsent(stripeOffset, k -> new ConcurrentHashMap<>());
    for (Map.Entry<String, List<Object>> e : values.entrySet()) {
        indexPages.get(stripeOffset).computeIfAbsent(e.getKey(), k -> Collections.synchronizedList(new LinkedList<>())).add(new AbstractMap.SimpleEntry(e.getValue(), Integer.parseInt(connectorMetadata.getProperty(DATASOURCE_PAGE_NUMBER))));
    }
    // Update page count
    int current = pageCountExpected.computeIfAbsent(stripeOffset, k -> new AtomicInteger()).decrementAndGet();
    if (connectorMetadata.getProperty(DATASOURCE_TOTAL_PAGES) != null) {
        int expected = Integer.parseInt(connectorMetadata.getProperty(DATASOURCE_TOTAL_PAGES));
        int updatedCurrent = pageCountExpected.get(stripeOffset).addAndGet(expected);
        LOG.debug("offset %d finishing page received, expected page count: %d, actual received: %d, remaining: %d", stripeOffset, expected, -current, updatedCurrent);
    }
    // Check the page count to see whether all pages of a stripe have been received; if so, persist the stripe and drop its values to save memory
    if (pageCountExpected.get(stripeOffset).get() == 0) {
        synchronized (pageCountExpected.get(stripeOffset)) {
            if (indexPages.containsKey(stripeOffset)) {
                LOG.debug("All pages for offset %d have been received. Persisting.", stripeOffset);
                // sort the stripe's pages and collect the values into a single list
                List<Pair<String, List<Object>>> columnValuesMap = new ArrayList<>();
                // each entry represents a mapping from column name -> list<entry<page values, page number>>
                for (Map.Entry<String, List<Map.Entry<List<Object>, Integer>>> entry : indexPages.get(stripeOffset).entrySet()) {
                    // sort the page values lists based on page numbers
                    entry.getValue().sort(Comparator.comparingInt(Map.Entry::getValue));
                    // collect all page values lists into a single list
                    List<Object> columnValues = entry.getValue().stream().map(Map.Entry::getKey).flatMap(Collection::stream).collect(Collectors.toList());
                    columnValuesMap.add(new Pair(entry.getKey(), columnValues));
                }
                persistStripe(stripeOffset, columnValuesMap);
                indexPages.remove(stripeOffset);
            } else {
                LOG.debug("All pages for offset %d have been received, but the values are missing. " + "This stripe should have already been persisted by another thread.", stripeOffset);
            }
        }
    }
}
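
The page-count bookkeeping above works without knowing the total up front: every page decrements the stripe's counter, and the one page carrying DATASOURCE_TOTAL_PAGES adds the expected total back in, so the counter reaches exactly zero once all pages have arrived, in any order. A reduced sketch of just that countdown (names here are hypothetical):

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicInteger;

public class PageCountdownSketch {
    private final ConcurrentMap<Long, AtomicInteger> pageCountExpected = new ConcurrentHashMap<>();

    // Returns true once every page of the stripe has been received
    public boolean pageReceived(long stripeOffset, Integer totalPagesOrNull) {
        AtomicInteger counter = pageCountExpected.computeIfAbsent(stripeOffset, k -> new AtomicInteger());
        counter.decrementAndGet();
        if (totalPagesOrNull != null) {
            // only the finishing page knows the expected total; add it in
            counter.addAndGet(totalPagesOrNull);
        }
        return counter.get() == 0;
    }
}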
