Use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.
Class StatementAnalyzer, method validateCreateIndex.
/**
 * Validates the CREATE INDEX statement currently being analyzed and, when no matching
 * index record exists yet, registers an in-progress placeholder record for it.
 *
 * <p>Checks are performed in this order: access control, catalog support, predicate columns
 * (at most one), table existence and storage-format support, index column resolution,
 * partition-column predicate, single-column restriction, and finally the index record lookup.
 *
 * @param table the table node, used as the error location for table-level failures
 * @param scope not referenced by this method
 * @throws SemanticException if any validation fails or a conflicting index record exists
 * @throws IllegalArgumentException if the predicate references more than one column
 * @throws UncheckedIOException if the index record store cannot be read or written
 */
private void validateCreateIndex(Table table, Optional<Scope> scope) {
    CreateIndex statement = (CreateIndex) analysis.getOriginalStatement();
    QualifiedObjectName qualifiedTable = createQualifiedObjectName(session, statement, statement.getTableName());
    accessControl.checkCanCreateIndex(session.getRequiredTransactionId(), session.getIdentity(), qualifiedTable);

    // The catalog itself must support heuristic indexes.
    if (!metadata.isHeuristicIndexSupported(session, qualifiedTable)) {
        throw new SemanticException(NOT_SUPPORTED, statement, "CREATE INDEX is not supported in catalog '%s'", qualifiedTable.getCatalogName());
    }

    List<String> partitions = new ArrayList<>();
    String partitionColumn = null;
    if (statement.getExpression().isPresent()) {
        partitions = HeuristicIndexUtils.extractPartitions(statement.getExpression().get());
        // Each partition entry has the form "column=value"; collect the distinct column names.
        // e.g. create index ... where pt_d = xxx; pt_d must be a partition column.
        Set<String> predicateColumns = partitions.stream()
                .map(partition -> partition.substring(0, partition.indexOf("=")))
                .collect(Collectors.toSet());
        if (predicateColumns.size() > 1) {
            // Only a single predicate column is supported for now.
            throw new IllegalArgumentException("Heuristic index only supports predicates on one column");
        }
        // The single entry in the set is the predicate (partition) column name.
        partitionColumn = predicateColumns.iterator().next();
    }

    Optional<TableHandle> tableHandle = metadata.getTableHandle(session, qualifiedTable);
    if (!tableHandle.isPresent()) {
        throw new SemanticException(MISSING_ATTRIBUTE, table, "Table '%s' is invalid", qualifiedTable);
    }
    if (!tableHandle.get().getConnectorHandle().isHeuristicIndexSupported()) {
        throw new SemanticException(NOT_SUPPORTED, table, "Catalog supported, but table storage format is not supported by heuristic index");
    }

    // Every requested index column must exist in the table (compared in lower case).
    TableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle.get());
    List<String> knownColumns = tableMetadata.getColumns().stream()
            .map(ColumnMetadata::getName)
            .collect(Collectors.toList());
    for (Identifier requested : statement.getColumnAliases()) {
        String normalized = requested.getValue().toLowerCase(Locale.ROOT);
        if (!knownColumns.contains(normalized)) {
            throw new SemanticException(MISSING_ATTRIBUTE, table, "Column '%s' cannot be resolved", requested.getValue());
        }
    }

    boolean hasPredicateColumn = partitionColumn != null;
    if (hasPredicateColumn && !tableHandle.get().getConnectorHandle().isPartitionColumn(partitionColumn)) {
        throw new SemanticException(NOT_SUPPORTED, table, "Heuristic index creation is only supported for predicates on partition columns");
    }

    List<Pair<String, Type>> indexColumns = new LinkedList<>();
    for (Identifier alias : statement.getColumnAliases()) {
        indexColumns.add(new Pair<>(alias.toString(), UNKNOWN));
    }
    // For now, creating an index over multiple columns is not supported.
    if (indexColumns.size() > 1) {
        throw new SemanticException(NOT_SUPPORTED, table, "Multi-column indexes are currently not supported");
    }

    try {
        // The placeholder is used both to look for an existing record and to reserve the slot.
        Properties inProgress = new Properties();
        inProgress.setProperty(INPROGRESS_PROPERTY_KEY, "TRUE");
        String tableName = qualifiedTable.toString();
        CreateIndexMetadata placeHolder = new CreateIndexMetadata(
                statement.getIndexName().toString(),
                tableName,
                statement.getIndexType(),
                0L,
                indexColumns,
                partitions,
                inProgress,
                session.getUser(),
                UNDEFINED);

        // Lookup and insertion happen under one class-wide lock.
        synchronized (StatementAnalyzer.class) {
            IndexClient.RecordStatus status = heuristicIndexerManager.getIndexClient().lookUpIndexRecord(placeHolder);
            switch (status) {
                case SAME_NAME:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, statement, "Index '%s' already exists", statement.getIndexName().toString());
                case SAME_CONTENT:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, statement, "Index with same (table,column,indexType) already exists");
                case SAME_INDEX_PART_CONFLICT:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, statement, "Index with same (table,column,indexType) already exists and partition(s) contain conflicts");
                case IN_PROGRESS_SAME_NAME:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, statement,
                            "Index '%s' is being created by another user. Check running queries for details. If there is no running query for this index, "
                                    + "the index may be in an unexpected error state and should be dropped using 'DROP INDEX %s'",
                            statement.getIndexName().toString(), statement.getIndexName().toString());
                case IN_PROGRESS_SAME_CONTENT:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, statement,
                            "Index with same (table,column,indexType) is being created by another user. Check running queries for details. "
                                    + "If there is no running query for this index, the index may be in an unexpected error state and should be dropped using 'DROP INDEX'");
                case IN_PROGRESS_SAME_INDEX_PART_CONFLICT:
                    if (partitions.isEmpty()) {
                        throw new SemanticException(INDEX_ALREADY_EXISTS, statement,
                                "Index with same (table,column,indexType) is being created by another user. Check running queries for details. "
                                        + "If there is no running query for this index, the index may be in an unexpected error state and should be dropped using 'DROP INDEX %s'",
                                statement.getIndexName().toString());
                    }
                    // Explicitly listed partitions: different queries are allowed to proceed.
                    break;
                case SAME_INDEX_PART_CAN_MERGE:
                case IN_PROGRESS_SAME_INDEX_PART_CAN_MERGE:
                    break;
                case NOT_FOUND:
                    heuristicIndexerManager.getIndexClient().addIndexRecord(placeHolder);
                    break;
                default:
                    break;
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.
Class SourcePartitionedScheduler, method schedule.
/**
 * Runs one scheduling pass over every schedule group.
 *
 * <p>For each group this method fetches the next split batch when nothing is pending,
 * optionally filters the batch through the heuristic index, groups small splits,
 * computes placements, assigns the placed splits to tasks and updates the group state.
 * After the loop it either finishes the scheduler, reports that more work can be done
 * immediately, or returns a blocked result carrying the futures to wait on.
 *
 * @param maxSplitGroup passed through to {@code splitSource.groupSmallSplits}
 * @return the result of this pass: finished, not blocked, or blocked with a reason
 * @throws IllegalStateException if the scheduler would finish while still in the INITIALIZED state
 */
@Override
public synchronized ScheduleResult schedule(int maxSplitGroup) {
dropListenersFromWhenFinishedOrNewLifespansAdded();
// Totals and flags accumulated across all schedule groups during this pass.
int overallSplitAssignmentCount = 0;
ImmutableSet.Builder<RemoteTask> overallNewTasks = ImmutableSet.builder();
List<ListenableFuture<?>> overallBlockedFutures = new ArrayList<>();
boolean anyBlockedOnPlacements = false;
boolean anyBlockedOnNextSplitBatch = false;
boolean anyNotBlocked = false;
// Split filtering requires both the session switch and a stage the filter is applicable to.
boolean applyFilter = isHeuristicIndexFilterEnabled(session) && SplitFiltering.isSplitFilterApplicable(stage);
// Set to true when a resuming marker split, or one with snapshot id 0, is scheduled in this pass.
boolean initialMarker = false;
for (Entry<Lifespan, ScheduleGroup> entry : scheduleGroups.entrySet()) {
Lifespan lifespan = entry.getKey();
ScheduleGroup scheduleGroup = entry.getValue();
Set<Split> pendingSplits = scheduleGroup.pendingSplits;
if (scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS || scheduleGroup.state == ScheduleGroupState.DONE) {
// No further batches are requested once the group has seen its last batch.
verify(scheduleGroup.nextSplitBatchFuture == null);
} else if (pendingSplits.isEmpty()) {
// try to get the next batch
if (scheduleGroup.nextSplitBatchFuture == null) {
scheduleGroup.nextSplitBatchFuture = splitSource.getNextBatch(scheduleGroup.partitionHandle, lifespan, splitBatchSize - pendingSplits.size());
// Record how long the split source took once the batch future succeeds.
long start = System.nanoTime();
addSuccessCallback(scheduleGroup.nextSplitBatchFuture, () -> stage.recordGetSplitTime(start));
}
if (scheduleGroup.nextSplitBatchFuture.isDone()) {
SplitBatch nextSplits = getFutureValue(scheduleGroup.nextSplitBatchFuture);
scheduleGroup.nextSplitBatchFuture = null;
// add split filter to filter out split has no valid rows
Pair<Optional<RowExpression>, Map<Symbol, ColumnHandle>> pair = SplitFiltering.getExpression(stage);
if (SystemSessionProperties.isSnapshotEnabled(session)) {
List<Split> batchSplits = nextSplits.getSplits();
// Don't apply filter to MarkerSplit
// NOTE: applyFilter is declared outside the loop and never reset, so once cleared here
// it stays false for the remaining schedule groups of this pass.
if (batchSplits.size() == 1 && batchSplits.get(0).getConnectorSplit() instanceof MarkerSplit) {
applyFilter = false;
}
}
List<Split> filteredSplit = applyFilter ? SplitFiltering.getFilteredSplit(pair.getFirst(), SplitFiltering.getFullyQualifiedName(stage), pair.getSecond(), nextSplits, heuristicIndexerManager) : nextSplits.getSplits();
// In case of ORC small size files/splits are grouped
List<Split> groupedSmallFilesList = splitSource.groupSmallSplits(filteredSplit, lifespan, maxSplitGroup);
filteredSplit = groupedSmallFilesList;
pendingSplits.addAll(filteredSplit);
if (nextSplits.isLastBatch()) {
if (scheduleGroup.state == ScheduleGroupState.INITIALIZED && pendingSplits.isEmpty()) {
// Add an empty split in case no splits have been produced for the source.
// For source operators, they never take input, but they may produce output.
// This is well handled by Presto execution engine.
// However, there are certain non-source operators that may produce output without any input,
// for example, 1) an AggregationOperator, 2) a HashAggregationOperator where one of the grouping sets is ().
// Scheduling an empty split kicks off necessary driver instantiation to make this work.
pendingSplits.add(new Split(splitSource.getCatalogName(), new EmptySplit(splitSource.getCatalogName()), lifespan));
}
scheduleGroup.state = ScheduleGroupState.NO_MORE_SPLITS;
}
} else {
// The batch is not available yet: remember the future and move on to the next group.
overallBlockedFutures.add(scheduleGroup.nextSplitBatchFuture);
anyBlockedOnNextSplitBatch = true;
continue;
}
}
Multimap<InternalNode, Split> splitAssignment = ImmutableMultimap.of();
if (!pendingSplits.isEmpty()) {
// A previous placement attempt is still blocked; do not retry until its future completes.
if (!scheduleGroup.placementFuture.isDone()) {
anyBlockedOnPlacements = true;
continue;
}
if (scheduleGroup.state == ScheduleGroupState.INITIALIZED) {
scheduleGroup.state = ScheduleGroupState.SPLITS_ADDED;
}
if (state == State.INITIALIZED) {
state = State.SPLITS_ADDED;
}
// calculate placements for splits
SplitPlacementResult splitPlacementResult;
if (stage.isThrottledSchedule()) {
// If asked for partial schedule incase of lesser resource, then schedule only 10% of splits.
// 10% is calculated on initial number of splits and same is being used on subsequent schedule also.
// But if later 10% of current pending splits more than earlier 10%, then it will schedule max of
// these.
// if throttledSplitsCount is more than number of pendingSplits, then it will schedule all.
throttledSplitsCount = Math.max((int) Math.ceil(pendingSplits.size() * ALLOWED_PERCENT_LIMIT), throttledSplitsCount);
splitPlacementResult = splitPlacementPolicy.computeAssignments(ImmutableSet.copyOf(Iterables.limit(pendingSplits, throttledSplitsCount)), this.stage);
} else {
splitPlacementResult = splitPlacementPolicy.computeAssignments(new HashSet<>(pendingSplits), this.stage);
}
splitAssignment = splitPlacementResult.getAssignments();
if (SystemSessionProperties.isSnapshotEnabled(session)) {
// pendingSplits is non-empty here (checked above), so next() is safe.
Split firstSplit = pendingSplits.iterator().next();
if (pendingSplits.size() == 1 && firstSplit.getConnectorSplit() instanceof MarkerSplit) {
// We'll create a new assignment, but still need to call computeAssignments above, and cannot modify the returned assignment map directly
splitAssignment = HashMultimap.create(splitAssignment);
splitAssignment.values().remove(firstSplit);
// Getting all internalNodes and assigning marker splits to all of them.
List<InternalNode> allNodes = splitPlacementPolicy.allNodes();
for (InternalNode node : allNodes) {
splitAssignment.put(node, firstSplit);
}
MarkerSplit markerSplit = (MarkerSplit) firstSplit.getConnectorSplit();
// If the marker is resuming or carries snapshot id 0, set the flag below to true
// so the stage is transitioned via transitionToSchedulingSplits() later in this method.
if (markerSplit.isResuming() || markerSplit.getSnapshotId() == 0) {
initialMarker = true;
}
} else {
// MarkerSplit should be in its own batch.
verify(pendingSplits.stream().noneMatch(split -> split.getConnectorSplit() instanceof MarkerSplit));
}
}
// remove splits with successful placements
// AbstractSet.removeAll performs terribly here.
splitAssignment.values().forEach(pendingSplits::remove);
overallSplitAssignmentCount += splitAssignment.size();
// if not completed placed, mark scheduleGroup as blocked on placement
if (!pendingSplits.isEmpty()) {
scheduleGroup.placementFuture = splitPlacementResult.getBlocked();
overallBlockedFutures.add(scheduleGroup.placementFuture);
anyBlockedOnPlacements = true;
}
}
// if no new splits will be assigned, update state and attach completion event
Multimap<InternalNode, Lifespan> noMoreSplitsNotification = ImmutableMultimap.of();
if (pendingSplits.isEmpty() && scheduleGroup.state == ScheduleGroupState.NO_MORE_SPLITS) {
scheduleGroup.state = ScheduleGroupState.DONE;
// For a lifespan that is not task-wide, notify the node that owns the lifespan's bucket.
if (!lifespan.isTaskWide()) {
InternalNode node = ((BucketedSplitPlacementPolicy) splitPlacementPolicy).getNodeForBucket(lifespan.getId());
noMoreSplitsNotification = ImmutableMultimap.of(node, lifespan);
}
}
// assign the splits with successful placements
overallNewTasks.addAll(assignSplits(splitAssignment, noMoreSplitsNotification));
// The group counts as "not blocked" when it is not waiting on a split batch, has nothing pending and is not DONE.
// To avoid busy loops, pendingSplits.isEmpty() is checked here instead of placementFuture.isDone().
// NOTE(review): the original comment referred to a numbered list ("caused by 1") that is not present in this file.
if (scheduleGroup.nextSplitBatchFuture == null && scheduleGroup.pendingSplits.isEmpty() && scheduleGroup.state != ScheduleGroupState.DONE) {
anyNotBlocked = true;
}
}
// The scheduler state is tested first, so splitSource.isFinished() is only consulted while the
// state is neither NO_MORE_SPLITS nor FINISHED (short-circuit evaluation), and only once no more
// schedule groups will be added and none remain.
// NOTE(review): the original comment here was truncated; it warned that a group which has not yet
// realized there are no more splits would fail on its next getNextBatch call if splitSource were torn down early.
if ((state == State.NO_MORE_SPLITS || state == State.FINISHED) || (noMoreScheduleGroups && scheduleGroups.isEmpty() && splitSource.isFinished())) {
switch(state) {
case INITIALIZED:
// The state never left INITIALIZED, i.e. no split was ever pending for placement.
// But this shouldn't be possible. See usage of EmptySplit in this method.
throw new IllegalStateException("At least 1 split should have been scheduled for this plan node");
case SPLITS_ADDED:
state = State.NO_MORE_SPLITS;
splitSource.close();
// fall through
case NO_MORE_SPLITS:
state = State.FINISHED;
whenFinishedOrNewLifespanAdded.set(null);
// fall through
case FINISHED:
return new ScheduleResult(true, overallNewTasks.build(), overallSplitAssignmentCount);
default:
throw new IllegalStateException("Unknown state");
}
}
// At least one group can make progress immediately: return a non-blocked, unfinished result.
if (anyNotBlocked) {
if (initialMarker) {
stage.transitionToSchedulingSplits();
}
return new ScheduleResult(false, overallNewTasks.build(), overallSplitAssignmentCount);
}
if (anyBlockedOnPlacements || groupedExecution) {
// In a broadcast join, output buffers of the tasks in build source stage have to
// hold onto all data produced before probe side task scheduling finishes,
// even if the data is acknowledged by all known consumers. This is because
// new consumers may be added until the probe side task scheduling finishes.
//
// As a result, the following line is necessary to prevent deadlock
// due to neither build nor probe can make any progress.
// The build side blocks due to a full output buffer.
// In the meantime the probe side split cannot be consumed since
// builder side hash table construction has not finished.
overallNewTasks.addAll(finalizeTaskCreationIfNecessary());
}
// Derive the blocked reason from which kinds of waiting were observed in the loop above.
ScheduleResult.BlockedReason blockedReason;
if (anyBlockedOnNextSplitBatch) {
blockedReason = anyBlockedOnPlacements ? MIXED_SPLIT_QUEUES_FULL_AND_WAITING_FOR_SOURCE : WAITING_FOR_SOURCE;
} else {
blockedReason = anyBlockedOnPlacements ? SPLIT_QUEUES_FULL : NO_ACTIVE_DRIVER_GROUP;
}
// The caller is unblocked as soon as any collected future, or whenFinishedOrNewLifespanAdded, completes.
overallBlockedFutures.add(whenFinishedOrNewLifespanAdded);
return new ScheduleResult(false, overallNewTasks.build(), nonCancellationPropagating(whenAnyComplete(overallBlockedFutures)), blockedReason, overallSplitAssignmentCount);
}
Use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.
Class StatementAnalyzer, method validateUpdateIndex.
/**
 * Validates the UPDATE INDEX statement currently being analyzed.
 *
 * <p>Looks up the existing index record by name, checks access on the indexed table,
 * verifies the table still exists, and then checks the record status so that an update
 * is rejected while a conflicting creation is in progress or when the index is missing.
 *
 * @param table the table node, used as the error location when the indexed table is missing
 * @param scope not referenced by this method
 * @throws SemanticException if the index or its table does not exist, or a conflicting
 *         index operation is in progress
 * @throws UncheckedIOException if the index records cannot be read
 */
private void validateUpdateIndex(Table table, Optional<Scope> scope) {
    UpdateIndex updateIndex = (UpdateIndex) analysis.getOriginalStatement();
    String indexName = updateIndex.getIndexName().toString();
    IndexRecord indexRecord;
    try {
        indexRecord = heuristicIndexerManager.getIndexClient().lookUpIndexRecord(indexName);
    } catch (IOException e) {
        throw new UncheckedIOException("Error reading index records, ", e);
    }
    // Guard against a missing record so the user gets a semantic error instead of a
    // NullPointerException below. NOTE(review): assumes the client may return null for an
    // unknown index name - confirm against IndexClient.
    if (indexRecord == null) {
        throw new SemanticException(MISSING_INDEX, updateIndex, "Index with name '%s' does not exist", indexName);
    }
    QualifiedObjectName tableFullName = QualifiedObjectName.valueOf(indexRecord.qualifiedTable);
    accessControl.checkCanCreateIndex(session.getRequiredTransactionId(), session.getIdentity(), tableFullName);
    String tableName = tableFullName.toString();
    Optional<TableHandle> tableHandle = metadata.getTableHandle(session, tableFullName);
    if (!tableHandle.isPresent()) {
        throw new SemanticException(MISSING_ATTRIBUTE, table, "Unable to update index. " + "Index table '%s' may have been dropped from outside OLK. Index should also be dropped.", tableFullName);
    }
    List<Pair<String, Type>> indexColumns = new LinkedList<>();
    for (String column : indexRecord.columns) {
        indexColumns.add(new Pair<>(column, UNKNOWN));
    }
    try {
        // Use this place holder to check the existence of index and lock the place
        Properties properties = new Properties();
        properties.setProperty(INPROGRESS_PROPERTY_KEY, "TRUE");
        CreateIndexMetadata placeHolder = new CreateIndexMetadata(indexName, tableName, indexRecord.indexType, 0L, indexColumns, indexRecord.partitions, properties, session.getUser(), UNDEFINED);
        synchronized (StatementAnalyzer.class) {
            IndexClient.RecordStatus recordStatus = heuristicIndexerManager.getIndexClient().lookUpIndexRecord(placeHolder);
            switch (recordStatus) {
                case IN_PROGRESS_SAME_NAME:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, updateIndex, "Index '%s' is being created by another user. Check running queries for details. If there is no running query for this index, " + "the index may be in an unexpected error state and should be dropped using 'DROP INDEX %s'", indexName, indexName);
                case IN_PROGRESS_SAME_CONTENT:
                    throw new SemanticException(INDEX_ALREADY_EXISTS, updateIndex, "Index with same (table,column,indexType) is being created by another user. Check running queries for details. " + "If there is no running query for this index, the index may be in an unexpected error state and should be dropped using 'DROP INDEX'");
                case IN_PROGRESS_SAME_INDEX_PART_CONFLICT:
                    if (indexRecord.partitions.isEmpty()) {
                        throw new SemanticException(INDEX_ALREADY_EXISTS, updateIndex, "Index with same (table,column,indexType) is being created by another user. Check running queries for details. " + "If there is no running query for this index, the index may be in an unexpected error state and should be dropped using 'DROP INDEX %s'", indexName);
                    }
                    // allow different queries to run with explicitly same partitions.
                    // The break is required: without it this case fell through into NOT_FOUND
                    // and wrongly reported the index as missing.
                    break;
                case NOT_FOUND:
                    throw new SemanticException(MISSING_INDEX, updateIndex, "Index with name '%s' does not exist", indexName);
                default:
                    // Any other status means the index exists and can be updated.
                    break;
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.
Class BitmapIndex, method addValues.
/**
 * Builds the index from the values of a single column.
 *
 * <p>For every distinct non-null value the row positions at which it occurs are collected
 * into a bitmap; the serialized bitmaps, sorted by their (converted) value, are then handed
 * to {@code getBtreeWriteOptimized}. Values may be added only once per index instance.
 *
 * @param values a single-element list; the second component of its pair holds the column values
 * @return always {@code true}
 * @throws UnsupportedOperationException if values were already added, or if {@code values}
 *         does not contain exactly one column
 * @throws IOException if a bitmap cannot be serialized
 */
@Override
public boolean addValues(List<Pair<String, List<Object>>> values) throws IOException {
    checkClosed();

    // values can only be added once: the flag is consumed by the first caller.
    boolean firstUpdate = updateAllowed.getAndSet(false);
    if (!firstUpdate) {
        throw new UnsupportedOperationException("Unable to update index. " + "An existing Btree index can not be updated because all values must be added together since the " + "position of the values is important.");
    }
    if (values.size() != 1) {
        throw new UnsupportedOperationException("Only single column is supported.");
    }

    // Group row positions by value; null values are not indexed.
    List<Object> column = values.get(0).getSecond();
    Map<Object, ArrayList<Integer>> rowsByValue = new HashMap<>();
    for (int row = 0; row < column.size(); row++) {
        Object cell = column.get(row);
        if (cell == null) {
            continue;
        }
        rowsByValue.computeIfAbsent(cell, ignored -> new ArrayList<>()).add(row);
    }
    if (rowsByValue.isEmpty()) {
        return true;
    }

    // Serialize one run-optimized bitmap per distinct value.
    List<kotlin.Pair> serialized = new ArrayList<>(rowsByValue.size());
    for (Map.Entry<Object, ArrayList<Integer>> group : rowsByValue.entrySet()) {
        Integer[] boxedRows = group.getValue().toArray(new Integer[0]);
        RoaringBitmap bitmap = RoaringBitmap.bitmapOf(ArrayUtils.toPrimitive(boxedRows));
        bitmap.runOptimize();

        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(buffer);
        bitmap.serialize(out);
        out.close();

        Object key = convertToSupportedType(group.getKey());
        serialized.add(new kotlin.Pair(key, buffer.toByteArray()));
    }

    // Entries are ordered by their converted key before being written.
    Collections.sort(serialized, (left, right) -> ((Comparable) left.component1()).compareTo(right.component1()));
    Object firstKey = serialized.get(0).component1();
    getBtreeWriteOptimized(firstKey, serialized.iterator());
    return true;
}
Use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.
Class FileIndexWriter, method addData.
/**
 * Buffers one page of column values for a stripe and persists the stripe once all of its
 * pages have been received.
 *
 * <p>This method IS thread-safe. Multiple operators can add data to one writer in parallel.
 *
 * <p>Each call decrements the stripe's page counter by one; the call whose metadata carries
 * the total page count adds that total, so the counter reaches zero exactly when every page
 * of the stripe has arrived.
 *
 * @param values values to be indexed, keyed by column name
 * @param connectorMetadata metadata for the index; the stripe offset and page number are read
 *        from it, and the total page count when present
 * @throws IOException declared by the overridden method
 * @throws NumberFormatException if a required numeric property is missing or malformed
 */
@Override
public void addData(Map<String, List<Object>> values, Properties connectorMetadata) throws IOException {
    long stripeOffset = Long.parseLong(connectorMetadata.getProperty(DATASOURCE_STRIPE_OFFSET));

    // Add values first. The map returned by computeIfAbsent is used directly, which avoids a
    // second lookup that could return null if the stripe entry were removed in between.
    Map<String, List<Map.Entry<List<Object>, Integer>>> stripePages = indexPages.computeIfAbsent(stripeOffset, k -> new ConcurrentHashMap<>());
    String pageNumberProperty = connectorMetadata.getProperty(DATASOURCE_PAGE_NUMBER);
    for (Map.Entry<String, List<Object>> e : values.entrySet()) {
        // Parsed per entry so that an empty values map never triggers a parse failure.
        int pageNumber = Integer.parseInt(pageNumberProperty);
        stripePages.computeIfAbsent(e.getKey(), k -> Collections.synchronizedList(new LinkedList<>()))
                .add(new AbstractMap.SimpleEntry<>(e.getValue(), pageNumber));
    }

    // Update page count
    AtomicInteger remainingPages = pageCountExpected.computeIfAbsent(stripeOffset, k -> new AtomicInteger());
    int current = remainingPages.decrementAndGet();
    String totalPagesProperty = connectorMetadata.getProperty(DATASOURCE_TOTAL_PAGES);
    if (totalPagesProperty != null) {
        int expected = Integer.parseInt(totalPagesProperty);
        int updatedCurrent = remainingPages.addAndGet(expected);
        LOG.debug("offset %d finishing page received, expected page count: %d, actual received: %d, remaining: %d", stripeOffset, expected, -current, updatedCurrent);
    }

    // Check page count to know if all pages have been received for a stripe. Persist and delete values if true to save memory
    if (remainingPages.get() == 0) {
        // Several threads may observe zero; the lock plus the presence check lets only one persist.
        synchronized (remainingPages) {
            Map<String, List<Map.Entry<List<Object>, Integer>>> completedPages = indexPages.get(stripeOffset);
            if (completedPages != null) {
                LOG.debug("All pages for offset %d have been received. Persisting.", stripeOffset);
                // sort the stripe's pages and collect the values into a single list
                List<Pair<String, List<Object>>> columnValuesMap = new ArrayList<>();
                // each entry represents a mapping from column name -> list<entry<page values, page number>>
                for (Map.Entry<String, List<Map.Entry<List<Object>, Integer>>> entry : completedPages.entrySet()) {
                    // sort the page values lists based on page numbers
                    entry.getValue().sort(Comparator.comparingInt(Map.Entry::getValue));
                    // collect all page values lists into a single list
                    List<Object> columnValues = entry.getValue().stream().map(Map.Entry::getKey).flatMap(Collection::stream).collect(Collectors.toList());
                    columnValuesMap.add(new Pair<>(entry.getKey(), columnValues));
                }
                persistStripe(stripeOffset, columnValuesMap);
                // Removed only after a successful persist, as before.
                indexPages.remove(stripeOffset);
            } else {
                LOG.debug("All pages for offset %d have been received, but the values are missing. " + "This stripe should have already been persisted by another thread.", stripeOffset);
            }
        }
    }
}
Aggregations