Search in sources :

Example 6 with IndexRecord

use of io.prestosql.spi.heuristicindex.IndexRecord in project hetu-core by openlookeng.

the class UpdateIndexOperator method addInput.

/**
 * Consumes one page of table data and forwards its column values to the index writer
 * responsible for the page's stripe (file), partition or table, depending on the create level.
 *
 * <p>Empty pages are ignored. At STRIPE level, a page is skipped when the modification time
 * recorded for its file in {@code pathToModifiedTime} equals the one carried in the page metadata.
 *
 * @param page the page to index; must not be null and must not be a {@link MarkerPage}
 * @throws UnsupportedOperationException if {@code page} is a snapshot marker page
 * @throws UncheckedIOException if the index record cannot be read or the writer fails to accept the data
 * @throws IllegalArgumentException if the create level is not STRIPE, PARTITION or TABLE
 */
@Override
public void addInput(Page page) {
    checkState(needsInput(), "Operator is already finishing");
    requireNonNull(page, "page is null");
    // TODO-cp-I38S9O: Operator currently not supported for Snapshot
    if (page instanceof MarkerPage) {
        throw new UnsupportedOperationException("Operator doesn't support snapshotting.");
    }
    // if operator is still receiving input, it's not finished
    finished.putIfAbsent(this, false);
    if (page.getPositionCount() == 0) {
        return;
    }
    IndexRecord indexRecord;
    try {
        indexRecord = heuristicIndexerManager.getIndexClient().lookUpIndexRecord(createIndexMetadata.getIndexName());
    } catch (IOException e) {
        throw new UncheckedIOException("Error reading index records, ", e);
    }
    // The level is decided lazily from the first non-empty page: a non-null partition name
    // derived from the file path means the table is partitioned.
    if (createIndexMetadata.getCreateLevel() == CreateIndexMetadata.Level.UNDEFINED) {
        boolean tableIsPartitioned = getPartitionName(page.getPageMetadata().getProperty(HetuConstant.DATASOURCE_FILE_PATH), indexRecord.qualifiedTable) != null;
        createIndexMetadata.decideIndexLevel(tableIsPartitioned);
    }
    // Collect the values of every channel under the name of the index column at the same position.
    Map<String, List<Object>> values = new HashMap<>();
    for (int blockId = 0; blockId < page.getChannelCount(); blockId++) {
        Block block = page.getBlock(blockId);
        Pair<String, Type> entry = createIndexMetadata.getIndexColumns().get(blockId);
        String indexColumn = entry.getFirst();
        Type type = entry.getSecond();
        for (int position = 0; position < block.getPositionCount(); ++position) {
            Object value = getNativeValue(type, block, position);
            value = getActualValue(type, value);
            values.computeIfAbsent(indexColumn, k -> new ArrayList<>()).add(value);
        }
    }
    // Writer metadata: the catalog (first segment of the qualified table name) plus everything
    // the page itself carries.
    Properties connectorMetadata = new Properties();
    connectorMetadata.put(HetuConstant.DATASOURCE_CATALOG, createIndexMetadata.getTableName().split("\\.")[0]);
    connectorMetadata.putAll(page.getPageMetadata());
    try {
        switch (createIndexMetadata.getCreateLevel()) {
            case STRIPE: {
                String filePath = page.getPageMetadata().getProperty(HetuConstant.DATASOURCE_FILE_PATH);
                // The orc file this page resides in wasn't modified from when the index was created/last updated
                if (pathToModifiedTime.containsKey(filePath) && pathToModifiedTime.get(filePath).equals(page.getPageMetadata().getProperty(HetuConstant.DATASOURCE_FILE_MODIFICATION))) {
                    return;
                }
                levelWriter.computeIfAbsent(filePath, k -> heuristicIndexerManager.getIndexWriter(createIndexMetadata, connectorMetadata));
                persistBy.putIfAbsent(levelWriter.get(filePath), this);
                levelWriter.get(filePath).addData(values, connectorMetadata);
                break;
            }
            case PARTITION: {
                String partition = getPartitionName(page.getPageMetadata().getProperty(HetuConstant.DATASOURCE_FILE_PATH), createIndexMetadata.getTableName());
                long modifiedTime = Long.parseLong(page.getPageMetadata().getProperty(HetuConstant.DATASOURCE_FILE_MODIFICATION));
                // Keep the newest modification time seen for this partition.
                indexLevelToMaxModifiedTime.merge(partition, modifiedTime, Long::max);
                // Create the writer lazily so a new one is not built (and thrown away) for every page.
                levelWriter.computeIfAbsent(partition, k -> heuristicIndexerManager.getIndexWriter(createIndexMetadata, connectorMetadata));
                persistBy.putIfAbsent(levelWriter.get(partition), this);
                levelWriter.get(partition).addData(values, connectorMetadata);
                break;
            }
            case TABLE: {
                String tableName = createIndexMetadata.getTableName();
                long modifiedTime = Long.parseLong(page.getPageMetadata().getProperty(HetuConstant.DATASOURCE_FILE_MODIFICATION));
                // Keep the newest modification time seen for the whole table.
                indexLevelToMaxModifiedTime.merge(tableName, modifiedTime, Long::max);
                // Create the writer lazily so a new one is not built (and thrown away) for every page.
                levelWriter.computeIfAbsent(tableName, k -> heuristicIndexerManager.getIndexWriter(createIndexMetadata, connectorMetadata));
                persistBy.putIfAbsent(levelWriter.get(tableName), this);
                levelWriter.get(tableName).addData(values, connectorMetadata);
                break;
            }
            default:
                throw new IllegalArgumentException("Create level not supported");
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Also used : CreateIndexOperator.getPartitionName(io.prestosql.operator.CreateIndexOperator.getPartitionName) Arrays(java.util.Arrays) UpdateIndexMetadata(io.prestosql.spi.connector.UpdateIndexMetadata) Logger(io.airlift.log.Logger) HashMap(java.util.HashMap) RestorableConfig(io.prestosql.spi.snapshot.RestorableConfig) ArrayList(java.util.ArrayList) CreateIndexOperator.getNativeValue(io.prestosql.operator.CreateIndexOperator.getNativeValue) HetuConstant(io.prestosql.spi.HetuConstant) Locale(java.util.Locale) IndexClient(io.prestosql.spi.heuristicindex.IndexClient) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Type(io.prestosql.spi.type.Type) IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord) TypeUtils.getActualValue(io.prestosql.spi.heuristicindex.TypeUtils.getActualValue) Block(io.prestosql.spi.block.Block) HeuristicIndexerManager(io.prestosql.heuristicindex.HeuristicIndexerManager) PlanNodeId(io.prestosql.spi.plan.PlanNodeId) Properties(java.util.Properties) MarkerPage(io.prestosql.spi.snapshot.MarkerPage) Iterator(java.util.Iterator) IndexWriter(io.prestosql.spi.heuristicindex.IndexWriter) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Page(io.prestosql.spi.Page) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Pair(io.prestosql.spi.heuristicindex.Pair) Preconditions.checkState(com.google.common.base.Preconditions.checkState) UncheckedIOException(java.io.UncheckedIOException) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) Collections(java.util.Collections) MarkerPage(io.prestosql.spi.snapshot.MarkerPage) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ArrayList(java.util.ArrayList) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) Properties(java.util.Properties) 
IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord) Type(io.prestosql.spi.type.Type) Block(io.prestosql.spi.block.Block) ArrayList(java.util.ArrayList) List(java.util.List)

Example 7 with IndexRecord

use of io.prestosql.spi.heuristicindex.IndexRecord in project hetu-core by openlookeng.

the class IndexRecordManager method deleteIndexRecord.

/**
 * Removes the index record(s) with the given name from the metastore, either entirely or
 * only for a subset of partitions.
 *
 * @param name name of index to delete
 * @param partitionsToRemove the partitions to remove. If this list is empty, remove all.
 */
public synchronized void deleteIndexRecord(String name, List<String> partitionsToRemove) {
    for (IndexRecord existing : getIndexRecords()) {
        if (!existing.name.equals(name)) {
            continue;
        }
        if (partitionsToRemove.isEmpty()) {
            // nothing specific requested: drop the whole record
            metastore.alterTableParameter(existing.catalog, existing.schema, existing.table, existing.serializeKey(), null);
            continue;
        }
        existing.partitions.removeAll(partitionsToRemove);
        IndexRecord trimmed = new IndexRecord(
                existing.name,
                existing.user,
                existing.qualifiedTable,
                existing.columns,
                existing.indexType,
                existing.indexSize,
                existing.propertiesAsList,
                existing.partitions);
        // if the last partition of the index has been dropped, remove the record
        metastore.alterTableParameter(
                existing.catalog,
                existing.schema,
                existing.table,
                trimmed.serializeKey(),
                trimmed.partitions.isEmpty() ? null : trimmed.serializeValue());
    }
}
Also used : HetuMetastore(io.prestosql.spi.metastore.HetuMetastore) CatalogEntity(io.prestosql.spi.metastore.model.CatalogEntity) TableEntity(io.prestosql.spi.metastore.model.TableEntity) Arrays(java.util.Arrays) List(java.util.List) Logger(io.airlift.log.Logger) Map(java.util.Map) Optional(java.util.Optional) TableEntityType(io.prestosql.spi.metastore.model.TableEntityType) IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord) DatabaseEntity(io.prestosql.spi.metastore.model.DatabaseEntity) ArrayList(java.util.ArrayList) IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord)

Example 8 with IndexRecord

use of io.prestosql.spi.heuristicindex.IndexRecord in project hetu-core by openlookeng.

the class IndexCache method getIndices.

/**
 * Returns the cached indices that apply to the given split for one table column.
 *
 * <p>For each known index type the cache is probed without blocking. A miss schedules a
 * delayed background load and contributes nothing to the result. A hit is used only if
 * every cached entry carries the same last-modified time as the split; otherwise the cache
 * entry is invalidated.
 *
 * @param table table name used as the first part of the cache key
 * @param column column name used as the second part of the cache key
 * @param split split whose file path and last-modified time identify the index
 * @param indexRecordKeyToRecordMap index records keyed by {@code table/column/indexType}
 * @return the matching indices, or an empty list if the cache is not initialised
 */
public List<IndexMetadata> getIndices(String table, String column, Split split, Map<String, IndexRecord> indexRecordKeyToRecordMap) {
    if (cache == null) {
        return Collections.emptyList();
    }
    URI splitUri = URI.create(URIUtil.encodePath(split.getConnectorSplit().getFilePath()));
    long lastModifiedTime = split.getConnectorSplit().getLastModifiedTime();
    List<IndexMetadata> indices = new LinkedList<>();
    for (String indexType : INDEX_TYPES) {
        String indexRecordKey = table + "/" + column + "/" + indexType;
        String filterKeyPath = indexRecordKey + splitUri.getRawPath();
        IndexCacheKey filterKey = new IndexCacheKey(filterKeyPath, lastModifiedTime, indexRecordKeyToRecordMap.get(indexRecordKey));

        // it is possible to get multiple SplitIndexMetadata due to the range mismatch, especially in the case
        // where the split has a wider range than the original splits used for index creation
        List<IndexMetadata> cached = cache.getIfPresent(filterKey);
        if (cached == null) {
            // the key has not been loaded yet: load it asynchronously and move on
            executor.schedule(() -> {
                try {
                    cache.get(filterKey);
                    LOG.debug("Loaded index for %s.", filterKey);
                } catch (ExecutionException e) {
                    // An IndexNotCreatedException cause means the index is not registered; stay silent.
                    boolean indexNotRegistered = e.getCause() instanceof IndexNotCreatedException;
                    if (!indexNotRegistered && LOG.isDebugEnabled()) {
                        LOG.debug(e, "Unable to load index for %s. ", filterKey);
                    }
                }
            }, loadDelay, TimeUnit.MILLISECONDS);
            continue;
        }

        // the index is only valid if the lastModifiedTime of the split matches the index's lastModifiedTime
        boolean stale = cached.stream().anyMatch(index -> index.getLastModifiedTime() != lastModifiedTime);
        if (stale) {
            cache.invalidate(filterKey);
        } else {
            indices.addAll(cached);
        }
    }
    return indices;
}
Also used : IndexNotCreatedException(io.prestosql.spi.heuristicindex.IndexNotCreatedException) IndexCacheKey(io.prestosql.spi.heuristicindex.IndexCacheKey) CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) ExecutionException(java.util.concurrent.ExecutionException) URI(java.net.URI) IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord) LinkedList(java.util.LinkedList)

Example 9 with IndexRecord

use of io.prestosql.spi.heuristicindex.IndexRecord in project hetu-core by openlookeng.

the class SplitFiltering method getFilteredSplit.

/**
 * Filters a batch of splits using the heuristic indexes registered for the given table.
 *
 * <p>All splits are returned unfiltered when there is no predicate or table name, when the
 * index records cannot be read, or when no index covers a column referenced by the predicate.
 * When both inverted-type and other indexes are applicable, both filters are evaluated and
 * the smaller resulting split list is returned.
 *
 * @param expression the predicate to filter by, if any
 * @param tableName fully qualified table name, if any
 * @param assignments symbol-to-column assignments used to resolve the columns the predicate references
 * @param nextSplits the batch of splits to filter
 * @param heuristicIndexerManager provides the index client and is handed on to the filter helpers
 * @return the splits remaining after filtering
 */
public static List<Split> getFilteredSplit(Optional<RowExpression> expression, Optional<String> tableName, Map<Symbol, ColumnHandle> assignments, SplitSource.SplitBatch nextSplits, HeuristicIndexerManager heuristicIndexerManager) {
    if (!expression.isPresent() || !tableName.isPresent()) {
        return nextSplits.getSplits();
    }
    List<Split> allSplits = nextSplits.getSplits();
    String fullQualifiedTableName = tableName.get();
    long initialSplitsSize = allSplits.size();
    List<IndexRecord> indexRecords;
    try {
        indexRecords = heuristicIndexerManager.getIndexClient().getAllIndexRecords();
    } catch (IOException e) {
        // Throwable goes first so the cause is logged rather than consumed as an unused format argument.
        LOG.debug(e, "Filtering can't be done because not able to read index records");
        return allSplits;
    }
    Set<String> referencedColumns = new HashSet<>();
    getAllColumns(expression.get(), referencedColumns, assignments);
    // Records whose type is listed in INVERTED_INDEX go to the inverted filter, all others to the forward filter.
    Map<String, IndexRecord> invertedIndexRecords = new HashMap<>();
    Map<String, IndexRecord> forwardIndexRecords = new HashMap<>();
    for (IndexRecord indexRecord : indexRecords) {
        if (indexRecord.qualifiedTable.equalsIgnoreCase(fullQualifiedTableName)) {
            List<String> columnsInIndex = Arrays.asList(indexRecord.columns);
            for (String column : referencedColumns) {
                if (columnsInIndex.contains(column)) {
                    String indexRecordKey = indexRecord.qualifiedTable + "/" + column + "/" + indexRecord.indexType;
                    // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
                    if (INVERTED_INDEX.contains(indexRecord.indexType.toUpperCase(java.util.Locale.ROOT))) {
                        invertedIndexRecords.put(indexRecordKey, indexRecord);
                    } else {
                        forwardIndexRecords.put(indexRecordKey, indexRecord);
                    }
                }
            }
        }
    }
    List<Split> splitsToReturn;
    if (invertedIndexRecords.isEmpty() && forwardIndexRecords.isEmpty()) {
        return allSplits;
    } else if (!invertedIndexRecords.isEmpty() && forwardIndexRecords.isEmpty()) {
        splitsToReturn = filterUsingInvertedIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, invertedIndexRecords, heuristicIndexerManager);
    } else if (!forwardIndexRecords.isEmpty() && invertedIndexRecords.isEmpty()) {
        splitsToReturn = filterUsingForwardIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, forwardIndexRecords, heuristicIndexerManager);
    } else {
        // filter using both indexes and return the smallest set of splits.
        List<Split> splitsToReturn1 = filterUsingInvertedIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, invertedIndexRecords, heuristicIndexerManager);
        List<Split> splitsToReturn2 = filterUsingForwardIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, forwardIndexRecords, heuristicIndexerManager);
        splitsToReturn = splitsToReturn1.size() < splitsToReturn2.size() ? splitsToReturn1 : splitsToReturn2;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("totalSplitsProcessed: " + totalSplitsProcessed.addAndGet(initialSplitsSize));
        LOG.debug("splitsFiltered: " + splitsFiltered.addAndGet(initialSplitsSize - splitsToReturn.size()));
    }
    return splitsToReturn;
}
Also used : HashMap(java.util.HashMap) IOException(java.io.IOException) Split(io.prestosql.metadata.Split) IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord) HashSet(java.util.HashSet)

Example 10 with IndexRecord

use of io.prestosql.spi.heuristicindex.IndexRecord in project hetu-core by openlookeng.

the class HeuristicIndexerManager method updateIndexRecordUsage.

/**
 * Refreshes the memory and disk usage fields of the given index records.
 *
 * <p>Every record is first registered with a usage of zero in two maps, the index cache is
 * asked to fill those maps in, and the resulting values are written back onto the records.
 *
 * @param targetRecords the records whose usage fields should be updated
 */
private void updateIndexRecordUsage(List<IndexRecord> targetRecords) {
    HashMap<IndexRecord, Long> memoryUsageByRecord = new HashMap<>();
    HashMap<IndexRecord, Long> diskUsageByRecord = new HashMap<>();
    targetRecords.forEach(target -> {
        memoryUsageByRecord.put(target, 0L);
        diskUsageByRecord.put(target, 0L);
    });

    // let the cache report the memory and disk usage of the registered records
    getIndexCache().readUsage(memoryUsageByRecord, diskUsageByRecord);

    // copy the reported figures back onto each record
    targetRecords.forEach(target -> {
        target.setMemoryUsage(memoryUsageByRecord.get(target));
        target.setDiskUsage(diskUsageByRecord.get(target));
    });
}
Also used : HashMap(java.util.HashMap) IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord)

Aggregations

IndexRecord (io.prestosql.spi.heuristicindex.IndexRecord)20 IOException (java.io.IOException)10 CreateIndexMetadata (io.prestosql.spi.connector.CreateIndexMetadata)8 LinkedList (java.util.LinkedList)8 Map (java.util.Map)8 IndexMetadata (io.prestosql.spi.heuristicindex.IndexMetadata)7 ArrayList (java.util.ArrayList)7 IndexClient (io.prestosql.spi.heuristicindex.IndexClient)6 Pair (io.prestosql.spi.heuristicindex.Pair)6 UncheckedIOException (java.io.UncheckedIOException)6 HashMap (java.util.HashMap)6 List (java.util.List)6 Logger (io.airlift.log.Logger)5 Path (java.nio.file.Path)5 ImmutableList (com.google.common.collect.ImmutableList)4 IndexCacheKey (io.prestosql.spi.heuristicindex.IndexCacheKey)4 HetuMetastore (io.prestosql.spi.metastore.HetuMetastore)4 Collections (java.util.Collections)4 Collectors (java.util.stream.Collectors)4 ImmutableMap (com.google.common.collect.ImmutableMap)3