use of io.prestosql.spi.heuristicindex.IndexRecord in project hetu-core by openlookeng.
the class UpdateIndexOperator method addInput.
/**
 * Consumes one page of table data and hands its column values to the index writer
 * registered for the page's index level (stripe, partition or table).
 *
 * <p>If the create level is still {@code UNDEFINED}, it is decided from the first
 * non-empty page by checking whether a partition name can be derived from the page's
 * file path. Empty pages are ignored. At stripe level, a page whose file path is known
 * in {@code pathToModifiedTime} with an unchanged modification value is skipped.
 *
 * @param page the input page; must not be null and must not be a {@code MarkerPage}
 * @throws IllegalStateException if the operator no longer accepts input, or if the
 *         index record needed to decide the level cannot be found
 * @throws UnsupportedOperationException if {@code page} is a snapshot marker page
 * @throws UncheckedIOException if reading the index record or adding data to the writer fails
 * @throws IllegalArgumentException if the create level is not stripe, partition or table
 */
@Override
public void addInput(Page page) {
    checkState(needsInput(), "Operator is already finishing");
    requireNonNull(page, "page is null");
    // TODO-cp-I38S9O: Operator currently not supported for Snapshot
    if (page instanceof MarkerPage) {
        throw new UnsupportedOperationException("Operator doesn't support snapshotting.");
    }
    // if operator is still receiving input, it's not finished
    finished.putIfAbsent(this, false);
    if (page.getPositionCount() == 0) {
        return;
    }

    if (createIndexMetadata.getCreateLevel() == CreateIndexMetadata.Level.UNDEFINED) {
        // The index record is only needed to decide the level, so it is looked up here
        // instead of once per page.
        IndexRecord indexRecord;
        try {
            indexRecord = heuristicIndexerManager.getIndexClient().lookUpIndexRecord(createIndexMetadata.getIndexName());
        } catch (IOException e) {
            throw new UncheckedIOException("Error reading index records, ", e);
        }
        // Fail with a clear message rather than a bare NullPointerException below.
        checkState(indexRecord != null, "Index record not found for index %s", createIndexMetadata.getIndexName());
        boolean tableIsPartitioned = getPartitionName(page.getPageMetadata().getProperty(HetuConstant.DATASOURCE_FILE_PATH), indexRecord.qualifiedTable) != null;
        createIndexMetadata.decideIndexLevel(tableIsPartitioned);
    }

    // Collect the values of every index column; channel i of the page corresponds to
    // index column i of the create-index metadata.
    Map<String, List<Object>> values = new HashMap<>();
    for (int blockId = 0; blockId < page.getChannelCount(); blockId++) {
        Block block = page.getBlock(blockId);
        Pair<String, Type> entry = createIndexMetadata.getIndexColumns().get(blockId);
        String indexColumn = entry.getFirst();
        Type type = entry.getSecond();
        for (int position = 0; position < block.getPositionCount(); ++position) {
            Object value = getNativeValue(type, block, position);
            value = getActualValue(type, value);
            values.computeIfAbsent(indexColumn, k -> new ArrayList<>()).add(value);
        }
    }

    Properties connectorMetadata = new Properties();
    // the catalog is the first dot-separated component of the table name
    connectorMetadata.put(HetuConstant.DATASOURCE_CATALOG, createIndexMetadata.getTableName().split("\\.")[0]);
    connectorMetadata.putAll(page.getPageMetadata());

    try {
        switch (createIndexMetadata.getCreateLevel()) {
            case STRIPE: {
                String filePath = page.getPageMetadata().getProperty(HetuConstant.DATASOURCE_FILE_PATH);
                // The orc file this page resides in wasn't modified from when the index was created/last updated
                if (pathToModifiedTime.containsKey(filePath) && pathToModifiedTime.get(filePath).equals(page.getPageMetadata().getProperty(HetuConstant.DATASOURCE_FILE_MODIFICATION))) {
                    return;
                }
                writeValuesToLevel(filePath, values, connectorMetadata);
                break;
            }
            case PARTITION: {
                String partition = getPartitionName(page.getPageMetadata().getProperty(HetuConstant.DATASOURCE_FILE_PATH), createIndexMetadata.getTableName());
                trackMaxModifiedTime(partition, page);
                writeValuesToLevel(partition, values, connectorMetadata);
                break;
            }
            case TABLE: {
                String table = createIndexMetadata.getTableName();
                trackMaxModifiedTime(table, page);
                writeValuesToLevel(table, values, connectorMetadata);
                break;
            }
            default:
                throw new IllegalArgumentException("Create level not supported");
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}

/**
 * Records the page's file modification time under {@code levelKey} if it is larger
 * than the value stored so far.
 */
private void trackMaxModifiedTime(String levelKey, Page page) {
    long modifiedTime = Long.parseLong(page.getPageMetadata().getProperty(HetuConstant.DATASOURCE_FILE_MODIFICATION));
    indexLevelToMaxModifiedTime.merge(levelKey, modifiedTime, Long::max);
}

/**
 * Adds the values to the writer registered under {@code levelKey}, creating the writer
 * only if none is registered yet, and registers this operator in {@code persistBy}
 * for that writer if no operator is registered for it yet.
 */
private void writeValuesToLevel(String levelKey, Map<String, List<Object>> values, Properties connectorMetadata) throws IOException {
    // computeIfAbsent builds the writer lazily; putIfAbsent would construct one per page
    levelWriter.computeIfAbsent(levelKey, k -> heuristicIndexerManager.getIndexWriter(createIndexMetadata, connectorMetadata));
    persistBy.putIfAbsent(levelWriter.get(levelKey), this);
    levelWriter.get(levelKey).addData(values, connectorMetadata);
}
use of io.prestosql.spi.heuristicindex.IndexRecord in project hetu-core by openlookeng.
the class IndexRecordManager method deleteIndexRecord.
/**
 * Deletes the index record(s) with the given name from the metastore, either entirely
 * or only for selected partitions.
 *
 * <p>With a partial deletion, the listed partitions are removed from the record and the
 * record is written back; once no partition remains, the record itself is removed.
 *
 * @param name name of index to delete
 * @param partitionsToRemove the partitions to remove. If this list is empty, remove all.
 */
public synchronized void deleteIndexRecord(String name, List<String> partitionsToRemove) {
    for (IndexRecord record : getIndexRecords()) {
        if (!record.name.equals(name)) {
            continue;
        }

        if (partitionsToRemove.isEmpty()) {
            // no partitions given: drop the whole record
            metastore.alterTableParameter(record.catalog, record.schema, record.table, record.serializeKey(), null);
            continue;
        }

        // partial deletion: strip the partitions and write the shrunken record back
        record.partitions.removeAll(partitionsToRemove);
        IndexRecord remaining = new IndexRecord(
                record.name,
                record.user,
                record.qualifiedTable,
                record.columns,
                record.indexType,
                record.indexSize,
                record.propertiesAsList,
                record.partitions);
        metastore.alterTableParameter(
                record.catalog,
                record.schema,
                record.table,
                remaining.serializeKey(),
                // if the last partition of the index has been dropped, remove the record
                remaining.partitions.isEmpty() ? null : remaining.serializeValue());
    }
}
use of io.prestosql.spi.heuristicindex.IndexRecord in project hetu-core by openlookeng.
the class IndexCache method getIndices.
/**
 * Collects the cached indices of every known index type for one table column and split.
 *
 * <p>An index type that is not in the cache yet contributes nothing to the result; its
 * load is scheduled in the background instead. Cached indices are only returned if all
 * of them carry the same last-modified time as the split; otherwise the cache entry is
 * invalidated and nothing is returned for that index type.
 *
 * @param table table name used as the first component of the cache key
 * @param column column name used as the second component of the cache key
 * @param split split whose file path and last-modified time identify the cache entry
 * @param indexRecordKeyToRecordMap index records keyed by {@code table/column/indexType}
 * @return the valid cached indices, or an empty list if there is no cache
 */
public List<IndexMetadata> getIndices(String table, String column, Split split, Map<String, IndexRecord> indexRecordKeyToRecordMap) {
    if (cache == null) {
        return Collections.emptyList();
    }

    String rawSplitPath = URI.create(URIUtil.encodePath(split.getConnectorSplit().getFilePath())).getRawPath();
    long lastModifiedTime = split.getConnectorSplit().getLastModifiedTime();

    List<IndexMetadata> result = new LinkedList<>();
    for (String indexType : INDEX_TYPES) {
        String recordKey = table + "/" + column + "/" + indexType;
        IndexCacheKey cacheKey = new IndexCacheKey(recordKey + rawSplitPath, lastModifiedTime, indexRecordKeyToRecordMap.get(recordKey));

        // A single key can yield multiple index entries due to range mismatch, especially
        // when the split has a wider range than the splits used for index creation.
        List<IndexMetadata> cached = cache.getIfPresent(cacheKey);
        if (cached == null) {
            // not loaded yet: load it asynchronously and return nothing for this type
            scheduleAsyncLoad(cacheKey);
            continue;
        }

        // the index is only valid if the lastModifiedTime of the split matches the index's lastModifiedTime
        boolean stale = cached.stream().anyMatch(index -> index.getLastModifiedTime() != lastModifiedTime);
        if (stale) {
            cache.invalidate(cacheKey);
        } else {
            result.addAll(cached);
        }
    }
    return result;
}

/**
 * Schedules a delayed background load of the given key into the cache.
 */
private void scheduleAsyncLoad(IndexCacheKey cacheKey) {
    executor.schedule(() -> {
        try {
            cache.get(cacheKey);
            LOG.debug("Loaded index for %s.", cacheKey);
        } catch (ExecutionException e) {
            // An IndexNotCreatedException cause means the index is not registered; stay silent.
            boolean notRegistered = e.getCause() instanceof IndexNotCreatedException;
            if (!notRegistered && LOG.isDebugEnabled()) {
                LOG.debug(e, "Unable to load index for %s. ", cacheKey);
            }
        }
    }, loadDelay, TimeUnit.MILLISECONDS);
}
use of io.prestosql.spi.heuristicindex.IndexRecord in project hetu-core by openlookeng.
the class SplitFiltering method getFilteredSplit.
/**
 * Filters a batch of splits using the heuristic indices that exist on the columns
 * referenced by the predicate.
 *
 * <p>All splits are returned unfiltered when there is no predicate or table name, when
 * the index records cannot be read, or when no index record matches the table and the
 * referenced columns. If both inverted and forward indices apply, both filters are run
 * and the smaller resulting split list is returned.
 *
 * @param expression the predicate to filter by, if any
 * @param tableName the fully qualified table name, if any
 * @param assignments symbol to column handle mapping used to resolve referenced columns
 * @param nextSplits the batch of splits to filter
 * @param heuristicIndexerManager provides the index client and is passed on to the filters
 * @return the splits that remain after filtering
 */
public static List<Split> getFilteredSplit(Optional<RowExpression> expression, Optional<String> tableName, Map<Symbol, ColumnHandle> assignments, SplitSource.SplitBatch nextSplits, HeuristicIndexerManager heuristicIndexerManager) {
    if (!expression.isPresent() || !tableName.isPresent()) {
        return nextSplits.getSplits();
    }
    List<Split> allSplits = nextSplits.getSplits();
    String fullQualifiedTableName = tableName.get();
    long initialSplitsSize = allSplits.size();

    List<IndexRecord> indexRecords;
    try {
        indexRecords = heuristicIndexerManager.getIndexClient().getAllIndexRecords();
    } catch (IOException e) {
        // Throwable-first overload so the cause is logged rather than consumed as an
        // unused format argument.
        LOG.debug(e, "Filtering can't be done because not able to read index records");
        return allSplits;
    }

    Set<String> referencedColumns = new HashSet<>();
    getAllColumns(expression.get(), referencedColumns, assignments);

    // Records are keyed by table/column/indexType and split by index family: types listed
    // in INVERTED_INDEX go to the inverted-index filter, all others to the forward-index filter.
    Map<String, IndexRecord> invertedIndexRecords = new HashMap<>();
    Map<String, IndexRecord> forwardIndexRecords = new HashMap<>();
    for (IndexRecord indexRecord : indexRecords) {
        if (!indexRecord.qualifiedTable.equalsIgnoreCase(fullQualifiedTableName)) {
            continue;
        }
        List<String> columnsInIndex = Arrays.asList(indexRecord.columns);
        for (String column : referencedColumns) {
            if (!columnsInIndex.contains(column)) {
                continue;
            }
            String indexRecordKey = indexRecord.qualifiedTable + "/" + column + "/" + indexRecord.indexType;
            if (INVERTED_INDEX.contains(indexRecord.indexType.toUpperCase())) {
                invertedIndexRecords.put(indexRecordKey, indexRecord);
            } else {
                forwardIndexRecords.put(indexRecordKey, indexRecord);
            }
        }
    }

    boolean hasInverted = !invertedIndexRecords.isEmpty();
    boolean hasForward = !forwardIndexRecords.isEmpty();
    if (!hasInverted && !hasForward) {
        return allSplits;
    }

    List<Split> splitsToReturn;
    if (hasInverted && hasForward) {
        // filter using both indexes and return the smallest set of splits.
        List<Split> invertedFiltered = filterUsingInvertedIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, invertedIndexRecords, heuristicIndexerManager);
        List<Split> forwardFiltered = filterUsingForwardIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, forwardIndexRecords, heuristicIndexerManager);
        splitsToReturn = invertedFiltered.size() < forwardFiltered.size() ? invertedFiltered : forwardFiltered;
    } else if (hasInverted) {
        splitsToReturn = filterUsingInvertedIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, invertedIndexRecords, heuristicIndexerManager);
    } else {
        splitsToReturn = filterUsingForwardIndex(expression.get(), allSplits, fullQualifiedTableName, referencedColumns, forwardIndexRecords, heuristicIndexerManager);
    }

    // the running counters are only advanced when debug logging is enabled
    if (LOG.isDebugEnabled()) {
        LOG.debug("totalSplitsProcessed: " + totalSplitsProcessed.addAndGet(initialSplitsSize));
        LOG.debug("splitsFiltered: " + splitsFiltered.addAndGet(initialSplitsSize - splitsToReturn.size()));
    }
    return splitsToReturn;
}
use of io.prestosql.spi.heuristicindex.IndexRecord in project hetu-core by openlookeng.
the class HeuristicIndexerManager method updateIndexRecordUsage.
/**
 * Refreshes the memory and disk usage fields of the given index records.
 *
 * <p>Every record starts with a usage of zero in both maps; the index cache then fills
 * in the usage via {@code readUsage}, and the resulting values are written back to the records.
 *
 * @param targetRecords the records whose usage fields should be updated
 */
private void updateIndexRecordUsage(List<IndexRecord> targetRecords) {
    HashMap<IndexRecord, Long> memoryByRecord = new HashMap<>();
    HashMap<IndexRecord, Long> diskByRecord = new HashMap<>();

    // seed both maps so every target record has an entry before the cache is queried
    targetRecords.forEach(target -> {
        memoryByRecord.put(target, 0L);
        diskByRecord.put(target, 0L);
    });

    // get the memory and disk usage of the records from cache
    getIndexCache().readUsage(memoryByRecord, diskByRecord);

    // update the indexRecord memory and disk usage field
    targetRecords.forEach(target -> {
        target.setMemoryUsage(memoryByRecord.get(target));
        target.setDiskUsage(diskByRecord.get(target));
    });
}
Aggregations