Search in sources :

Example 1 with IndexMetadata

use of io.prestosql.spi.heuristicindex.IndexMetadata in project hetu-core by openlookeng.

the class HeuristicIndexClient method readSplitIndex.

/**
 * Reads the indexes stored for a split path and wraps each one in an {@link IndexMetadata}.
 *
 * <p>The first three components of {@code path} are used to look up an index record
 * (presumably table, column and index type, matching how the index file paths are parsed below).
 * When the lookup succeeds but finds no record, loading is skipped. When the lookup itself
 * fails, the failure is logged and the indexes are still read via {@code readIndexMap}.
 *
 * @param path the split index path; must not be null
 * @return one {@link IndexMetadata} per index file found, or {@code null} when the index record
 *         lookup reports that no record exists for this path
 * @throws IOException declared by the overridden method
 * @throws IllegalArgumentException if an index file path cannot be broken down into
 *         table / column / index type / split path / file name, or the file name has no extension
 */
@Override
public List<IndexMetadata> readSplitIndex(String path) throws IOException {
    requireNonNull(path, "no path specified");
    List<IndexMetadata> indexes = new LinkedList<>();
    Path indexKeyPath = Paths.get(path);
    IndexRecord curIndex = null;
    try {
        curIndex = indexRecordManager.lookUpIndexRecord(indexKeyPath.subpath(0, 1).toString(), new String[] { indexKeyPath.subpath(1, 2).toString() }, indexKeyPath.subpath(2, 3).toString());
        if (curIndex == null) {
            // Use index record file to pre-screen. If record does not contain the index, skip loading.
            // Callers distinguish this null from an empty list, so it must stay null.
            return null;
        }
    } catch (Exception e) {
        // Best effort: on exception, log (including the cause) and continue reading from disk
        LOG.debug("Error reading index records: " + path + ", cause: " + e);
    }
    for (Map.Entry<String, Index> entry : readIndexMap(path, curIndex).entrySet()) {
        String absolutePath = entry.getKey();
        // Peel off root, table, column and index type one leading component at a time.
        // Each component is removed as a literal, never as a regular expression.
        Path remainder = removeFirstLiteral(absolutePath, root.toString());
        Path table = remainder.subpath(0, 1);
        remainder = removeFirstLiteral(remainder.toString(), table.toString());
        Path column = remainder.subpath(0, 1);
        remainder = removeFirstLiteral(remainder.toString(), column.toString());
        Path indexType = remainder.subpath(0, 1);
        remainder = removeFirstLiteral(remainder.toString(), indexType.toString());
        Path filenamePath = remainder.getFileName();
        if (filenamePath == null) {
            throw new IllegalArgumentException("Split path cannot be resolved: " + path);
        }
        remainder = remainder.getParent();
        table = table.getFileName();
        column = column.getFileName();
        indexType = indexType.getFileName();
        if (remainder == null || table == null || column == null || indexType == null) {
            throw new IllegalArgumentException("Split path cannot be resolved: " + path);
        }
        String filename = filenamePath.toString();
        int extensionStart = filename.lastIndexOf('.');
        if (extensionStart < 0) {
            // Without this check substring(0, -1) would fail with an unhelpful StringIndexOutOfBoundsException
            throw new IllegalArgumentException("Split path cannot be resolved: " + path);
        }
        // The file name without its extension is parsed as the split start
        long splitStart = Long.parseLong(filename.substring(0, extensionStart));
        String timeDir = Paths.get(table.toString(), column.toString(), indexType.toString(), remainder.toString()).toString();
        long lastUpdated = getLastModifiedTime(timeDir);
        IndexMetadata index = new IndexMetadata(entry.getValue(), table.toString(), new String[] { column.toString() }, root.toString(), remainder.toString(), splitStart, lastUpdated);
        indexes.add(index);
    }
    return indexes;
}

/**
 * Removes the first occurrence of {@code literal} from {@code text} and returns the result as a path.
 * The literal is quoted so that regex metacharacters in path names (for example '.', '+', '(' or a
 * Windows '\') are matched verbatim.
 */
private static Path removeFirstLiteral(String text, String literal) {
    return Paths.get(text.replaceFirst(java.util.regex.Pattern.quote(literal), ""));
}
Also used : Path(java.nio.file.Path) BTreeIndex(io.hetu.core.plugin.heuristicindex.index.btree.BTreeIndex) Index(io.prestosql.spi.heuristicindex.Index) CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) LinkedList(java.util.LinkedList) FileSystemException(java.nio.file.FileSystemException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException)

Example 2 with IndexMetadata

use of io.prestosql.spi.heuristicindex.IndexMetadata in project hetu-core by openlookeng.

the class IndexCache method getIndices.

/**
 * Returns the cached indexes that apply to a split, scheduling an asynchronous load for any
 * index that is not cached yet.
 *
 * <p>For every non-partition column in the predicate and every entry of {@code INDEX_TYPES} a
 * cache key is built from the table name, column, index type and split path. A cache hit is
 * only used when every cached index carries the same last-modified time as the split;
 * otherwise the entry is invalidated. A cache miss contributes nothing to the result and
 * schedules a load after {@code loadDelay} milliseconds.
 *
 * @param catalog catalog name, prepended to {@code table} to form the fully qualified name
 * @param table table name
 * @param hiveSplit split whose path and last-modified time identify the index
 * @param effectivePredicate predicate whose columns are checked for indexes
 * @param partitions partition columns to skip; may be null, in which case no column is skipped
 * @return the indexes currently cached for this split; empty when nothing is cached yet, when
 *         an argument (other than {@code partitions}) is null, when the predicate has no
 *         domains, or when the table name is invalid
 */
public List<IndexMetadata> getIndices(String catalog, String table, HiveSplit hiveSplit, TupleDomain<HiveColumnHandle> effectivePredicate, List<HiveColumnHandle> partitions) {
    if (cache == null || catalog == null || table == null || hiveSplit == null || effectivePredicate == null) {
        return Collections.emptyList();
    }
    // A predicate without domains has no columns to look up; calling get() on it would throw
    if (!effectivePredicate.getDomains().isPresent()) {
        return Collections.emptyList();
    }
    long lastModifiedTime = hiveSplit.getLastModifiedTime();
    Path path = new Path(hiveSplit.getPath());
    URI pathUri = URI.create(URIUtil.encodePath(path.toString()));
    String tableFqn = catalog + "." + table;
    // for each split, load indexes for each predicate (if the predicate contains an indexed column)
    List<IndexMetadata> splitIndexes = new LinkedList<>();
    // The table name does not depend on the column, so validate it once up front
    // e.g. catalog.schema.table or dc.catalog.schema.table
    if (!tableFqn.matches("([\\p{Alnum}_]+\\.){2,3}[\\p{Alnum}_]+")) {
        LOG.warn("Invalid table name " + tableFqn);
        return splitIndexes;
    }
    for (HiveColumnHandle columnHandle : effectivePredicate.getDomains().get().keySet()) {
        if (partitions != null && partitions.contains(columnHandle)) {
            // partition columns are skipped
            continue;
        }
        String column = columnHandle.getName().toLowerCase();
        if (!column.matches("[\\p{Alnum}_]+")) {
            LOG.warn("Invalid column name " + column);
            continue;
        }
        for (String indexType : INDEX_TYPES) {
            String indexCacheKeyPath = Paths.get(tableFqn, column, indexType, pathUri.getRawPath()).toString();
            IndexCacheKey indexCacheKey = new IndexCacheKey(indexCacheKeyPath, lastModifiedTime);
            // check if cache contains the key
            List<IndexMetadata> predicateIndexes = cache.getIfPresent(indexCacheKey);
            // if cache didn't contain the key, it has not been loaded, load it asynchronously
            if (predicateIndexes == null) {
                executor.schedule(() -> {
                    try {
                        cache.get(indexCacheKey);
                        LOG.debug("Loaded index for %s.", indexCacheKeyPath);
                    } catch (ExecutionException e) {
                        if (e.getCause() instanceof IndexNotCreatedException) {
                            // Do nothing. Index not registered.
                        } else if (LOG.isDebugEnabled()) {
                            LOG.debug(e, "Unable to load index for %s. ", indexCacheKeyPath);
                        }
                    }
                }, loadDelay, TimeUnit.MILLISECONDS);
            } else {
                // the index is only valid if the lastModifiedTime of the split matches the index's lastModifiedTime
                for (IndexMetadata index : predicateIndexes) {
                    if (index.getLastModifiedTime() != lastModifiedTime) {
                        cache.invalidate(indexCacheKey);
                        predicateIndexes = Collections.emptyList();
                        break;
                    }
                }
                // cache contained the key
                splitIndexes.addAll(predicateIndexes);
            }
        }
    }
    return splitIndexes;
}
Also used : Path(org.apache.hadoop.fs.Path) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) LoadingCache(com.google.common.cache.LoadingCache) Logger(io.airlift.log.Logger) IndexNotCreatedException(io.prestosql.spi.heuristicindex.IndexNotCreatedException) Inject(com.google.inject.Inject) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) ImmutableList(com.google.common.collect.ImmutableList) HetuConstant(io.prestosql.spi.HetuConstant) IndexClient(io.prestosql.spi.heuristicindex.IndexClient) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) Path(org.apache.hadoop.fs.Path) URI(java.net.URI) LinkedList(java.util.LinkedList) ThreadFactory(java.util.concurrent.ThreadFactory) IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord) TupleDomain(io.prestosql.spi.predicate.TupleDomain) PropertyService(io.prestosql.spi.service.PropertyService) IOException(java.io.IOException) Executors(java.util.concurrent.Executors) HiveSplit(io.prestosql.plugin.hive.HiveSplit) CacheLoader(com.google.common.cache.CacheLoader) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) KILOBYTE(io.prestosql.spi.HetuConstant.KILOBYTE) Paths(java.nio.file.Paths) IndexCacheKey(io.prestosql.spi.heuristicindex.IndexCacheKey) URIUtil(org.eclipse.jetty.util.URIUtil) VisibleForTesting(com.google.common.annotations.VisibleForTesting) CacheBuilder(com.google.common.cache.CacheBuilder) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) Collections(java.util.Collections) IndexNotCreatedException(io.prestosql.spi.heuristicindex.IndexNotCreatedException) IndexCacheKey(io.prestosql.spi.heuristicindex.IndexCacheKey) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) ExecutionException(java.util.concurrent.ExecutionException) URI(java.net.URI) LinkedList(java.util.LinkedList) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle)

Example 3 with IndexMetadata

use of io.prestosql.spi.heuristicindex.IndexMetadata in project hetu-core by openlookeng.

the class IndexCacheLoader method load.

/**
 * Loads the indexes for a cache key, ordered by split start.
 *
 * @param key cache key carrying the index path and the expected last-modified time
 * @return the indexes read for the key's path, sorted ascending by split start
 * @throws IndexNotCreatedException if the last-modified time cannot be read, or the index
 *         client returns null for the path
 * @throws Exception if the index is older/newer than the key expects, reading fails, or no
 *         index files are found
 */
@Override
public List<IndexMetadata> load(IndexCacheKey key) throws Exception {
    requireNonNull(key);
    requireNonNull(indexClient);

    // Index files are only usable when their modification time equals the one in the key
    long indexTimestamp;
    try {
        indexTimestamp = indexClient.getLastModifiedTime(key.getPath());
    } catch (Exception ignored) {
        // the lastModified lookup failed, which is treated as "index doesn't exist"
        throw new IndexNotCreatedException();
    }
    if (key.getLastModifiedTime() != indexTimestamp) {
        throw new Exception("Index files are expired for key " + key);
    }

    List<IndexMetadata> loaded;
    try {
        loaded = indexClient.readSplitIndex(key.getPath());
    } catch (Exception cause) {
        throw new Exception("No valid index files found for key " + key, cause);
    }

    if (loaded == null) {
        // a null result means the index is not registered in index records
        throw new IndexNotCreatedException();
    }
    if (loaded.isEmpty()) {
        // the lastModified check passed, yet nothing was read for the given types
        throw new Exception("No index files found for key " + key);
    }

    // Order the result by where each split starts
    return loaded.stream()
            .sorted(comparingLong(IndexMetadata::getSplitStart))
            .collect(Collectors.toList());
}
Also used : IndexNotCreatedException(io.prestosql.spi.heuristicindex.IndexNotCreatedException) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) IndexNotCreatedException(io.prestosql.spi.heuristicindex.IndexNotCreatedException)

Example 4 with IndexMetadata

use of io.prestosql.spi.heuristicindex.IndexMetadata in project hetu-core by openlookeng.

the class TestIndexCacheFetch method testIndexCacheGetIndices.

/**
 * Checks that the first lookup returns nothing (the load is only scheduled) and that, once the
 * load delay has elapsed, the lookup returns the indexes supplied by the mocked loader.
 */
@Test
public void testIndexCacheGetIndices() throws Exception {
    synchronized (this) {
        // An index mock reporting a one kilobyte footprint
        long oneKilobyteInBytes = new DataSize(1, KILOBYTE).toBytes();
        Index mockedIndex = mock(Index.class);
        when(mockedIndex.getMemoryUsage()).thenReturn(oneKilobyteInBytes);

        // Metadata mock that carries the index and the expected modification time
        IndexMetadata mockedMetadata = mock(IndexMetadata.class);
        when(mockedMetadata.getLastModifiedTime()).thenReturn(testLastModifiedTime);
        when(mockedMetadata.getIndex()).then(new Returns(mockedIndex));

        List<IndexMetadata> loaderResult = new LinkedList<>();
        loaderResult.add(mockedMetadata);

        // The loader always hands back the prepared list
        IndexCacheLoader loader = mock(IndexCacheLoader.class);
        when(loader.load(any())).then(new Returns(loaderResult));
        IndexCache cacheUnderTest = new IndexCache(loader, new NoOpIndexClient(), false);

        // First call: nothing cached yet
        List<IndexMetadata> fetched = cacheUnderTest.getIndices(table, column, split);
        assertEquals(fetched.size(), 0);

        // Wait past the load delay, then expect one entry per index type
        Thread.sleep(loadDelay + 2000);
        fetched = cacheUnderTest.getIndices(table, column, split);
        assertEquals(fetched.size(), numberOfIndexTypes);
        assertEquals(fetched.get(0), loaderResult.get(0));
    }
}
Also used : Returns(org.mockito.internal.stubbing.answers.Returns) DataSize(io.airlift.units.DataSize) Index(io.prestosql.spi.heuristicindex.Index) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) NoOpIndexClient(io.prestosql.testing.NoOpIndexClient) LinkedList(java.util.LinkedList) Test(org.testng.annotations.Test)

Example 5 with IndexMetadata

use of io.prestosql.spi.heuristicindex.IndexMetadata in project hetu-core by openlookeng.

the class TestIndexCacheFetch method testIndexCacheThrowsExecutionException.

/**
 * Checks that a loader which throws {@link ExecutionException} leaves the cache empty: both the
 * initial lookup and the lookup after the load delay return no indexes.
 */
@Test
public void testIndexCacheThrowsExecutionException() throws Exception {
    synchronized (this) {
        IndexMetadata mockedMetadata = mock(IndexMetadata.class);
        when(mockedMetadata.getLastModifiedTime()).then(new Returns(testLastModifiedTime));
        List<IndexMetadata> preparedIndices = new LinkedList<>();
        preparedIndices.add(mockedMetadata);

        // Every load attempt fails
        IndexCacheLoader failingLoader = mock(IndexCacheLoader.class);
        when(failingLoader.load(any())).thenThrow(ExecutionException.class);
        IndexCache cacheUnderTest = new IndexCache(failingLoader, new NoOpIndexClient(), false);

        // Nothing is cached before the scheduled load runs
        List<IndexMetadata> fetched = cacheUnderTest.getIndices(table, column, split);
        assertEquals(fetched.size(), 0);

        // ...and nothing is cached after it has failed
        Thread.sleep(loadDelay + 2000);
        fetched = cacheUnderTest.getIndices(table, column, split);
        assertEquals(fetched.size(), 0);
    }
}
Also used : Returns(org.mockito.internal.stubbing.answers.Returns) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) NoOpIndexClient(io.prestosql.testing.NoOpIndexClient) LinkedList(java.util.LinkedList) Test(org.testng.annotations.Test)

Aggregations

IndexMetadata (io.prestosql.spi.heuristicindex.IndexMetadata): 30, LinkedList (java.util.LinkedList): 19, Test (org.testng.annotations.Test): 13, Index (io.prestosql.spi.heuristicindex.Index): 10, ImmutableList (com.google.common.collect.ImmutableList): 9, DataSize (io.airlift.units.DataSize): 9, NoOpIndexClient (io.prestosql.testing.NoOpIndexClient): 9, List (java.util.List): 9, Map (java.util.Map): 9, ArrayList (java.util.ArrayList): 8, Returns (org.mockito.internal.stubbing.answers.Returns): 8, CreateIndexMetadata (io.prestosql.spi.connector.CreateIndexMetadata): 7, IndexCacheKey (io.prestosql.spi.heuristicindex.IndexCacheKey): 7, ImmutableMap (com.google.common.collect.ImmutableMap): 6, HiveSplit (io.prestosql.plugin.hive.HiveSplit): 6, IndexRecord (io.prestosql.spi.heuristicindex.IndexRecord): 6, Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument): 5, IndexNotCreatedException (io.prestosql.spi.heuristicindex.IndexNotCreatedException): 5, RowExpression (io.prestosql.spi.relation.RowExpression): 5, URI (java.net.URI): 5