Search in sources:

Example 1 with Index

Use of io.prestosql.spi.heuristicindex.Index in the project hetu-core by openlookeng.

In the class FileIndexWriter, method persistStripe:

/**
 * Builds one index from the values of a single stripe and writes it into the temporary directory.
 * <br>
 * The temporary directory is created on first use. The index file is named
 * {@code <offset>.<index id>} (e.g. 3.bloom).
 *
 * @param offset stripe offset, used as the base name of the index file
 * @param stripeData pairs whose second element holds the values to be indexed
 * @throws IOException if the temporary directory or the index file cannot be written
 */
private void persistStripe(Long offset, List<Pair<String, List<Object>>> stripeData) throws IOException {
    // Create the temporary directory once; the check-and-create is guarded by this instance's monitor
    synchronized (this) {
        if (tmpPath == null) {
            tmpPath = Files.createTempDirectory("tmp-indexwriter-");
        }
    }

    // Expected entries = sum over all pairs of the number of distinct values in each pair
    int distinctValueCount = stripeData.stream()
            .mapToInt(column -> new HashSet<>(column.getSecond()).size())
            .sum();

    try (Index stripeIndex = HeuristicIndexFactory.createIndex(createIndexMetadata.getIndexType())) {
        stripeIndex.setProperties(createIndexMetadata.getProperties());
        stripeIndex.setExpectedNumOfEntries(distinctValueCount);
        stripeIndex.addValues(stripeData);

        // One file per stripe index, e.g. 3.bloom
        String targetName = offset + "." + stripeIndex.getId();
        try (OutputStream out = LOCAL_FS_CLIENT.newOutputStream(tmpPath.resolve(targetName))) {
            stripeIndex.serialize(out);
        }
    }
}
Also used : OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) Index(io.prestosql.spi.heuristicindex.Index) HashSet(java.util.HashSet)

Example 2 with Index

Use of io.prestosql.spi.heuristicindex.Index in the project hetu-core by openlookeng.

In the class HeuristicIndexClient, method readSplitIndex:

/**
 * Loads every index found under the given path and wraps each one in an {@link IndexMetadata}.
 * <br>
 * The first three elements of {@code path} are passed to the index record manager as a pre-screen.
 * If the lookup succeeds but finds no record, nothing is loaded. If the lookup itself fails, the
 * failure is logged and the indexes are still read from disk.
 *
 * @param path index path relative to the root; must have at least three elements
 * (presumably table/column/indexType — confirm against the index record manager)
 * @return the metadata of all indexes read, or {@code null} if the index record lookup found no record
 * @throws IOException if reading the index files fails
 * @throws IllegalArgumentException if the path of a loaded index cannot be split into
 * table, column, index type, parent directory and file name
 */
@Override
public List<IndexMetadata> readSplitIndex(String path) throws IOException {
    requireNonNull(path, "no path specified");
    List<IndexMetadata> indexes = new LinkedList<>();
    Path indexKeyPath = Paths.get(path);
    IndexRecord curIndex = null;
    try {
        curIndex = indexRecordManager.lookUpIndexRecord(indexKeyPath.subpath(0, 1).toString(), new String[] { indexKeyPath.subpath(1, 2).toString() }, indexKeyPath.subpath(2, 3).toString());
        if (curIndex == null) {
            // Use index record file to pre-screen. If record does not contain the index, skip loading
            return null;
        }
    } catch (Exception e) {
        // On exception, log (keeping the cause) and continue reading from disk
        LOG.debug(e, "Error reading index records: %s", path);
    }
    for (Map.Entry<String, Index> entry : readIndexMap(path, curIndex).entrySet()) {
        String absolutePath = entry.getKey();
        // Peel off root, table, column and index type one after another. The removal is literal:
        // these strings are path elements, not regular expressions.
        Path remainder = Paths.get(removeFirstLiteral(absolutePath, root.toString()));
        Path table = remainder.subpath(0, 1);
        remainder = Paths.get(removeFirstLiteral(remainder.toString(), table.toString()));
        Path column = remainder.subpath(0, 1);
        remainder = Paths.get(removeFirstLiteral(remainder.toString(), column.toString()));
        Path indexType = remainder.subpath(0, 1);
        remainder = Paths.get(removeFirstLiteral(remainder.toString(), indexType.toString()));
        Path filenamePath = remainder.getFileName();
        if (filenamePath == null) {
            throw new IllegalArgumentException("Split path cannot be resolved: " + path);
        }
        remainder = remainder.getParent();
        table = table.getFileName();
        column = column.getFileName();
        indexType = indexType.getFileName();
        if (remainder == null || table == null || column == null || indexType == null) {
            throw new IllegalArgumentException("Split path cannot be resolved: " + path);
        }
        String filename = filenamePath.toString();
        // The file name is <splitStart>.<index id>
        long splitStart = Long.parseLong(filename.substring(0, filename.lastIndexOf('.')));
        String timeDir = Paths.get(table.toString(), column.toString(), indexType.toString(), remainder.toString()).toString();
        long lastUpdated = getLastModifiedTime(timeDir);
        IndexMetadata index = new IndexMetadata(entry.getValue(), table.toString(), new String[] { column.toString() }, root.toString(), remainder.toString(), splitStart, lastUpdated);
        indexes.add(index);
    }
    return indexes;
}

/**
 * Removes the first occurrence of {@code literal} from {@code text}, treating it as plain text.
 *
 * @param text the string to remove from
 * @param literal the exact substring to remove
 * @return {@code text} without the first occurrence of {@code literal}, or {@code text} unchanged if absent
 */
private static String removeFirstLiteral(String text, String literal) {
    int at = text.indexOf(literal);
    if (at < 0) {
        return text;
    }
    return text.substring(0, at) + text.substring(at + literal.length());
}
Also used : Path(java.nio.file.Path) BTreeIndex(io.hetu.core.plugin.heuristicindex.index.btree.BTreeIndex) Index(io.prestosql.spi.heuristicindex.Index) CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) LinkedList(java.util.LinkedList) FileSystemException(java.nio.file.FileSystemException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException)

Example 3 with Index

Use of io.prestosql.spi.heuristicindex.Index in the project hetu-core by openlookeng.

In the class HeuristicIndexClient, method readIndexMap:

/**
 * Reads all files at the specified path recursively.
 * <br>
 * Every file whose path contains ".tar" is opened as a tar archive. For each archive entry whose
 * name has an extension, that extension is used as the index type id and the entry is deserialized
 * into an index of that type. For example, an entry named filename.bloom is loaded as a BloomIndex.
 *
 * @param path relative path to the index file or dir, if dir, it will be searched recursively (relative to the
 * root uri, if one was set)
 * @param indexRecord record whose properties are applied to each loaded index; may be {@code null}
 * (the caller passes {@code null} when the record lookup failed), in which case no properties are set
 * @return an immutable mapping from all index files read to the corresponding index that was loaded
 * @throws IOException if an archive cannot be read; a {@link FileSystemException} is thrown for an
 * archive entry whose data cannot be read
 */
private Map<String, Index> readIndexMap(String path, IndexRecord indexRecord) throws IOException {
    ImmutableMap.Builder<String, Index> result = ImmutableMap.builder();
    // get the absolute path to the file being read
    Path absolutePath = Paths.get(root.toString(), path);
    // check required for security scan since we are constructing a path using input
    checkArgument(!absolutePath.toString().contains("../"), absolutePath + " must be absolute and under one of the following whitelisted directories:  " + SecurePathWhiteList.getSecurePathWhiteList().toString());
    checkArgument(SecurePathWhiteList.isSecurePath(absolutePath), absolutePath + " must be under one of the following whitelisted directories: " + SecurePathWhiteList.getSecurePathWhiteList().toString());
    if (!fs.exists(absolutePath)) {
        return ImmutableMap.of();
    }
    try (Stream<Path> tarsOnRemote = fs.walk(absolutePath).filter(p -> p.toString().contains(".tar"))) {
        for (Path tarFile : (Iterable<Path>) tarsOnRemote::iterator) {
            try (TarArchiveInputStream i = new TarArchiveInputStream(fs.newInputStream(tarFile))) {
                ArchiveEntry entry;
                while ((entry = i.getNextEntry()) != null) {
                    if (!i.canReadEntryData(entry)) {
                        throw new FileSystemException("Unable to read archive entry: " + entry.toString());
                    }
                    String filename = entry.getName();
                    if (!filename.contains(".")) {
                        // no extension, so no index type can be derived
                        continue;
                    }
                    String indexType = filename.substring(filename.lastIndexOf('.') + 1);
                    Index index = HeuristicIndexFactory.createIndex(indexType);
                    // set property for index; the record is absent when the caller's lookup failed
                    if (indexRecord != null) {
                        index.setProperties(indexRecord.getProperties());
                    }
                    // deserialize from file; shield the tar stream so it stays open for the next entry
                    index.deserialize(new CloseShieldInputStream(i));
                    LOG.debug("Loaded %s index from %s.", index.getId(), tarFile.toAbsolutePath());
                    result.put(tarFile.getParent().resolve(filename).toString(), index);
                }
            }
        }
    }
    return result.build();
}
Also used : Path(java.nio.file.Path) BTreeIndex(io.hetu.core.plugin.heuristicindex.index.btree.BTreeIndex) Index(io.prestosql.spi.heuristicindex.Index) ArchiveEntry(org.apache.commons.compress.archivers.ArchiveEntry) ImmutableMap(com.google.common.collect.ImmutableMap) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) FileSystemException(java.nio.file.FileSystemException) CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream)

Example 4 with Index

Use of io.prestosql.spi.heuristicindex.Index in the project hetu-core by openlookeng.

In the class HeuristicIndexClient, method deleteIndex:

/**
 * Deletes the files of an index, or of selected partitions of it, and then its index record.
 * <br>
 * The index directory is {@code root/<qualifiedTable>/<columns joined by ','>/<indexType>}. If it does
 * not exist, only the index record is deleted. Otherwise the files are removed while holding a
 * file-based lock on the column directory, and column/table directories that contain nothing but
 * lock utility files are cleaned up on a best-effort basis.
 *
 * @param indexName name of the index to look up and delete
 * @param partitionsToDelete directory names of the partitions to delete; if empty, the whole index
 * directory is deleted
 * @throws IOException if a file system operation fails
 */
@Override
public void deleteIndex(String indexName, List<String> partitionsToDelete) throws IOException {
    IndexRecord indexRecord = indexRecordManager.lookUpIndexRecord(indexName);
    // dir structure example: root/catalog.schema.table/column1,column2/BLOOM
    Path tablePath = root.resolve(indexRecord.qualifiedTable);
    Path columnPath = tablePath.resolve(String.join(",", indexRecord.columns));
    Path indexLevelPath = columnPath.resolve(indexRecord.indexType);
    if (!fs.exists(indexLevelPath)) {
        indexRecordManager.deleteIndexRecord(indexName, partitionsToDelete);
        return;
    }
    Lock lock = new FileBasedLock(fs, indexLevelPath.getParent());
    try {
        // NOTE(review): a new shutdown hook is registered on every call and is never removed;
        // consider registering it once or removing it in the finally block.
        Runtime.getRuntime().addShutdownHook(new Thread(() -> {
            lock.unlock();
            try {
                fs.close();
            } catch (IOException e) {
                throw new UncheckedIOException("Error closing FileSystem Client: " + fs.getClass().getName(), e);
            }
        }));
        lock.lock();
        if (partitionsToDelete.isEmpty()) {
            fs.deleteRecursively(indexLevelPath);
        } else {
            // Collect first and close the walk before deleting anything
            List<Path> toDeletePartitions;
            try (Stream<Path> walked = fs.walk(indexLevelPath)) {
                toDeletePartitions = walked.filter(fs::isDirectory).filter(candidate -> partitionsToDelete.contains(candidate.getFileName().toString())).collect(Collectors.toList());
            }
            for (Path partitionDir : toDeletePartitions) {
                fs.deleteRecursively(partitionDir);
            }
            // if all partitions have been deleted (only directories remain), remove index path
            boolean onlyDirectoriesLeft;
            try (Stream<Path> remaining = fs.walk(indexLevelPath)) {
                onlyDirectoriesLeft = remaining.allMatch(fs::isDirectory);
            }
            if (onlyDirectoriesLeft) {
                fs.deleteRecursively(indexLevelPath);
            }
        }
        try {
            // clean empty directories (those holding nothing but lock utility files)
            boolean columnDirEmpty;
            try (Stream<Path> columnChildren = fs.list(columnPath)) {
                columnDirEmpty = columnChildren.allMatch(FileBasedLock::isLockUtilFile);
            }
            if (columnDirEmpty) {
                fs.deleteRecursively(columnPath);
            }
            boolean tableDirEmpty;
            try (Stream<Path> tableChildren = fs.list(tablePath)) {
                tableDirEmpty = tableChildren.allMatch(FileBasedLock::isLockUtilFile);
            }
            if (tableDirEmpty) {
                fs.deleteRecursively(tablePath);
            }
        } catch (Exception e) {
            // best effort only: leftover empty directories are not an error
            LOG.debug("failed to clean empty index directory", e);
        }
    } finally {
        lock.unlock();
    }
    indexRecordManager.deleteIndexRecord(indexName, partitionsToDelete);
}
Also used : Path(java.nio.file.Path) HetuMetastore(io.prestosql.spi.metastore.HetuMetastore) Logger(io.airlift.log.Logger) HetuFileSystemClient(io.prestosql.spi.filesystem.HetuFileSystemClient) ArchiveEntry(org.apache.commons.compress.archivers.ArchiveEntry) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) IndexConstants(io.hetu.core.heuristicindex.util.IndexConstants) FileBasedLock(io.prestosql.spi.filesystem.FileBasedLock) ArrayList(java.util.ArrayList) BTreeIndex(io.hetu.core.plugin.heuristicindex.index.btree.BTreeIndex) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ImmutableList(com.google.common.collect.ImmutableList) CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream) IndexClient(io.prestosql.spi.heuristicindex.IndexClient) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) LinkedList(java.util.LinkedList) IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord) ImmutableMap(com.google.common.collect.ImmutableMap) FileSystemException(java.nio.file.FileSystemException) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Pair(io.prestosql.spi.heuristicindex.Pair) UncheckedIOException(java.io.UncheckedIOException) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Lock(java.util.concurrent.locks.Lock) Stream(java.util.stream.Stream) Paths(java.nio.file.Paths) CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) Index(io.prestosql.spi.heuristicindex.Index) Collections(java.util.Collections) SecurePathWhiteList(io.hetu.core.common.util.SecurePathWhiteList) InputStream(java.io.InputStream)

Example 5 with Index

Use of io.prestosql.spi.heuristicindex.Index in the project hetu-core by openlookeng.

In the class HeuristicIndexFactory, method getIndexWriter:

/**
 * Selects the index writer that matches the index type and creation level of the request.
 * <br>
 * STRIPE level indexes are written by a {@code FileIndexWriter}; PARTITION and TABLE level
 * indexes are written by a {@code PartitionIndexWriter}.
 *
 * @param createIndexMetadata describes the index to create (type, level, properties)
 * @param connectorMetadata connector properties; the catalog name is read from it for logging
 * and it is handed to the stripe-level writer
 * @param fs file system client handed to the writer
 * @param root root path handed to the writer
 * @return a writer for the requested creation level
 * @throws IllegalArgumentException if the index type does not support the requested level,
 * or no writer exists for that level
 */
@Override
public IndexWriter getIndexWriter(CreateIndexMetadata createIndexMetadata, Properties connectorMetadata, HetuFileSystemClient fs, Path root) {
    LOG.debug("Creating index writer for catalogName: %s", connectorMetadata.getProperty(HetuConstant.DATASOURCE_CATALOG));
    LOG.debug("indexProps: %s", createIndexMetadata.getProperties());

    String requestedType = createIndexMetadata.getIndexType();

    // Reject the request up front when the index type cannot be built at this level
    boolean levelSupported = getIndexObjFromID(requestedType)
            .getSupportedIndexLevels()
            .contains(createIndexMetadata.getCreateLevel());
    if (!levelSupported) {
        throw new IllegalArgumentException(requestedType + " does not support specified index level");
    }

    switch (createIndexMetadata.getCreateLevel()) {
        case STRIPE:
            return new FileIndexWriter(createIndexMetadata, connectorMetadata, fs, root);
        case PARTITION:
        case TABLE:
            // both levels share the partition writer
            return new PartitionIndexWriter(createIndexMetadata, fs, root);
        default:
            throw new IllegalArgumentException(requestedType + " has no supported index writer");
    }
}
Also used : MinMaxIndex(io.hetu.core.plugin.heuristicindex.index.minmax.MinMaxIndex) BTreeIndex(io.hetu.core.plugin.heuristicindex.index.btree.BTreeIndex) BloomIndex(io.hetu.core.plugin.heuristicindex.index.bloom.BloomIndex) BitmapIndex(io.hetu.core.plugin.heuristicindex.index.bitmap.BitmapIndex) Index(io.prestosql.spi.heuristicindex.Index) Properties(java.util.Properties)

Aggregations

Index (io.prestosql.spi.heuristicindex.Index)24 LinkedList (java.util.LinkedList)17 IndexMetadata (io.prestosql.spi.heuristicindex.IndexMetadata)15 Test (org.testng.annotations.Test)14 DataSize (io.airlift.units.DataSize)12 NoOpIndexClient (io.prestosql.testing.NoOpIndexClient)11 Returns (org.mockito.internal.stubbing.answers.Returns)11 ArrayList (java.util.ArrayList)10 IOException (java.io.IOException)9 List (java.util.List)9 HiveSplit (io.prestosql.plugin.hive.HiveSplit)8 Pair (io.prestosql.spi.heuristicindex.Pair)8 Collections (java.util.Collections)7 Map (java.util.Map)7 CreateIndexMetadata (io.prestosql.spi.connector.CreateIndexMetadata)6 ImmutableMap (com.google.common.collect.ImmutableMap)5 Logger (io.airlift.log.Logger)5 File (java.io.File)5 FileInputStream (java.io.FileInputStream)5 FileOutputStream (java.io.FileOutputStream)5