Search in sources :

Example 16 with Index

use of io.prestosql.spi.heuristicindex.Index in project hetu-core by openlookeng.

the class TestIndexCacheRemoval method testIndexCacheEviction.

/**
 * Loads indices for one split, then for a second split, and checks that the cache size stays at
 * {@code numberOfIndexTypes} throughout and that the first split's indices are no longer served
 * immediately once the second split's indices have been cached.
 */
@Test
public void testIndexCacheEviction() throws Exception {
    synchronized (this) {
        HiveSplit split = mock(HiveSplit.class);
        when(split.getLastModifiedTime()).thenReturn(testLastModifiedTime);
        when(split.getPath()).thenReturn(testPath);

        IndexCacheLoader loader = mock(IndexCacheLoader.class);
        IndexCache cache = new IndexCache(loader, loadDelay, new NoOpIndexClient());

        // ---- first split: the loader hands back a single 2 KB index ----
        Index firstIndex = mock(Index.class);
        when(firstIndex.getMemoryUsage()).thenReturn(new DataSize(2, KILOBYTE).toBytes());
        IndexMetadata firstMetadata = mock(IndexMetadata.class);
        when(firstMetadata.getIndex()).thenReturn(firstIndex);
        when(firstMetadata.getLastModifiedTime()).thenReturn(testLastModifiedTime);
        List<IndexMetadata> firstBatch = new LinkedList<>();
        firstBatch.add(firstMetadata);
        when(loader.load(any())).then(new Returns(firstBatch));

        // The first lookup returns nothing; the indices are only expected after the load delay.
        List<IndexMetadata> served = cache.getIndices(catalog, table, split, effectivePredicate, testPartitions);
        assertEquals(served.size(), 0);
        Thread.sleep(loadDelay + 2000);

        // Each index uses 2 KB, which is expected to fit within the cache limit, so all are served.
        served = cache.getIndices(catalog, table, split, effectivePredicate, testPartitions);
        assertEquals(served.size(), numberOfIndexTypes);
        assertEquals(served.get(0), firstMetadata);
        assertEquals(cache.getCacheSize(), numberOfIndexTypes);

        // ---- second split: same mock split, now pointing at a different path ----
        when(split.getPath()).thenReturn(testPath2);
        Index secondIndex = mock(Index.class);
        when(secondIndex.getMemoryUsage()).thenReturn(new DataSize(2, KILOBYTE).toBytes());
        IndexMetadata secondMetadata = mock(IndexMetadata.class);
        when(secondMetadata.getIndex()).thenReturn(secondIndex);
        when(secondMetadata.getLastModifiedTime()).thenReturn(testLastModifiedTime);
        List<IndexMetadata> secondBatch = new LinkedList<>();
        secondBatch.add(secondMetadata);
        when(loader.load(any())).then(new Returns(secondBatch));

        // Nothing is served for the new path yet and the cache still holds the earlier entries.
        served = cache.getIndices(catalog, table, split, effectivePredicate, testPartitions);
        assertEquals(served.size(), 0);
        assertEquals(cache.getCacheSize(), numberOfIndexTypes);
        Thread.sleep(loadDelay + 2000);

        // The cache was already at its weight limit, so the earlier entries should have been
        // evicted to make room: the size is unchanged but the new split's index is served.
        served = cache.getIndices(catalog, table, split, effectivePredicate, testPartitions);
        assertEquals(served.size(), numberOfIndexTypes);
        assertEquals(served.get(0), secondMetadata);
        assertEquals(cache.getCacheSize(), numberOfIndexTypes);

        // ---- back to the first split: its entries are gone, so the lookup comes back empty ----
        when(split.getPath()).thenReturn(testPath);
        served = cache.getIndices(catalog, table, split, effectivePredicate, testPartitions);
        assertEquals(served.size(), 0);
        assertEquals(cache.getCacheSize(), numberOfIndexTypes);
    }
}
Also used : Returns(org.mockito.internal.stubbing.answers.Returns) HiveSplit(io.prestosql.plugin.hive.HiveSplit) DataSize(io.airlift.units.DataSize) Index(io.prestosql.spi.heuristicindex.Index) NoOpIndexClient(io.prestosql.testing.NoOpIndexClient) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) LinkedList(java.util.LinkedList) Test(org.testng.annotations.Test)

Example 17 with Index

use of io.prestosql.spi.heuristicindex.Index in project hetu-core by openlookeng.

the class BitmapIndex method addValues.

/**
 * Builds the index content from the values of a single column.
 *
 * <p>For every distinct non-null value, the positions at which it occurs in the column are
 * collected into a {@link RoaringBitmap}, which is serialized and handed, sorted by value, to
 * {@code getBtreeWriteOptimized}. Null values are skipped.
 *
 * <p>Values can be added only once. The argument is validated <em>before</em> the one-shot
 * update flag is consumed, so a call rejected for having the wrong number of columns does not
 * prevent a later, valid call from succeeding.
 *
 * @param values a single-element list holding the column name paired with its values
 * @return {@code true} whenever the call completes without throwing
 * @throws UnsupportedOperationException if {@code values} does not contain exactly one column,
 *                                       or if values have already been added to this index
 * @throws IOException                   if serializing a bitmap fails
 */
@Override
public boolean addValues(List<Pair<String, List<Object>>> values) throws IOException {
    checkClosed();
    // Reject unsupported input first: this check has no side effects, whereas the flag below
    // can only be consumed once.
    if (values.size() != 1) {
        throw new UnsupportedOperationException("Only single column is supported.");
    }
    // values can only be added once
    if (!updateAllowed.getAndSet(false)) {
        throw new UnsupportedOperationException("Unable to update index. " + "An existing Btree index can not be updated because all values must be added together since the " + "position of the values is important.");
    }
    List<Object> columnValues = values.get(0).getSecond();
    // value -> row positions at which the value occurs
    Map<Object, ArrayList<Integer>> positions = new HashMap<>();
    for (int i = 0; i < columnValues.size(); i++) {
        Object value = columnValues.get(i);
        if (value != null) {
            positions.computeIfAbsent(value, k -> new ArrayList<>()).add(i);
        }
    }
    if (positions.isEmpty()) {
        return true;
    }
    List<kotlin.Pair> bitmaps = new ArrayList<>(positions.size());
    for (Map.Entry<Object, ArrayList<Integer>> e : positions.entrySet()) {
        int[] valuePositions = ArrayUtils.toPrimitive(e.getValue().toArray(new Integer[0]));
        RoaringBitmap rr = RoaringBitmap.bitmapOf(valuePositions);
        rr.runOptimize();
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        // try-with-resources so the stream is closed even when serialization throws
        try (DataOutputStream dos = new DataOutputStream(bos)) {
            rr.serialize(dos);
        }
        Object value = convertToSupportedType(e.getKey());
        bitmaps.add(new kotlin.Pair(value, bos.toByteArray()));
    }
    // entries are passed on in ascending key order
    Collections.sort(bitmaps, (o1, o2) -> ((Comparable) o1.component1()).compareTo(o2.component1()));
    // the first (smallest) key is passed alongside the iterator over all entries
    getBtreeWriteOptimized(bitmaps.iterator().next().component1(), bitmaps.iterator());
    return true;
}
Also used : GroupSerializer(org.mapdb.serializer.GroupSerializer) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Logger(io.airlift.log.Logger) Marker(io.prestosql.spi.predicate.Marker) Date(java.util.Date) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) ArrayUtils(org.apache.commons.lang3.ArrayUtils) HashMap(java.util.HashMap) IndexServiceUtils.getSerializer(io.hetu.core.heuristicindex.util.IndexServiceUtils.getSerializer) RoaringBitmap(org.roaringbitmap.RoaringBitmap) ByteBuffer(java.nio.ByteBuffer) SerializerCompressionWrapper(org.mapdb.serializer.SerializerCompressionWrapper) ImmutableRoaringBitmap(org.roaringbitmap.buffer.ImmutableRoaringBitmap) ArrayList(java.util.ArrayList) BigDecimal(java.math.BigDecimal) SnappyInputStream(org.xerial.snappy.SnappyInputStream) SnappyOutputStream(org.xerial.snappy.SnappyOutputStream) DataOutputStream(java.io.DataOutputStream) Range(io.prestosql.spi.predicate.Range) Map(java.util.Map) TypeUtils.getActualValue(io.prestosql.spi.heuristicindex.TypeUtils.getActualValue) OutputStream(java.io.OutputStream) ImmutableSet(com.google.common.collect.ImmutableSet) Properties(java.util.Properties) Iterator(java.util.Iterator) ConcurrentNavigableMap(java.util.concurrent.ConcurrentNavigableMap) FileOutputStream(java.io.FileOutputStream) Set(java.util.Set) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) UUID(java.util.UUID) SortedRangeSet(io.prestosql.spi.predicate.SortedRangeSet) Pair(io.prestosql.spi.heuristicindex.Pair) File(java.io.File) Serializer(org.mapdb.Serializer) IOUtils(org.apache.commons.io.IOUtils) List(java.util.List) BTreeMap(org.mapdb.BTreeMap) CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) Domain(io.prestosql.spi.predicate.Domain) DBMaker(org.mapdb.DBMaker) DB(org.mapdb.DB) IndexServiceUtils.extractType(io.hetu.core.heuristicindex.util.IndexServiceUtils.extractType) Index(io.prestosql.spi.heuristicindex.Index) Collections(java.util.Collections) 
InputStream(java.io.InputStream) HashMap(java.util.HashMap) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) RoaringBitmap(org.roaringbitmap.RoaringBitmap) ImmutableRoaringBitmap(org.roaringbitmap.buffer.ImmutableRoaringBitmap) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentNavigableMap(java.util.concurrent.ConcurrentNavigableMap) BTreeMap(org.mapdb.BTreeMap) Pair(io.prestosql.spi.heuristicindex.Pair)

Example 18 with Index

use of io.prestosql.spi.heuristicindex.Index in project hetu-core by openlookeng.

the class BTreeIndex method deserialize.

/**
 * Restores this index from a Snappy-compressed stream.
 *
 * <p>The decompressed bytes are written to {@code dataFile}, {@code setupDB()} is invoked, and,
 * if the index properties contain a symbol-table entry, the symbol table is rebuilt from it.
 * The supplied stream is not closed by this method.
 *
 * @param in stream holding the Snappy-compressed index data
 * @return this index instance
 * @throws IOException if reading the stream or writing the data file fails
 */
@Override
public Index deserialize(InputStream in) throws IOException {
    // Only the file sink is managed here; the caller keeps ownership of the input stream.
    try (OutputStream fileSink = new FileOutputStream(dataFile)) {
        SnappyInputStream decompressed = new SnappyInputStream(in);
        IOUtils.copy(decompressed, fileSink);
    }
    setupDB();

    Properties indexProperties = getProperties();
    String serializedSymbolTable = indexProperties.getProperty(PartitionIndexWriter.SYMBOL_TABLE_KEY_NAME);
    if (serializedSymbolTable != null) {
        this.symbolTable = SerializationUtils.deserializeMap(serializedSymbolTable, s -> s, s -> s);
    }
    return this;
}
Also used : Enumeration(java.util.Enumeration) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) BuiltInFunctionHandle(io.prestosql.spi.function.BuiltInFunctionHandle) IndexServiceUtils.getSerializer(io.hetu.core.heuristicindex.util.IndexServiceUtils.getSerializer) IndexServiceUtils(io.hetu.core.heuristicindex.util.IndexServiceUtils) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) CallExpression(io.prestosql.spi.relation.CallExpression) SnappyInputStream(org.xerial.snappy.SnappyInputStream) SnappyOutputStream(org.xerial.snappy.SnappyOutputStream) OperatorType(io.prestosql.spi.function.OperatorType) Map(java.util.Map) TypeUtils.extractValueFromRowExpression(io.prestosql.spi.heuristicindex.TypeUtils.extractValueFromRowExpression) Signature(io.prestosql.spi.function.Signature) SpecialForm(io.prestosql.spi.relation.SpecialForm) OutputStream(java.io.OutputStream) Properties(java.util.Properties) Iterator(java.util.Iterator) Collection(java.util.Collection) IOUtils(org.apache.commons.compress.utils.IOUtils) FileOutputStream(java.io.FileOutputStream) Set(java.util.Set) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) UUID(java.util.UUID) Sets(com.google.common.collect.Sets) Pair(io.prestosql.spi.heuristicindex.Pair) File(java.io.File) Serializer(org.mapdb.Serializer) UncheckedIOException(java.io.UncheckedIOException) List(java.util.List) PartitionIndexWriter(io.hetu.core.heuristicindex.PartitionIndexWriter) BTreeMap(org.mapdb.BTreeMap) DBException(org.mapdb.DBException) CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) RowExpression(io.prestosql.spi.relation.RowExpression) DBMaker(org.mapdb.DBMaker) Optional(java.util.Optional) TempFolder(io.hetu.core.common.filesystem.TempFolder) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException) SerializationUtils(io.prestosql.spi.heuristicindex.SerializationUtils) DB(org.mapdb.DB) Index(io.prestosql.spi.heuristicindex.Index) 
Collections(java.util.Collections) InputStream(java.io.InputStream) SnappyInputStream(org.xerial.snappy.SnappyInputStream) SnappyOutputStream(org.xerial.snappy.SnappyOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) FileOutputStream(java.io.FileOutputStream) Properties(java.util.Properties)

Example 19 with Index

use of io.prestosql.spi.heuristicindex.Index in project hetu-core by openlookeng.

the class FileIndexWriter method addData.

/**
 * Buffers one page of column values for a stripe and persists the stripe once all of its pages
 * have arrived.
 *
 * <p>This method IS thread-safe. Multiple operators can add data to one writer in parallel.
 *
 * <p>Page accounting: every call decrements the stripe's counter by one; the call whose metadata
 * carries {@code DATASOURCE_TOTAL_PAGES} additionally adds the expected page total. The counter
 * therefore reads zero exactly when the number of received pages equals the announced total, in
 * whatever order the pages arrive.
 *
 * @param values            values to be indexed, keyed by column name
 * @param connectorMetadata metadata for the index; {@code DATASOURCE_STRIPE_OFFSET} and
 *                          {@code DATASOURCE_PAGE_NUMBER} are read as numbers, and
 *                          {@code DATASOURCE_TOTAL_PAGES} is read when present
 * @throws IOException declared by this method; presumably raised while persisting a stripe —
 *                     confirm against {@code persistStripe}
 */
@Override
public void addData(Map<String, List<Object>> values, Properties connectorMetadata) throws IOException {
    // NOTE(review): Long.parseLong throws NumberFormatException when the property is absent.
    long stripeOffset = Long.parseLong(connectorMetadata.getProperty(DATASOURCE_STRIPE_OFFSET));
    // Add values first
    indexPages.computeIfAbsent(stripeOffset, k -> new ConcurrentHashMap<>());
    for (Map.Entry<String, List<Object>> e : values.entrySet()) {
        // Each column keeps a synchronized list of (page values, page number) entries so that
        // pages can be put back in order before persisting.
        indexPages.get(stripeOffset).computeIfAbsent(e.getKey(), k -> Collections.synchronizedList(new LinkedList<>())).add(new AbstractMap.SimpleEntry(e.getValue(), Integer.parseInt(connectorMetadata.getProperty(DATASOURCE_PAGE_NUMBER))));
    }
    // Update page count
    // The counter goes negative until the page announcing the total has been seen.
    int current = pageCountExpected.computeIfAbsent(stripeOffset, k -> new AtomicInteger()).decrementAndGet();
    if (connectorMetadata.getProperty(DATASOURCE_TOTAL_PAGES) != null) {
        int expected = Integer.parseInt(connectorMetadata.getProperty(DATASOURCE_TOTAL_PAGES));
        int updatedCurrent = pageCountExpected.get(stripeOffset).addAndGet(expected);
        LOG.debug("offset %d finishing page received, expected page count: %d, actual received: %d, remaining: %d", stripeOffset, expected, -current, updatedCurrent);
    }
    // Check page count to know if all pages have been received for a stripe. Persist and delete values if true to save memory
    if (pageCountExpected.get(stripeOffset).get() == 0) {
        // Lock on the stripe's counter and re-check under the lock, so that when several threads
        // observe zero only the first one finds the buffered pages and persists them.
        synchronized (pageCountExpected.get(stripeOffset)) {
            if (indexPages.containsKey(stripeOffset)) {
                LOG.debug("All pages for offset %d have been received. Persisting.", stripeOffset);
                // sort the stripe's pages and collect the values into a single list
                List<Pair<String, List<Object>>> columnValuesMap = new ArrayList<>();
                // each entry represents a mapping from column name -> list<entry<page values, page number>>
                for (Map.Entry<String, List<Map.Entry<List<Object>, Integer>>> entry : indexPages.get(stripeOffset).entrySet()) {
                    // sort the page values lists based on page numbers
                    entry.getValue().sort(Comparator.comparingInt(Map.Entry::getValue));
                    // collect all page values lists into a single list
                    List<Object> columnValues = entry.getValue().stream().map(Map.Entry::getKey).flatMap(Collection::stream).collect(Collectors.toList());
                    columnValuesMap.add(new Pair(entry.getKey(), columnValues));
                }
                persistStripe(stripeOffset, columnValuesMap);
                // Drop the buffered pages after persisting; this is also what makes the
                // containsKey check above fail for any later thread.
                indexPages.remove(stripeOffset);
            } else {
                LOG.debug("All pages for offset %d have been received, but the values are missing. " + "This stripe should have already been persisted by another thread.", stripeOffset);
            }
        }
    }
}
Also used : DATASOURCE_TOTAL_PAGES(io.prestosql.spi.HetuConstant.DATASOURCE_TOTAL_PAGES) Logger(io.airlift.log.Logger) HetuFileSystemClient(io.prestosql.spi.filesystem.HetuFileSystemClient) IndexConstants(io.hetu.core.heuristicindex.util.IndexConstants) IndexServiceUtils(io.hetu.core.heuristicindex.util.IndexServiceUtils) HetuLocalFileSystemClient(io.hetu.core.filesystem.HetuLocalFileSystemClient) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) DATASOURCE_STRIPE_OFFSET(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_OFFSET) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HetuConstant(io.prestosql.spi.HetuConstant) Locale(java.util.Locale) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) LinkedList(java.util.LinkedList) Path(java.nio.file.Path) OutputStream(java.io.OutputStream) Properties(java.util.Properties) IndexWriter(io.prestosql.spi.heuristicindex.IndexWriter) Files(java.nio.file.Files) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IOException(java.io.IOException) LocalConfig(io.hetu.core.filesystem.LocalConfig) Collectors(java.util.stream.Collectors) Pair(io.prestosql.spi.heuristicindex.Pair) DATASOURCE_PAGE_NUMBER(io.prestosql.spi.HetuConstant.DATASOURCE_PAGE_NUMBER) AbstractMap(java.util.AbstractMap) List(java.util.List) Paths(java.nio.file.Paths) CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) Comparator(java.util.Comparator) Index(io.prestosql.spi.heuristicindex.Index) Collections(java.util.Collections) ArrayList(java.util.ArrayList) AbstractMap(java.util.AbstractMap) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) AbstractMap(java.util.AbstractMap) Pair(io.prestosql.spi.heuristicindex.Pair)

Example 20 with Index

use of io.prestosql.spi.heuristicindex.Index in project hetu-core by openlookeng.

the class HeuristicIndexClient method readPartitionIndex.

/**
 * Reads every partition index file stored under the given index path.
 *
 * <p>The first two elements of {@code path} are taken as the table name and the column name.
 * Each regular file found beneath {@code root/path} is deserialized into a {@link BTreeIndex}
 * and wrapped in an {@link IndexMetadata}. Both the directory walk and every per-file input
 * stream are closed before this method returns, including when an exception is thrown.
 *
 * @param path index path relative to the index root; must contain at least two elements
 * @return one {@link IndexMetadata} per index file, or an empty list if the path does not exist
 * @throws IOException if listing or reading the index files fails
 */
@Override
public List<IndexMetadata> readPartitionIndex(String path) throws IOException {
    Path indexKeyPath = Paths.get(path);
    Path absolutePath = Paths.get(this.root.toString(), path);
    String tableName = indexKeyPath.subpath(0, 1).toString();
    String column = indexKeyPath.subpath(1, 2).toString();
    if (!fs.exists(absolutePath)) {
        LOG.debug("File path doesn't exists" + absolutePath);
        return ImmutableList.of();
    }
    // The walk may hold open directory handles, so it is closed as soon as the file list is collected.
    List<Path> paths;
    try (Stream<Path> walked = fs.walk(absolutePath)) {
        paths = walked.filter(p -> !fs.isDirectory(p)).collect(Collectors.toList());
    }
    List<IndexMetadata> result = new ArrayList<>();
    for (Path filePath : paths) {
        BTreeIndex index = new BTreeIndex();
        // deserialize() copies the stream's content into the index's own data file, so the
        // stream can be released right after the call.
        try (InputStream inputStream = fs.newInputStream(filePath)) {
            index.deserialize(inputStream);
        }
        IndexMetadata indexMetadata = new IndexMetadata(index, tableName, new String[] { column }, root.toString(), filePath.toString(), 0L, 0L);
        result.add(indexMetadata);
    }
    return result;
}
Also used : Path(java.nio.file.Path) HetuMetastore(io.prestosql.spi.metastore.HetuMetastore) Logger(io.airlift.log.Logger) HetuFileSystemClient(io.prestosql.spi.filesystem.HetuFileSystemClient) ArchiveEntry(org.apache.commons.compress.archivers.ArchiveEntry) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) IndexConstants(io.hetu.core.heuristicindex.util.IndexConstants) FileBasedLock(io.prestosql.spi.filesystem.FileBasedLock) ArrayList(java.util.ArrayList) BTreeIndex(io.hetu.core.plugin.heuristicindex.index.btree.BTreeIndex) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ImmutableList(com.google.common.collect.ImmutableList) CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream) IndexClient(io.prestosql.spi.heuristicindex.IndexClient) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) LinkedList(java.util.LinkedList) IndexRecord(io.prestosql.spi.heuristicindex.IndexRecord) Path(java.nio.file.Path) ImmutableMap(com.google.common.collect.ImmutableMap) FileSystemException(java.nio.file.FileSystemException) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Pair(io.prestosql.spi.heuristicindex.Pair) UncheckedIOException(java.io.UncheckedIOException) AtomicLong(java.util.concurrent.atomic.AtomicLong) List(java.util.List) Lock(java.util.concurrent.locks.Lock) Stream(java.util.stream.Stream) Paths(java.nio.file.Paths) CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) Index(io.prestosql.spi.heuristicindex.Index) Collections(java.util.Collections) SecurePathWhiteList(io.hetu.core.common.util.SecurePathWhiteList) InputStream(java.io.InputStream) BTreeIndex(io.hetu.core.plugin.heuristicindex.index.btree.BTreeIndex) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) 
CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream) InputStream(java.io.InputStream) ArrayList(java.util.ArrayList) CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata)

Aggregations

Index (io.prestosql.spi.heuristicindex.Index)24 LinkedList (java.util.LinkedList)17 IndexMetadata (io.prestosql.spi.heuristicindex.IndexMetadata)15 Test (org.testng.annotations.Test)14 DataSize (io.airlift.units.DataSize)12 NoOpIndexClient (io.prestosql.testing.NoOpIndexClient)11 Returns (org.mockito.internal.stubbing.answers.Returns)11 ArrayList (java.util.ArrayList)10 IOException (java.io.IOException)9 List (java.util.List)9 HiveSplit (io.prestosql.plugin.hive.HiveSplit)8 Pair (io.prestosql.spi.heuristicindex.Pair)8 Collections (java.util.Collections)7 Map (java.util.Map)7 CreateIndexMetadata (io.prestosql.spi.connector.CreateIndexMetadata)6 ImmutableMap (com.google.common.collect.ImmutableMap)5 Logger (io.airlift.log.Logger)5 File (java.io.File)5 FileInputStream (java.io.FileInputStream)5 FileOutputStream (java.io.FileOutputStream)5