Search in sources :

Example 11 with Pair

use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.

the class TestPartitionIndexWriter method testAddValueMultThread.

/**
 * Exercises a single {@code PartitionIndexWriter} from two worker tasks at the same time, each
 * task being handed connector metadata that points at a different file ({@code file1} and
 * {@code file2}) of the same partition.
 *
 * <p>The test expects the writer to end up with 10 entries in its data map and 2 entries in its
 * symbol table. What each {@code TestDriver} adds is defined outside this method; presumably it
 * counts the shared latch down when it is done (verify against {@code TestDriver}).
 *
 * @throws InterruptedException if the test thread is interrupted while waiting for the workers
 */
@Test
public void testAddValueMultThread() throws InterruptedException {
    List<Pair<String, Type>> columns = new ArrayList<>();
    List<String> partitions = Collections.singletonList("partition1");
    Properties properties = new Properties();
    CreateIndexMetadata createIndexMetadata = new CreateIndexMetadata("hetu_partition_idx", "testTable", "BTREE", 0L, columns, partitions, properties, "testuser", CreateIndexMetadata.Level.PARTITION);
    HetuFileSystemClient fileSystemClient = Mockito.mock(HetuFileSystemClient.class);
    Properties connectorMetadata1 = new Properties();
    connectorMetadata1.setProperty(HetuConstant.DATASOURCE_FILE_MODIFICATION, String.valueOf(System.currentTimeMillis()));
    connectorMetadata1.setProperty(HetuConstant.DATASOURCE_FILE_PATH, "hdfs://testable/testcolumn/cp=123121/file1");
    connectorMetadata1.setProperty(HetuConstant.DATASOURCE_STRIPE_OFFSET, "3");
    connectorMetadata1.setProperty(HetuConstant.DATASOURCE_STRIPE_LENGTH, "100");
    Properties connectorMetadata2 = new Properties();
    connectorMetadata2.setProperty(HetuConstant.DATASOURCE_FILE_MODIFICATION, String.valueOf(System.currentTimeMillis()));
    connectorMetadata2.setProperty(HetuConstant.DATASOURCE_FILE_PATH, "hdfs://testable/testcolumn/cp=123121/file2");
    connectorMetadata2.setProperty(HetuConstant.DATASOURCE_STRIPE_OFFSET, "3");
    connectorMetadata2.setProperty(HetuConstant.DATASOURCE_STRIPE_LENGTH, "100");
    PartitionIndexWriter indexWriter = new PartitionIndexWriter(createIndexMetadata, fileSystemClient, Paths.get("/tmp"));
    ExecutorService executorService = Executors.newFixedThreadPool(2);
    try {
        CountDownLatch latch = new CountDownLatch(2);
        executorService.submit(new TestDriver(indexWriter, connectorMetadata1, latch));
        executorService.submit(new TestDriver(indexWriter, connectorMetadata2, latch));
        // await() returns false on timeout; fail right here instead of asserting on a writer
        // that the workers may still be mutating.
        if (!latch.await(5, TimeUnit.SECONDS)) {
            throw new AssertionError("Index writer workers did not finish within 5 seconds");
        }
    } finally {
        // Release the pool threads whether or not the workers completed.
        executorService.shutdownNow();
    }
    Map<Comparable<? extends Comparable<?>>, String> result = indexWriter.getDataMap();
    assertEquals(10, result.size());
    assertEquals(2, indexWriter.getSymbolTable().size());
}
Also used : CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) ArrayList(java.util.ArrayList) Properties(java.util.Properties) CountDownLatch(java.util.concurrent.CountDownLatch) HetuFileSystemClient(io.prestosql.spi.filesystem.HetuFileSystemClient) ExecutorService(java.util.concurrent.ExecutorService) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 12 with Pair

use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.

the class TestBTreeIndex method testLessThanEqualTo.

/**
 * Round-trips a {@code BTreeIndex} through a file and checks a {@code <=} lookup on the copy.
 *
 * <p>Keys 100..199 are added under column {@code dummyCol}, key {@code 100 + i} mapping to
 * {@code "value" + i}. After serialize/deserialize, a {@code LESS_THAN_OR_EQUAL 120} lookup must
 * yield exactly {@code value0 .. value20}, compared against the expected strings in their
 * natural (lexicographic) sort order.
 *
 * @throws IOException if writing or reading the index file fails
 * @throws IndexLookUpException if the lookup itself fails
 */
@Test
public void testLessThanEqualTo() throws IOException, IndexLookUpException {
    BTreeIndex index = new BTreeIndex();
    try {
        for (int i = 0; i < 100; i++) {
            List<Pair> pairs = new ArrayList<>();
            Long key = Long.valueOf(100 + i);
            String value = "value" + i;
            pairs.add(new Pair(key, value));
            Pair pair = new Pair("dummyCol", pairs);
            index.addKeyValues(Collections.singletonList(pair));
        }
        File file = getFile();
        // Close the stream as soon as the index has been written so the data is flushed and
        // the file handle is not leaked.
        try (FileOutputStream out = new FileOutputStream(file)) {
            index.serialize(out);
        }
        BTreeIndex readIndex = new BTreeIndex();
        // The input stream stays open for the whole time readIndex is in use, so this works
        // regardless of whether deserialize() consumes the stream eagerly.
        try (FileInputStream in = new FileInputStream(file)) {
            readIndex.deserialize(in);
            RowExpression comparisonExpression = simplePredicate(OperatorType.LESS_THAN_OR_EQUAL, "dummyCol", BIGINT, 120L);
            Iterator<String> result = readIndex.lookUp(comparisonExpression);
            assertNotNull(result, "Result shouldn't be null");
            assertTrue(result.hasNext());
            // Keys 100..120 satisfy "<= 120", i.e. value0..value20.
            Object[] arr = IntStream.rangeClosed(0, 20).mapToObj(i -> "value" + i).sorted().toArray();
            for (Object expected : arr) {
                assertEquals(expected, result.next());
            }
            assertFalse(result.hasNext());
        } finally {
            readIndex.close();
        }
    } finally {
        // Runs even when an assertion above fails.
        index.close();
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) ConstantExpression(io.prestosql.spi.relation.ConstantExpression) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) ArrayList(java.util.ArrayList) VARCHAR(io.prestosql.spi.type.VarcharType.VARCHAR) OperatorType(io.prestosql.spi.function.OperatorType) BOOLEAN(io.prestosql.spi.type.BooleanType.BOOLEAN) SpecialForm(io.prestosql.spi.relation.SpecialForm) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Assert.assertFalse(org.testng.Assert.assertFalse) Iterator(java.util.Iterator) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) VariableReferenceExpression(io.prestosql.spi.relation.VariableReferenceExpression) FileInputStream(java.io.FileInputStream) UUID(java.util.UUID) Assert.assertNotNull(org.testng.Assert.assertNotNull) Pair(io.prestosql.spi.heuristicindex.Pair) File(java.io.File) List(java.util.List) HeuristicIndexTestUtils.simplePredicate(io.hetu.core.HeuristicIndexTestUtils.simplePredicate) RowExpression(io.prestosql.spi.relation.RowExpression) Assert.assertTrue(org.testng.Assert.assertTrue) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException) Index(io.prestosql.spi.heuristicindex.Index) Collections(java.util.Collections) ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) File(java.io.File) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 13 with Pair

use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.

the class TestBTreeIndex method testDeserialize.

/**
 * Serializes a {@code BTreeIndex} to a temporary file, deserializes it into a fresh instance and
 * checks that an equality lookup on the copy returns the stored value.
 *
 * <p>Keys 100..1099 are all added under column {@code dummyCol} with the same value
 * {@code "foo bar"}; the lookup asks for key 101.
 *
 * @throws IOException if the temporary file cannot be created, written or read
 * @throws IndexLookUpException if the lookup itself fails
 */
@Test
public void testDeserialize() throws IOException, IndexLookUpException {
    BTreeIndex index = new BTreeIndex();
    try {
        String value = "foo bar";
        for (int i = 0; i < 1000; i++) {
            List<Pair> pairs = new ArrayList<>();
            Long key = Long.valueOf(100 + i);
            pairs.add(new Pair(key, value));
            Pair pair = new Pair("dummyCol", pairs);
            index.addKeyValues(Collections.singletonList(pair));
        }
        File file = File.createTempFile("test-serialize-", UUID.randomUUID().toString());
        // Do not leave the temporary file behind after the test JVM exits.
        file.deleteOnExit();
        try (FileOutputStream out = new FileOutputStream(file)) {
            index.serialize(out);
        }
        Index readindex = new BTreeIndex();
        // The input stream stays open while readindex is in use, so this works regardless of
        // whether deserialize() consumes the stream eagerly.
        try (FileInputStream in = new FileInputStream(file)) {
            readindex.deserialize(in);
            // NOTE(review): the predicate names "column" while the data was added under
            // "dummyCol" — confirm whether the mismatch is intentional.
            RowExpression comparisonExpression = simplePredicate(OperatorType.EQUAL, "column", BIGINT, 101L);
            Iterator<String> result = readindex.lookUp(comparisonExpression);
            assertNotNull(result, "Result shouldn't be null");
            assertTrue(result.hasNext());
            assertEquals(value, result.next());
        }
    } finally {
        // Runs even when an assertion above fails.
        index.close();
    }
}
Also used : ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) Index(io.prestosql.spi.heuristicindex.Index) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) File(java.io.File) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 14 with Pair

use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.

the class TestBTreeIndex method testLookup.

/**
 * Round-trips a {@code BTreeIndex} through a file and checks an equality lookup on the copy.
 *
 * <p>Keys 100..199 are added under column {@code dummyCol}, key {@code 100 + i} mapping to
 * {@code "value" + i}; looking up key 101 must therefore return {@code "value1"} first.
 *
 * @throws IOException if writing or reading the index file fails
 * @throws IndexLookUpException if the lookup itself fails
 */
@Test
public void testLookup() throws IOException, IndexLookUpException {
    BTreeIndex index = new BTreeIndex();
    try {
        for (int i = 0; i < 100; i++) {
            List<Pair> pairs = new ArrayList<>();
            Long key = Long.valueOf(100 + i);
            String value = "value" + i;
            pairs.add(new Pair(key, value));
            Pair pair = new Pair("dummyCol", pairs);
            index.addKeyValues(Collections.singletonList(pair));
        }
        File file = getFile();
        // Close the stream as soon as the index has been written so the data is flushed and
        // the file handle is not leaked.
        try (FileOutputStream out = new FileOutputStream(file)) {
            index.serialize(out);
        }
        BTreeIndex readIndex = new BTreeIndex();
        // The input stream stays open for the whole time readIndex is in use, so this works
        // regardless of whether deserialize() consumes the stream eagerly.
        try (FileInputStream in = new FileInputStream(file)) {
            readIndex.deserialize(in);
            RowExpression comparisonExpression = simplePredicate(OperatorType.EQUAL, "dummyCol", BIGINT, 101L);
            Iterator<String> result = readIndex.lookUp(comparisonExpression);
            assertNotNull(result, "Result shouldn't be null");
            assertTrue(result.hasNext());
            assertEquals("value1", result.next());
        } finally {
            readIndex.close();
        }
    } finally {
        // Runs even when an assertion above fails.
        index.close();
    }
}
Also used : FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) File(java.io.File) FileInputStream(java.io.FileInputStream) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 15 with Pair

use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.

the class SplitFiltering method filterUsingInvertedIndex.

/**
 * Narrows {@code inputSplits} down to the splits that may contain rows matching
 * {@code expression}, using the indices found for {@code referencedColumns}.
 *
 * <p>Splits are grouped by partition key (or {@code TABLE_LEVEL_KEY} when the file path has
 * none). A split is kept when:
 * <ul>
 *   <li>its partition has no index of its own and is not covered by a sole table-level index;</li>
 *   <li>it was modified after the index covering it; or</li>
 *   <li>the index returned at least one stripe for its file for which {@code rangeSearch}
 *       reports a match against the split's start/end range.</li>
 * </ul>
 *
 * <p>This is a best-effort optimisation: any failure is logged at debug level and the original
 * {@code inputSplits} are returned unfiltered.
 *
 * @param expression predicate to look up in the index
 * @param inputSplits splits to filter
 * @param fullQualifiedTableName table name passed to the index cache
 * @param referencedColumns columns for which indices are loaded
 * @param indexRecordKeyToRecordMap index records passed through to the index cache
 * @param indexerManager supplies the index client and the index filter
 * @return the filtered splits, or {@code inputSplits} itself when nothing could be filtered
 */
private static List<Split> filterUsingInvertedIndex(RowExpression expression, List<Split> inputSplits, String fullQualifiedTableName, Set<String> referencedColumns, Map<String, IndexRecord> indexRecordKeyToRecordMap, HeuristicIndexerManager indexerManager) {
    try {
        Map<String, Long> inputMaxLastUpdated = new HashMap<>();
        Map<String, Long> indexMaxLastUpdated = new HashMap<>();
        Map<String, List<Split>> partitionSplitMap = new HashMap<>();
        for (Split split : inputSplits) {
            String filePathStr = split.getConnectorSplit().getFilePath();
            String indexKey = getPartitionKeyOrElse(filePathStr, TABLE_LEVEL_KEY);
            long lastUpdated = split.getConnectorSplit().getLastModifiedTime();
            // keep the newest modification time seen for each partition
            inputMaxLastUpdated.merge(indexKey, lastUpdated, Long::max);
            partitionSplitMap.computeIfAbsent(indexKey, key -> new ArrayList<>()).add(split);
        }
        // Split is not compliant to table structure. Return all the splits
        if (partitionSplitMap.isEmpty()) {
            return inputSplits;
        }
        // Newest modification time across all input splits; it does not change while indices
        // are loaded, so compute it once. The map is non-empty here because it is filled in
        // lock-step with partitionSplitMap.
        Long newestInputModification = Collections.max(inputMaxLastUpdated.values());
        // col -> list of all indices on this column (all partitions)
        Map<String, List<IndexMetadata>> allIndices = new HashMap<>();
        // index loading and verification
        for (String column : referencedColumns) {
            List<IndexMetadata> indexMetadataList = new ArrayList<>();
            for (String indexType : INVERTED_INDEX) {
                indexMetadataList.addAll(getCache(indexerManager.getIndexClient()).getIndices(fullQualifiedTableName, column, indexType, partitionSplitMap.keySet(), newestInputModification, indexRecordKeyToRecordMap));
            }
            // Remember, per partition, the modification time recorded in the index; it is
            // compared against each split's own modification time during filtering below.
            for (IndexMetadata index : indexMetadataList) {
                String partitionKey = getPartitionKeyOrElse(index.getUri(), TABLE_LEVEL_KEY);
                long lastModifiedTime = Long.parseLong(index.getIndex().getProperties().getProperty(MAX_MODIFIED_TIME));
                indexMaxLastUpdated.put(partitionKey, lastModifiedTime);
            }
            allIndices.put(column, indexMetadataList);
        }
        // lookup index
        IndexFilter filter = indexerManager.getIndexFilter(allIndices);
        Iterator<String> iterator = filter.lookUp(expression);
        if (iterator == null) {
            throw new IndexLookUpException();
        }
        // all positions looked up from index, organized by file path
        Map<String, List<Pair<Long, Long>>> lookUpResults = new HashMap<>();
        while (iterator.hasNext()) {
            SerializationUtils.LookUpResult parsedLookUpResult = deserializeStripeSymbol(iterator.next());
            lookUpResults.computeIfAbsent(parsedLookUpResult.filepath, path -> new ArrayList<>()).add(parsedLookUpResult.stripe);
        }
        // Sort the matching stripes of every file once, e.g. (5,10), (18,25), (30,35), (35,40),
        // instead of re-sorting the same list for each split of that file.
        for (List<Pair<Long, Long>> stripes : lookUpResults.values()) {
            stripes.sort(Comparator.comparingLong(Pair::getFirst));
        }
        // the partitions are covered by a table-level index (same answer for every partition)
        boolean partitionHasTableLevelIndex = indexMaxLastUpdated.size() == 1 && indexMaxLastUpdated.containsKey(TABLE_LEVEL_KEY);
        // filtering
        List<Split> filteredSplits = new ArrayList<>();
        for (Map.Entry<String, List<Split>> entry : partitionSplitMap.entrySet()) {
            String partitionKey = entry.getKey();
            // the partition is indexed by its own partition's index
            boolean partitionHasOwnIndex = indexMaxLastUpdated.containsKey(partitionKey);
            if (!partitionHasOwnIndex && !partitionHasTableLevelIndex) {
                // no index covers this partition: nothing can be ruled out
                filteredSplits.addAll(entry.getValue());
                continue;
            }
            long indexLastModifiedTimeOfThisPartition = indexMaxLastUpdated.get(partitionHasOwnIndex ? partitionKey : TABLE_LEVEL_KEY);
            for (Split split : entry.getValue()) {
                String filePathStr = new URI(split.getConnectorSplit().getFilePath()).getPath();
                if (split.getConnectorSplit().getLastModifiedTime() > indexLastModifiedTimeOfThisPartition) {
                    // the split is newer than the index, so the index cannot rule it out
                    filteredSplits.add(split);
                    continue;
                }
                List<Pair<Long, Long>> stripes = lookUpResults.get(filePathStr);
                if (stripes != null) {
                    Pair<Long, Long> targetRange = new Pair<>(split.getConnectorSplit().getStartIndex(), split.getConnectorSplit().getEndIndex());
                    // do stripe matching: check if [targetStart, targetEnd] has any overlapping
                    // with the (already sorted) matching stripes
                    if (rangeSearch(stripes, targetRange)) {
                        filteredSplits.add(split);
                    }
                }
            }
        }
        return filteredSplits;
    } catch (Throwable e) {
        // Deliberately broad: index filtering is an optimisation, so on any failure the query
        // proceeds with the unfiltered splits.
        LOG.debug("Exception occurred while filtering. Returning original splits", e);
        return inputSplits;
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SerializationUtils(io.prestosql.spi.heuristicindex.SerializationUtils) URI(java.net.URI) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) Pair(io.prestosql.spi.heuristicindex.Pair) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException) AtomicLong(java.util.concurrent.atomic.AtomicLong) IndexFilter(io.prestosql.spi.heuristicindex.IndexFilter) Split(io.prestosql.metadata.Split) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

Pair (io.prestosql.spi.heuristicindex.Pair)38 Test (org.testng.annotations.Test)25 File (java.io.File)24 FileOutputStream (java.io.FileOutputStream)24 FileInputStream (java.io.FileInputStream)23 ArrayList (java.util.ArrayList)22 RowExpression (io.prestosql.spi.relation.RowExpression)14 TempFolder (io.hetu.core.common.filesystem.TempFolder)12 List (java.util.List)10 IOException (java.io.IOException)9 CreateIndexMetadata (io.prestosql.spi.connector.CreateIndexMetadata)8 Map (java.util.Map)8 Properties (java.util.Properties)8 HashMap (java.util.HashMap)7 Iterator (java.util.Iterator)7 Collections (java.util.Collections)6 Index (io.prestosql.spi.heuristicindex.Index)5 Objects.requireNonNull (java.util.Objects.requireNonNull)5 Preconditions.checkState (com.google.common.base.Preconditions.checkState)4 HeuristicIndexerManager (io.prestosql.heuristicindex.HeuristicIndexerManager)4