Search in sources :

Example 21 with Pair

use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.

the class PartitionIndexWriter method persist.

/**
 * Persists the collected data into an Index object and serializes it to disk.
 *
 * <p>The index directory is derived from the root, table name, index column,
 * index type and (when set) the partition. Any regular files already present
 * under that directory are deleted before the new index file is written. If
 * serialization fails, the partially written file is removed and the original
 * exception is rethrown.
 *
 * <p>The whole operation runs while holding {@code persistLock}; the lock is
 * released even if closing the index fails.
 *
 * @return the value of the {@code "size"} attribute of the written index file
 * @throws IOException if reading, deleting or writing index files fails
 */
@Override
public long persist() throws IOException {
    persistLock.lock();
    try {
        // inverse map from symbol -> id to id -> symbol for better lookup performance
        Map<String, String> idToSymbolMap = symbolToIdMap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getValue, Map.Entry::getKey));
        String serializedSymbolTable = serializeMap(idToSymbolMap);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Symbol table size: " + idToSymbolMap.size());
            LOG.debug("Output map size: " + dataMap.size());
            LOG.debug("symbol table: " + serializedSymbolTable);
        }
        String dbPath = "";
        for (Pair<String, Type> entry : createIndexMetadata.getIndexColumns()) {
            // Use an explicit locale for both case conversions so the path does not depend on the JVM default locale
            dbPath = this.root + "/" + createIndexMetadata.getTableName() + "/" + entry.getFirst().toLowerCase(Locale.ENGLISH) + "/" + createIndexMetadata.getIndexType().toUpperCase(Locale.ENGLISH);
            if (partition != null) {
                dbPath = dbPath + "/" + partition;
            }
            partitionIndex = HeuristicIndexFactory.createIndex(createIndexMetadata.getIndexType());
        }
        // check required for security scan since we are constructing a path using input
        checkArgument(!dbPath.contains("../"), dbPath + " must be absolute and under one of the following whitelisted directories:  " + SecurePathWhiteList.getSecurePathWhiteList().toString());
        checkArgument(SecurePathWhiteList.isSecurePath(dbPath), dbPath + " must be under one of the following whitelisted directories: " + SecurePathWhiteList.getSecurePathWhiteList().toString());
        List<Pair<Comparable<? extends Comparable<?>>, String>> values = new ArrayList<>(dataMap.size());
        for (Map.Entry<Comparable<? extends Comparable<?>>, String> entry : dataMap.entrySet()) {
            values.add(new Pair<>(entry.getKey(), entry.getValue()));
        }
        String columnName = createIndexMetadata.getIndexColumns().get(0).getFirst().toLowerCase(Locale.ENGLISH);
        partitionIndex.addKeyValues(Collections.singletonList(new Pair<>(columnName, values)));
        properties.put(SYMBOL_TABLE_KEY_NAME, serializedSymbolTable);
        properties.put(MAX_MODIFIED_TIME, String.valueOf(maxLastModifiedTime));
        partitionIndex.setProperties(properties);
        Path filePath = Paths.get(dbPath + "/" + IndexConstants.LAST_MODIFIED_FILE_PREFIX + maxLastModifiedTime);
        // check required for security scan since we are constructing a path using input
        checkArgument(!filePath.toString().contains("../"), filePath + " must be absolute and under one of the following whitelisted directories:  " + SecurePathWhiteList.getSecurePathWhiteList().toString());
        // validate the file path itself (the directory was already validated above)
        checkArgument(SecurePathWhiteList.isSecurePath(filePath.toString()), filePath + " must be under one of the following whitelisted directories: " + SecurePathWhiteList.getSecurePathWhiteList().toString());
        Path dbDir = Paths.get(dbPath);
        if (fs.exists(dbDir)) {
            List<Path> oldFiles;
            // the walk stream holds directory handles and must be closed once consumed
            try (java.util.stream.Stream<Path> walk = fs.walk(dbDir)) {
                oldFiles = walk.filter(p -> !fs.isDirectory(p)).collect(Collectors.toList());
            }
            for (Path oldFile : oldFiles) {
                fs.deleteIfExists(oldFile);
            }
        }
        fs.createDirectories(filePath.getParent());
        try (OutputStream os = fs.newOutputStream(filePath)) {
            partitionIndex.serialize(os);
        } catch (IOException e) {
            // roll back creation without letting a cleanup failure hide the original error
            try {
                fs.deleteIfExists(filePath);
            } catch (IOException cleanupFailure) {
                e.addSuppressed(cleanupFailure);
            }
            throw e;
        }
        return (long) fs.getAttribute(filePath, "size");
    } finally {
        try {
            if (partitionIndex != null) {
                partitionIndex.close();
            }
        } finally {
            // always release the lock, even when closing the index throws
            persistLock.unlock();
        }
    }
}
Also used : Path(java.nio.file.Path) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) Type(io.prestosql.spi.type.Type) Map(java.util.Map) SerializationUtils.serializeMap(io.prestosql.spi.heuristicindex.SerializationUtils.serializeMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Pair(io.prestosql.spi.heuristicindex.Pair)

Example 22 with Pair

use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.

the class TestBTreeIndex method testSerialize.

/**
 * Adds 1000 single-entry key/value lists (keys 100..1099, all with the same
 * value string) to a BTreeIndex and checks that the index can be serialized
 * to a temporary file, which is then deleted.
 */
@Test
public void testSerialize() throws IOException {
    BTreeIndex index = new BTreeIndex();
    String value = "001:3,002:3,003:3,004:3,005:3,006:3,007:3,008:3,009:3,002:3,010:3,002:3,011:3,012:3,101:3,102:3,103:3,104:3,105:3,106:3,107:3,108:3,109:3,102:3,110:3,102:3,111:3,112:3";
    for (int i = 0; i < 1000; i++) {
        List<Pair> pairs = new ArrayList<>();
        Long key = Long.valueOf(100 + i);
        pairs.add(new Pair(key, value));
        Pair pair = new Pair("dummyCol", pairs);
        index.addKeyValues(Collections.singletonList(pair));
    }
    File file = File.createTempFile("test-serialize-", UUID.randomUUID().toString());
    // close the stream before deleting: an open handle leaks and can block deletion on some platforms
    try (FileOutputStream out = new FileOutputStream(file)) {
        index.serialize(out);
    }
    assertTrue(file.delete());
    index.close();
}
Also used : FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) File(java.io.File) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 23 with Pair

use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.

the class TestBTreeIndex method testGreaterThanEqualTo.

/**
 * Builds an index with keys 100..199 mapped to "value0".."value99", round-trips
 * it through a file, and checks that a GREATER_THAN_OR_EQUAL 120 lookup yields
 * exactly "value20".."value99" in that order.
 */
@Test
public void testGreaterThanEqualTo() throws IOException, IndexLookUpException {
    BTreeIndex index = new BTreeIndex();
    for (int i = 0; i < 100; i++) {
        List<Pair> pairs = new ArrayList<>();
        Long key = Long.valueOf(100 + i);
        String value = "value" + i;
        pairs.add(new Pair(key, value));
        Pair pair = new Pair("dummyCol", pairs);
        index.addKeyValues(Collections.singletonList(pair));
    }
    File file = getFile();
    // close both streams deterministically instead of leaking the file handles
    try (FileOutputStream out = new FileOutputStream(file)) {
        index.serialize(out);
    }
    BTreeIndex readIndex = new BTreeIndex();
    try (FileInputStream in = new FileInputStream(file)) {
        readIndex.deserialize(in);
    }
    RowExpression comparisonExpression = simplePredicate(OperatorType.GREATER_THAN_OR_EQUAL, "dummyCol", BIGINT, 120L);
    Iterator<String> result = readIndex.lookUp(comparisonExpression);
    assertNotNull(result, "Result shouldn't be null");
    for (int i = 20; i < 100; i++) {
        Object data = result.next();
        // TestNG order is (actual, expected), so failure messages report the right side
        assertEquals(data.toString(), "value" + i);
    }
    assertFalse(result.hasNext());
    index.close();
}
Also used : ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) File(java.io.File) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 24 with Pair

use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.

the class TestBTreeIndex method testLongKey.

/**
 * Stores a single Long key (1211231231) in an index, round-trips the index
 * through a file, and checks that an EQUAL predicate on that key matches.
 */
@Test
public void testLongKey() throws IOException {
    BTreeIndex index = new BTreeIndex();
    String value = "001:3,002:3,003:3,004:3,005:3,006:3,007:3,008:3,009:3,002:3,010:3,002:3,011:3,012:3,101:3,102:3,103:3,104:3,105:3,106:3,107:3,108:3,109:3,102:3,110:3,102:3,111:3,112:3";
    List<Pair> pairs = new ArrayList<>();
    Long key = 1211231231L;
    pairs.add(new Pair(key, value));
    Pair pair = new Pair("dummyCol", pairs);
    index.addKeyValues(Collections.singletonList(pair));
    File file = getFile();
    // close both streams deterministically instead of leaking the file handles
    try (FileOutputStream out = new FileOutputStream(file)) {
        index.serialize(out);
    }
    BTreeIndex readIndex = new BTreeIndex();
    try (FileInputStream in = new FileInputStream(file)) {
        readIndex.deserialize(in);
    }
    RowExpression comparisonExpression = simplePredicate(OperatorType.EQUAL, "dummyCol", BIGINT, key);
    assertTrue(readIndex.matches(comparisonExpression), "Key should exists");
    // release the written index, as the other tests in this class do
    index.close();
}
Also used : FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) File(java.io.File) FileInputStream(java.io.FileInputStream) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 25 with Pair

use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.

the class TestBTreeIndex method testBetween.

/**
 * Builds an index with keys 100..119 mapped to "value0".."value19", round-trips
 * it through a file, and checks that a BETWEEN 111 AND 114 lookup yields exactly
 * "value11".."value14" in that order.
 */
@Test
public void testBetween() throws IOException, IndexLookUpException {
    BTreeIndex index = new BTreeIndex();
    for (int i = 0; i < 20; i++) {
        List<Pair> pairs = new ArrayList<>();
        Long key = Long.valueOf(100 + i);
        String value = "value" + i;
        pairs.add(new Pair(key, value));
        Pair pair = new Pair("dummyCol", pairs);
        index.addKeyValues(Collections.singletonList(pair));
    }
    File file = getFile();
    // close both streams deterministically instead of leaking the file handles
    try (FileOutputStream out = new FileOutputStream(file)) {
        index.serialize(out);
    }
    BTreeIndex readIndex = new BTreeIndex();
    try (FileInputStream in = new FileInputStream(file)) {
        readIndex.deserialize(in);
    }
    RowExpression betweenPredicate = new SpecialForm(SpecialForm.Form.BETWEEN, BOOLEAN, new VariableReferenceExpression("dummyCol", VARCHAR), new ConstantExpression(111L, BIGINT), new ConstantExpression(114L, BIGINT));
    Iterator<String> result = readIndex.lookUp(betweenPredicate);
    assertNotNull(result, "Result shouldn't be null");
    assertTrue(result.hasNext());
    for (int i = 11; i <= 14; i++) {
        // TestNG order is (actual, expected), so failure messages report the right side
        assertEquals(result.next(), "value" + i);
    }
    assertFalse(result.hasNext());
    index.close();
}
Also used : ConstantExpression(io.prestosql.spi.relation.ConstantExpression) ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) FileInputStream(java.io.FileInputStream) VariableReferenceExpression(io.prestosql.spi.relation.VariableReferenceExpression) FileOutputStream(java.io.FileOutputStream) File(java.io.File) SpecialForm(io.prestosql.spi.relation.SpecialForm) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Aggregations

Pair (io.prestosql.spi.heuristicindex.Pair)38 Test (org.testng.annotations.Test)25 File (java.io.File)24 FileOutputStream (java.io.FileOutputStream)24 FileInputStream (java.io.FileInputStream)23 ArrayList (java.util.ArrayList)22 RowExpression (io.prestosql.spi.relation.RowExpression)14 TempFolder (io.hetu.core.common.filesystem.TempFolder)12 List (java.util.List)10 IOException (java.io.IOException)9 CreateIndexMetadata (io.prestosql.spi.connector.CreateIndexMetadata)8 Map (java.util.Map)8 Properties (java.util.Properties)8 HashMap (java.util.HashMap)7 Iterator (java.util.Iterator)7 Collections (java.util.Collections)6 Index (io.prestosql.spi.heuristicindex.Index)5 Objects.requireNonNull (java.util.Objects.requireNonNull)5 Preconditions.checkState (com.google.common.base.Preconditions.checkState)4 HeuristicIndexerManager (io.prestosql.heuristicindex.HeuristicIndexerManager)4