Search in sources :

Example 21 with TempFolder

use of io.hetu.core.common.filesystem.TempFolder in project hetu-core by openlookeng.

the class TestBloomIndex method testMmapUse.

/**
 * Experimental test comparing a heap-loaded {@code BloomIndex} (mmap disabled) with an
 * mmap-enabled one on three kinds of data: random ints, random doubles and random UUID strings.
 *
 * <p>Each data kind is exercised by {@link #compareMmapWithMemory}; the printed timings are
 * informational only, while the filter-equality, memory-usage and disk-usage checks are asserted.
 *
 * @throws IOException if creating the temp folder or reading/writing an index file fails
 */
@Test
public void testMmapUse() throws IOException {
    // experiment test to understand the performance of using mmap
    try (TempFolder folder = new TempFolder()) {
        folder.create();
        int dataEntryNum = 2000000;
        int queryNum = 10000;

        // compare the performance on int data with 2000000 values
        Random rd = new Random();
        Random rdTest = new Random();
        compareMmapWithMemory(folder, "int", dataEntryNum, queryNum,
                () -> rd.nextInt(),
                () -> simplePredicate(OperatorType.EQUAL, "testColumn", INTEGER, rdTest.nextInt()));

        // compare the performance on double data with 2000000 entries
        Random rdDouble = new Random();
        Random rdTestDouble = new Random();
        compareMmapWithMemory(folder, "double", dataEntryNum, queryNum,
                () -> rdDouble.nextDouble(),
                () -> simplePredicate(OperatorType.EQUAL, "testColumn", DOUBLE, rdTestDouble.nextDouble()));

        // compare the performance on UUID string with 2000000 entries
        compareMmapWithMemory(folder, "string", dataEntryNum, queryNum,
                () -> UUID.randomUUID().toString(),
                () -> simplePredicate(OperatorType.EQUAL, "testColumn", VARCHAR, UUID.randomUUID().toString()));
    }
}

/**
 * Builds a bloom index over {@code dataEntryNum} generated values, persists it to a new file
 * in {@code folder}, reloads it once with mmap disabled and once with mmap enabled, prints the
 * query time of both, and asserts that both hold an equal filter while the mmap variant
 * reports less memory usage and more disk usage.
 *
 * <p>The generated values are handed to the index as a flat {@code List<Object>} (the same
 * shape the other tests in this class pass), so every generated value is its own list entry
 * instead of the whole list being wrapped as one nested element.
 *
 * @param folder temp folder that hosts the serialized index file
 * @param fileName name of the file created inside {@code folder}
 * @param dataEntryNum number of values to generate and index
 * @param queryNum number of predicates evaluated against each loaded index
 * @param valueSupplier produces one value to index per call
 * @param querySupplier produces one predicate to evaluate per call
 * @throws IOException if the index file cannot be written or read
 */
private static void compareMmapWithMemory(
        TempFolder folder,
        String fileName,
        int dataEntryNum,
        int queryNum,
        java.util.function.Supplier<Object> valueSupplier,
        java.util.function.Supplier<RowExpression> querySupplier) throws IOException {
    List<Object> values = new ArrayList<>(dataEntryNum);
    for (int i = 0; i < dataEntryNum; i++) {
        values.add(valueSupplier.get());
    }

    File testFile = folder.newFile(fileName);
    // The three indexes are scoped to this comparison so they are closed as soon as it is done.
    try (BloomIndex objectBloomIndex = new BloomIndex();
        BloomIndex bloomIndexMemory = new BloomIndex();
        BloomIndex bloomIndexMmap = new BloomIndex()) {
        objectBloomIndex.setExpectedNumOfEntries(dataEntryNum);
        objectBloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", values)));
        try (FileOutputStream fo = new FileOutputStream(testFile)) {
            objectBloomIndex.serialize(fo);
        }

        bloomIndexMemory.setMmapEnabled(false);
        bloomIndexMemory.setExpectedNumOfEntries(dataEntryNum);
        try (FileInputStream fi = new FileInputStream(testFile)) {
            bloomIndexMemory.deserialize(fi);
        }

        bloomIndexMmap.setMmapEnabled(true);
        bloomIndexMmap.setExpectedNumOfEntries(dataEntryNum);
        try (FileInputStream fi = new FileInputStream(testFile)) {
            bloomIndexMmap.deserialize(fi);
        }

        System.out.println(testFile);
        // get query time using memory
        System.out.println(timeQueriesMillis(bloomIndexMemory, queryNum, querySupplier));
        // get query time using mmap
        System.out.println(timeQueriesMillis(bloomIndexMmap, queryNum, querySupplier));

        assertEquals(bloomIndexMmap.getFilter(), bloomIndexMemory.getFilter());
        assertTrue(bloomIndexMemory.getMemoryUsage() > bloomIndexMmap.getMemoryUsage(), "mmap should use less memory.");
        assertTrue(bloomIndexMemory.getDiskUsage() < bloomIndexMmap.getDiskUsage(), "mmap should use file space.");
    }
}

/**
 * Evaluates {@code queryNum} freshly supplied predicates against {@code index} and returns the
 * elapsed wall time in milliseconds. Predicate construction is part of the measured time.
 *
 * @param index index to query
 * @param queryNum number of predicates to evaluate
 * @param querySupplier produces one predicate per call
 * @return elapsed time in milliseconds
 * @throws IOException declared so a failure raised while matching propagates to the test
 */
private static long timeQueriesMillis(
        BloomIndex index,
        int queryNum,
        java.util.function.Supplier<RowExpression> querySupplier) throws IOException {
    // nanoTime is monotonic, so the measurement is not skewed by wall-clock adjustments
    long startNanos = System.nanoTime();
    for (int i = 0; i < queryNum; i++) {
        index.matches(querySupplier.get());
    }
    return (System.nanoTime() - startNanos) / 1_000_000L;
}
Also used : ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) FileInputStream(java.io.FileInputStream) BloomFilter(io.prestosql.spi.util.BloomFilter) Random(java.util.Random) TempFolder(io.hetu.core.common.filesystem.TempFolder) FileOutputStream(java.io.FileOutputStream) File(java.io.File) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 22 with TempFolder

use of io.hetu.core.common.filesystem.TempFolder in project hetu-core by openlookeng.

the class TestBloomIndex method testPersist.

/**
 * Serializes a small bloom index to a file in a temp folder and checks that the
 * resulting file is not empty.
 *
 * @throws IOException if the temp folder or the index file cannot be created, written or read
 */
@Test
public void testPersist() throws IOException {
    try (TempFolder tempDir = new TempFolder();
        BloomIndex index = new BloomIndex()) {
        tempDir.create();
        File persistedFile = tempDir.newFile();

        // mix of punctuation, non-ASCII text, a control character and plain strings
        List<Object> values = ImmutableList.of("%#!", ":dfs", "测试", "\n", "ab", "a");
        index.setExpectedNumOfEntries(values.size());
        index.addValues(Collections.singletonList(new Pair<>("testColumn", values)));

        try (FileOutputStream out = new FileOutputStream(persistedFile)) {
            index.serialize(out);
        }

        // reopen the file and make sure at least one byte is readable
        try (FileInputStream in = new FileInputStream(persistedFile)) {
            boolean hasContent = in.available() != 0;
            assertTrue(hasContent, "Persisted bloom index file is empty");
        }
    }
}
Also used : TempFolder(io.hetu.core.common.filesystem.TempFolder) FileOutputStream(java.io.FileOutputStream) File(java.io.File) FileInputStream(java.io.FileInputStream) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 23 with TempFolder

use of io.hetu.core.common.filesystem.TempFolder in project hetu-core by openlookeng.

the class TestBloomIndex method testMatching.

/**
 * Verifies that a bloom index still reports the inserted values after a
 * serialize/deserialize round trip, for bulk insertion (run twice) and for
 * one-value-at-a-time insertion.
 *
 * @throws IOException if the temp folder or the index file cannot be created, written or read
 */
@Test
public void testMatching() throws IOException {
    try (TempFolder folder = new TempFolder()) {
        folder.create();
        File testFile = folder.newFile();
        List<Object> testValues = ImmutableList.of("a", "ab", "测试", "\n", "%#!", ":dfs");
        // Test String bloom indexer
        try (BloomIndex stringBloomIndex = new BloomIndex()) {
            stringBloomIndex.setExpectedNumOfEntries(testValues.size());
            stringBloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", testValues)));
            assertRoundTripMatches(stringBloomIndex, testFile);
        }
        // Test with the generic type to be Object
        try (BloomIndex objectBloomIndex = new BloomIndex()) {
            objectBloomIndex.setExpectedNumOfEntries(testValues.size());
            objectBloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", testValues)));
            assertRoundTripMatches(objectBloomIndex, testFile);
        }
        // Test single insertion
        try (BloomIndex simpleBloomIndex = new BloomIndex()) {
            simpleBloomIndex.setExpectedNumOfEntries(6);
            // one addValues call per value, in the same order as the bulk list
            for (Object value : testValues) {
                simpleBloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", ImmutableList.of(value))));
            }
            assertRoundTripMatches(simpleBloomIndex, testFile);
        }
    }
}

/**
 * Writes {@code index} to {@code file}, reads it back into the same instance, and asserts
 * that the six expected values are reported as possibly present while "random" and "abc"
 * are reported as absent.
 *
 * @param index populated index to round-trip and probe
 * @param file file used to persist the index; overwritten on each call
 * @throws IOException if the file cannot be written or read
 */
private void assertRoundTripMatches(BloomIndex index, File file) throws IOException {
    try (FileOutputStream fo = new FileOutputStream(file)) {
        index.serialize(fo);
    }
    try (FileInputStream fi = new FileInputStream(file)) {
        index.deserialize(fi);
    }
    assertTrue(mightContain(index, VARCHAR, "a"));
    assertTrue(mightContain(index, VARCHAR, "ab"));
    assertTrue(mightContain(index, VARCHAR, "测试"));
    assertTrue(mightContain(index, VARCHAR, "\n"));
    assertTrue(mightContain(index, VARCHAR, "%#!"));
    assertTrue(mightContain(index, VARCHAR, ":dfs"));
    assertFalse(mightContain(index, VARCHAR, "random"));
    assertFalse(mightContain(index, VARCHAR, "abc"));
}
Also used : TempFolder(io.hetu.core.common.filesystem.TempFolder) FileOutputStream(java.io.FileOutputStream) File(java.io.File) FileInputStream(java.io.FileInputStream) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 24 with TempFolder

use of io.hetu.core.common.filesystem.TempFolder in project hetu-core by openlookeng.

the class TestBloomIndex method testMatches.

/**
 * Round-trips a four-value bloom index through a file and checks that an equality
 * predicate on an inserted value matches while one on a value that was never inserted does not.
 *
 * @throws IOException if the temp folder or the index file cannot be created, written or read
 */
@Test
public void testMatches() throws IOException {
    try (TempFolder tempDir = new TempFolder();
        BloomIndex index = new BloomIndex()) {
        tempDir.create();
        File indexFile = tempDir.newFile();

        List<Object> indexedValues = ImmutableList.of("a", "b", "c", "d");
        index.setExpectedNumOfEntries(indexedValues.size());
        index.addValues(Collections.singletonList(new Pair<>("testColumn", indexedValues)));

        // persist, then reload into the same instance
        try (FileOutputStream out = new FileOutputStream(indexFile)) {
            index.serialize(out);
        }
        try (FileInputStream in = new FileInputStream(indexFile)) {
            index.deserialize(in);
        }

        RowExpression insertedValuePredicate = simplePredicate(OperatorType.EQUAL, "testColumn", VARCHAR, "a");
        RowExpression missingValuePredicate = simplePredicate(OperatorType.EQUAL, "testColumn", VARCHAR, "e");
        assertTrue(index.matches(insertedValuePredicate));
        assertFalse(index.matches(missingValuePredicate));
    }
}
Also used : TempFolder(io.hetu.core.common.filesystem.TempFolder) FileOutputStream(java.io.FileOutputStream) RowExpression(io.prestosql.spi.relation.RowExpression) File(java.io.File) FileInputStream(java.io.FileInputStream) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 25 with TempFolder

use of io.hetu.core.common.filesystem.TempFolder in project hetu-core by openlookeng.

the class TestIndexRecordManager method testDelete.

/**
 * Adds two index records to a file-system-backed metastore rooted in a temp folder and
 * checks lookups and record counts after deleting the first record, deleting it a second
 * time, and finally deleting the second record.
 *
 * @throws IOException if the temp folder cannot be created or cleaned up
 */
@Test
public void testDelete() throws IOException {
    try (TempFolder tempDir = new TempFolder()) {
        tempDir.create();
        HetuFsMetastoreConfig metastoreConfig = new HetuFsMetastoreConfig().setHetuFileSystemMetastorePath(tempDir.getRoot().getPath());
        HetuMetastore metastore = new HetuFsMetastore(metastoreConfig, FILE_SYSTEM_CLIENT);
        IndexRecordManager manager = new IndexRecordManager(metastore);

        String user = "testUser";
        String table = "testCatalog.testSchema.testTable";
        manager.addIndexRecord("1", user, table, new String[] { "testColumn" }, "minmax", 0L, Collections.emptyList(), Arrays.asList("cp=1"));
        manager.addIndexRecord("2", user, table, new String[] { "testColumn" }, "bloom", 0L, Collections.emptyList(), Arrays.asList("cp=1"));
        assertNotNull(manager.lookUpIndexRecord("1"));
        assertEquals(manager.getIndexRecords().size(), 2);

        // First removal of record 1: only record 2 remains
        manager.deleteIndexRecord("1", Collections.emptyList());
        assertNull(manager.lookUpIndexRecord("1"));
        assertNotNull(manager.lookUpIndexRecord("2"));
        assertEquals(manager.getIndexRecords().size(), 1);

        // Removing record 1 a second time leaves the state unchanged
        manager.deleteIndexRecord("1", Collections.emptyList());
        assertNull(manager.lookUpIndexRecord("1"));
        assertNotNull(manager.lookUpIndexRecord("2"));
        assertEquals(manager.getIndexRecords().size(), 1);

        // Removing record 2 leaves no records
        manager.deleteIndexRecord("2", Collections.emptyList());
        assertNull(manager.lookUpIndexRecord("2"));
        assertEquals(manager.getIndexRecords().size(), 0);
    }
}
Also used : HetuFsMetastoreConfig(io.hetu.core.metastore.hetufilesystem.HetuFsMetastoreConfig) TempFolder(io.hetu.core.common.filesystem.TempFolder) HetuFsMetastore(io.hetu.core.metastore.hetufilesystem.HetuFsMetastore) HetuMetastore(io.prestosql.spi.metastore.HetuMetastore) JdbcHetuMetastore(io.hetu.core.metastore.jdbc.JdbcHetuMetastore) Test(org.testng.annotations.Test)

Aggregations

TempFolder (io.hetu.core.common.filesystem.TempFolder): 28, File (java.io.File): 18, Test (org.testng.annotations.Test): 17, FileInputStream (java.io.FileInputStream): 15, FileOutputStream (java.io.FileOutputStream): 14, Pair (io.prestosql.spi.heuristicindex.Pair): 12, HashMap (java.util.HashMap): 5, HetuFileSystemClientPlugin (io.hetu.core.filesystem.HetuFileSystemClientPlugin): 4, HetuMetastorePlugin (io.hetu.core.metastore.HetuMetastorePlugin): 4, HetuFsMetastore (io.hetu.core.metastore.hetufilesystem.HetuFsMetastore): 4, HetuFsMetastoreConfig (io.hetu.core.metastore.hetufilesystem.HetuFsMetastoreConfig): 4, BeforeClass (org.testng.annotations.BeforeClass): 4, HetuMetastore (io.prestosql.spi.metastore.HetuMetastore): 3, InputStream (java.io.InputStream): 3, Properties (java.util.Properties): 3, HetuLocalFileSystemClient (io.hetu.core.filesystem.HetuLocalFileSystemClient): 2, LocalConfig (io.hetu.core.filesystem.LocalConfig): 2, JdbcHetuMetastore (io.hetu.core.metastore.jdbc.JdbcHetuMetastore): 2, MemoryPlugin (io.prestosql.plugin.memory.MemoryPlugin): 2, RowExpression (io.prestosql.spi.relation.RowExpression): 2