use of io.hetu.core.common.filesystem.TempFolder in project hetu-core by openlookeng.
the class TestBloomIndex method testMmapUse.
/**
 * Experiment-style test for understanding the behaviour of mmap-backed bloom indexes.
 *
 * <p>For int, double and UUID-string data the test builds an index, serializes it to a file,
 * deserializes it once with mmap disabled and once with mmap enabled, times {@code queryNum}
 * equality lookups against each copy (timings are printed, never asserted), and finally
 * asserts that both copies expose equal filters, that the mmap copy reports less memory
 * usage, and that it reports more disk usage.
 *
 * @throws IOException if creating, writing or reading any of the temporary index files fails
 */
@Test
public void testMmapUse() throws IOException {
    try (TempFolder folder = new TempFolder();
            BloomIndex objectBloomIndex = new BloomIndex();
            BloomIndex bloomIndexMemory = new BloomIndex();
            BloomIndex bloomIndexMmap = new BloomIndex();
            BloomIndex objectBloomIndexString = new BloomIndex();
            BloomIndex objectBloomIndexDouble = new BloomIndex();
            BloomIndex bloomIndexMemoryString = new BloomIndex();
            BloomIndex bloomIndexMemoryDouble = new BloomIndex();
            BloomIndex bloomIndexMmapDouble = new BloomIndex();
            BloomIndex bloomIndexMmapString = new BloomIndex()) {
        folder.create();
        int dataEntryNum = 2000000;
        int queryNum = 10000;
        long startNanos;

        // compare the performance on int data with 2000000 values
        File testFile = folder.newFile("int");
        objectBloomIndex.setExpectedNumOfEntries(dataEntryNum);
        Random rd = new Random();
        List<Integer> arr = new ArrayList<>(dataEntryNum);
        for (int i = 0; i < dataEntryNum; i++) {
            arr.add(rd.nextInt());
        }
        // NOTE(review): ImmutableList.of(arr) builds a one-element list whose only element is
        // the list itself; confirm whether ImmutableList.copyOf(arr) was intended here.
        objectBloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", ImmutableList.of(arr))));
        writeIndexTo(objectBloomIndex, testFile);
        readIndexFrom(bloomIndexMemory, testFile, false, dataEntryNum);
        readIndexFrom(bloomIndexMmap, testFile, true, dataEntryNum);
        System.out.println(testFile);
        Random rdTest = new Random();
        // get query time using memory
        startNanos = System.nanoTime();
        for (int i = 0; i < queryNum; i++) {
            int testNum = rdTest.nextInt();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", INTEGER, testNum);
            bloomIndexMemory.matches(expression);
        }
        System.out.println(millisSince(startNanos));
        // get query time using mmap
        startNanos = System.nanoTime();
        for (int i = 0; i < queryNum; i++) {
            int testNum = rdTest.nextInt();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", INTEGER, testNum);
            bloomIndexMmap.matches(expression);
        }
        System.out.println(millisSince(startNanos));
        assertMmapTradeOff(bloomIndexMemory, bloomIndexMmap);

        // compare the performance on double data with 2000000 entries
        File testFileDouble = folder.newFile("double");
        objectBloomIndexDouble.setExpectedNumOfEntries(dataEntryNum);
        Random rdDouble = new Random();
        List<Double> arrDouble = new ArrayList<>(dataEntryNum);
        for (int i = 0; i < dataEntryNum; i++) {
            arrDouble.add(rdDouble.nextDouble());
        }
        // NOTE(review): same single-element wrapping as in the int section above.
        objectBloomIndexDouble.addValues(Collections.singletonList(new Pair<>("testColumn", ImmutableList.of(arrDouble))));
        writeIndexTo(objectBloomIndexDouble, testFileDouble);
        readIndexFrom(bloomIndexMemoryDouble, testFileDouble, false, dataEntryNum);
        readIndexFrom(bloomIndexMmapDouble, testFileDouble, true, dataEntryNum);
        System.out.println(testFileDouble);
        Random rdTestDouble = new Random();
        // get query time using memory
        startNanos = System.nanoTime();
        for (int i = 0; i < queryNum; i++) {
            double testDouble = rdTestDouble.nextDouble();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", DOUBLE, testDouble);
            bloomIndexMemoryDouble.matches(expression);
        }
        System.out.println(millisSince(startNanos));
        // get query time using mmap
        startNanos = System.nanoTime();
        for (int i = 0; i < queryNum; i++) {
            double testDouble = rdTestDouble.nextDouble();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", DOUBLE, testDouble);
            bloomIndexMmapDouble.matches(expression);
        }
        System.out.println(millisSince(startNanos));
        assertMmapTradeOff(bloomIndexMemoryDouble, bloomIndexMmapDouble);

        // compare the performance on UUID string with 2000000 entries
        File testFileString = folder.newFile("string");
        objectBloomIndexString.setExpectedNumOfEntries(dataEntryNum);
        List<String> arrString = new ArrayList<>(dataEntryNum);
        for (int i = 0; i < dataEntryNum; i++) {
            arrString.add(UUID.randomUUID().toString());
        }
        // NOTE(review): same single-element wrapping as in the int section above.
        objectBloomIndexString.addValues(Collections.singletonList(new Pair<>("testColumn", ImmutableList.of(arrString))));
        writeIndexTo(objectBloomIndexString, testFileString);
        readIndexFrom(bloomIndexMemoryString, testFileString, false, dataEntryNum);
        readIndexFrom(bloomIndexMmapString, testFileString, true, dataEntryNum);
        System.out.println(testFileString);
        // get query time using memory
        startNanos = System.nanoTime();
        for (int i = 0; i < queryNum; i++) {
            String testString = UUID.randomUUID().toString();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", VARCHAR, testString);
            bloomIndexMemoryString.matches(expression);
        }
        System.out.println(millisSince(startNanos));
        // get query time using mmap
        startNanos = System.nanoTime();
        for (int i = 0; i < queryNum; i++) {
            String testString = UUID.randomUUID().toString();
            RowExpression expression = simplePredicate(OperatorType.EQUAL, "testColumn", VARCHAR, testString);
            bloomIndexMmapString.matches(expression);
        }
        System.out.println(millisSince(startNanos));
        assertMmapTradeOff(bloomIndexMemoryString, bloomIndexMmapString);
    }
}

/** Serializes {@code index} into {@code target}, closing the stream afterwards. */
private void writeIndexTo(BloomIndex index, File target) throws IOException {
    try (FileOutputStream fo = new FileOutputStream(target)) {
        index.serialize(fo);
    }
}

/**
 * Configures {@code index} (mmap flag first, then expected entry count) and deserializes it
 * from {@code source}, closing the stream afterwards.
 */
private void readIndexFrom(BloomIndex index, File source, boolean mmapEnabled, int expectedEntries) throws IOException {
    index.setMmapEnabled(mmapEnabled);
    index.setExpectedNumOfEntries(expectedEntries);
    try (FileInputStream fi = new FileInputStream(source)) {
        index.deserialize(fi);
    }
}

/**
 * Asserts that both indexes expose equal filters, that the in-memory index reports more
 * memory usage than the mmap one, and that it reports less disk usage than the mmap one.
 */
private void assertMmapTradeOff(BloomIndex inMemory, BloomIndex mmapped) throws IOException {
    BloomFilter memoryFilter = inMemory.getFilter();
    BloomFilter mmapFilter = mmapped.getFilter();
    assertEquals(mmapFilter, memoryFilter);
    long memoryUsageInMemory = inMemory.getMemoryUsage();
    long memoryUsageMmap = mmapped.getMemoryUsage();
    assertTrue(memoryUsageInMemory > memoryUsageMmap, "mmap should use less memory.");
    long diskUsageInMemory = inMemory.getDiskUsage();
    long diskUsageMmap = mmapped.getDiskUsage();
    assertTrue(diskUsageInMemory < diskUsageMmap, "mmap should use file space.");
}

/**
 * Returns the whole milliseconds elapsed since {@code startNanos}, a value previously obtained
 * from {@link System#nanoTime()}. nanoTime is used because it is monotonic, whereas the wall
 * clock may jump while a measurement is running.
 */
private long millisSince(long startNanos) {
    return (System.nanoTime() - startNanos) / 1_000_000L;
}
use of io.hetu.core.common.filesystem.TempFolder in project hetu-core by openlookeng.
the class TestBloomIndex method testPersist.
/**
 * Verifies that serializing a populated bloom index produces a non-empty file.
 *
 * @throws IOException if the temporary folder or the index file cannot be created or written
 */
@Test
public void testPersist() throws IOException {
    try (TempFolder folder = new TempFolder();
            BloomIndex objectBloomIndex = new BloomIndex()) {
        folder.create();
        File testFile = folder.newFile();
        List<Object> testValues = ImmutableList.of("%#!", ":dfs", "测试", "\n", "ab", "a");
        objectBloomIndex.setExpectedNumOfEntries(testValues.size());
        objectBloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", testValues)));
        try (FileOutputStream fo = new FileOutputStream(testFile)) {
            objectBloomIndex.serialize(fo);
        }
        // File.length() reports the real size of the file, whereas InputStream.available()
        // is only an estimate of the bytes readable without blocking.
        assertTrue(testFile.length() > 0, "Persisted bloom index file is empty");
    }
}
use of io.hetu.core.common.filesystem.TempFolder in project hetu-core by openlookeng.
the class TestBloomIndex method testMatching.
/**
 * Checks that a bloom index still reports the inserted strings (and rejects two strings that
 * were never inserted) after being serialized to a file and deserialized back. The same
 * verification runs for two indexes filled with one bulk insertion each and for one index
 * filled with six single-value insertions.
 *
 * @throws IOException if the temporary folder or the index file cannot be created, written or read
 */
@Test
public void testMatching() throws IOException {
    try (TempFolder folder = new TempFolder()) {
        folder.create();
        File testFile = folder.newFile();
        List<Object> testValues;

        // Test String bloom indexer
        try (BloomIndex stringBloomIndex = new BloomIndex()) {
            testValues = ImmutableList.of("a", "ab", "测试", "\n", "%#!", ":dfs");
            stringBloomIndex.setExpectedNumOfEntries(testValues.size());
            stringBloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", testValues)));
            roundTripAndVerifyStrings(stringBloomIndex, testFile);
        }

        // Test with the generic type to be Object
        try (BloomIndex objectBloomIndex = new BloomIndex()) {
            testValues = ImmutableList.of("a", "ab", "测试", "\n", "%#!", ":dfs");
            objectBloomIndex.setExpectedNumOfEntries(testValues.size());
            objectBloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", testValues)));
            roundTripAndVerifyStrings(objectBloomIndex, testFile);
        }

        // Test single insertion: every value goes in through its own addValues call
        try (BloomIndex simpleBloomIndex = new BloomIndex()) {
            simpleBloomIndex.setExpectedNumOfEntries(6);
            for (String single : new String[] {"a", "ab", "测试", "\n", "%#!", ":dfs"}) {
                simpleBloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", ImmutableList.of(single))));
            }
            roundTripAndVerifyStrings(simpleBloomIndex, testFile);
        }
    }
}

/**
 * Serializes {@code index} into {@code file}, deserializes it back into the same instance, and
 * asserts that the six expected strings might be contained while "random" and "abc" are not.
 */
private void roundTripAndVerifyStrings(BloomIndex index, File file) throws IOException {
    try (FileOutputStream out = new FileOutputStream(file)) {
        index.serialize(out);
    }
    try (FileInputStream in = new FileInputStream(file)) {
        index.deserialize(in);
    }
    assertTrue(mightContain(index, VARCHAR, "a"));
    assertTrue(mightContain(index, VARCHAR, "ab"));
    assertTrue(mightContain(index, VARCHAR, "测试"));
    assertTrue(mightContain(index, VARCHAR, "\n"));
    assertTrue(mightContain(index, VARCHAR, "%#!"));
    assertTrue(mightContain(index, VARCHAR, ":dfs"));
    assertFalse(mightContain(index, VARCHAR, "random"));
    assertFalse(mightContain(index, VARCHAR, "abc"));
}
use of io.hetu.core.common.filesystem.TempFolder in project hetu-core by openlookeng.
the class TestBloomIndex method testMatches.
/**
 * Builds a bloom index over the values "a" through "d", round-trips it through a file, and
 * checks that an equality predicate on an inserted value matches while one on "e" does not.
 *
 * @throws IOException if the temporary folder or the index file cannot be created, written or read
 */
@Test
public void testMatches() throws IOException {
    try (TempFolder folder = new TempFolder();
            BloomIndex bloomIndex = new BloomIndex()) {
        folder.create();
        File indexFile = folder.newFile();

        List<Object> indexedValues = ImmutableList.of("a", "b", "c", "d");
        bloomIndex.setExpectedNumOfEntries(indexedValues.size());
        bloomIndex.addValues(Collections.singletonList(new Pair<>("testColumn", indexedValues)));

        // Persist the index and load it back into the same instance before querying.
        try (FileOutputStream out = new FileOutputStream(indexFile)) {
            bloomIndex.serialize(out);
        }
        try (FileInputStream in = new FileInputStream(indexFile)) {
            bloomIndex.deserialize(in);
        }

        RowExpression insertedValuePredicate = simplePredicate(OperatorType.EQUAL, "testColumn", VARCHAR, "a");
        RowExpression missingValuePredicate = simplePredicate(OperatorType.EQUAL, "testColumn", VARCHAR, "e");
        assertTrue(bloomIndex.matches(insertedValuePredicate));
        assertFalse(bloomIndex.matches(missingValuePredicate));
    }
}
use of io.hetu.core.common.filesystem.TempFolder in project hetu-core by openlookeng.
the class TestIndexRecordManager method testDelete.
/**
 * Adds two index records ("1" and "2") to a file-system-backed metastore rooted in a temporary
 * folder, then deletes them one at a time, checking lookups and the record count after every
 * step. Record "1" is deleted twice, and the same state is asserted after the second call.
 *
 * @throws IOException if the temporary folder cannot be created or the record operations fail
 */
@Test
public void testDelete() throws IOException {
    try (TempFolder folder = new TempFolder()) {
        folder.create();
        String metastoreRoot = folder.getRoot().getPath();
        HetuMetastore testMetaStore = new HetuFsMetastore(
                new HetuFsMetastoreConfig().setHetuFileSystemMetastorePath(metastoreRoot),
                FILE_SYSTEM_CLIENT);
        IndexRecordManager manager = new IndexRecordManager(testMetaStore);

        String user = "testUser";
        String table = "testCatalog.testSchema.testTable";
        manager.addIndexRecord("1", user, table, new String[] {"testColumn"}, "minmax", 0L, Collections.emptyList(), Arrays.asList("cp=1"));
        manager.addIndexRecord("2", user, table, new String[] {"testColumn"}, "bloom", 0L, Collections.emptyList(), Arrays.asList("cp=1"));
        assertNotNull(manager.lookUpIndexRecord("1"));
        assertEquals(manager.getIndexRecords().size(), 2);

        // Delete 1
        manager.deleteIndexRecord("1", Collections.emptyList());
        assertNull(manager.lookUpIndexRecord("1"));
        assertNotNull(manager.lookUpIndexRecord("2"));
        assertEquals(manager.getIndexRecords().size(), 1);

        // Delete 1 again; the assertions expect the same state as after the first delete
        manager.deleteIndexRecord("1", Collections.emptyList());
        assertNull(manager.lookUpIndexRecord("1"));
        assertNotNull(manager.lookUpIndexRecord("2"));
        assertEquals(manager.getIndexRecords().size(), 1);

        // Delete 2
        manager.deleteIndexRecord("2", Collections.emptyList());
        assertNull(manager.lookUpIndexRecord("2"));
        assertEquals(manager.getIndexRecords().size(), 0);
    }
}
Aggregations