use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.
the class TestPartitionIndexWriter method testAddValueMultThread.
/**
 * Submits two {@code TestDriver} tasks that share one {@link PartitionIndexWriter} and checks the
 * writer's state after both have signalled completion through the latch.
 *
 * <p>The two tasks receive connector metadata that differs only in the data file path
 * ({@code file1} vs {@code file2}); the test expects 10 entries in the data map and 2 entries in
 * the symbol table once they are done.
 *
 * @throws InterruptedException if the test thread is interrupted while waiting on the latch
 */
@Test
public void testAddValueMultThread() throws InterruptedException {
    List<Pair<String, Type>> columns = new ArrayList<>();
    List<String> partitions = Collections.singletonList("partition1");
    Properties properties = new Properties();
    CreateIndexMetadata createIndexMetadata = new CreateIndexMetadata("hetu_partition_idx", "testTable", "BTREE", 0L, columns, partitions, properties, "testuser", CreateIndexMetadata.Level.PARTITION);
    HetuFileSystemClient fileSystemClient = Mockito.mock(HetuFileSystemClient.class);

    Properties connectorMetadata1 = new Properties();
    connectorMetadata1.setProperty(HetuConstant.DATASOURCE_FILE_MODIFICATION, String.valueOf(System.currentTimeMillis()));
    connectorMetadata1.setProperty(HetuConstant.DATASOURCE_FILE_PATH, "hdfs://testable/testcolumn/cp=123121/file1");
    connectorMetadata1.setProperty(HetuConstant.DATASOURCE_STRIPE_OFFSET, "3");
    connectorMetadata1.setProperty(HetuConstant.DATASOURCE_STRIPE_LENGTH, "100");

    Properties connectorMetadata2 = new Properties();
    connectorMetadata2.setProperty(HetuConstant.DATASOURCE_FILE_MODIFICATION, String.valueOf(System.currentTimeMillis()));
    connectorMetadata2.setProperty(HetuConstant.DATASOURCE_FILE_PATH, "hdfs://testable/testcolumn/cp=123121/file2");
    connectorMetadata2.setProperty(HetuConstant.DATASOURCE_STRIPE_OFFSET, "3");
    connectorMetadata2.setProperty(HetuConstant.DATASOURCE_STRIPE_LENGTH, "100");

    PartitionIndexWriter indexWriter = new PartitionIndexWriter(createIndexMetadata, fileSystemClient, Paths.get("/tmp"));
    ExecutorService executorService = Executors.newFixedThreadPool(2);
    try {
        // presumably each TestDriver counts the latch down once when it finishes — confirm in TestDriver
        CountDownLatch latch = new CountDownLatch(2);
        executorService.submit(new TestDriver(indexWriter, connectorMetadata1, latch));
        executorService.submit(new TestDriver(indexWriter, connectorMetadata2, latch));

        // Fail with an explicit message on timeout instead of asserting on a half-populated writer.
        if (!latch.await(5, TimeUnit.SECONDS)) {
            throw new AssertionError("TestDriver tasks did not finish within 5 seconds");
        }

        Map<Comparable<? extends Comparable<?>>, String> result = indexWriter.getDataMap();
        assertEquals(10, result.size());
        assertEquals(2, indexWriter.getSymbolTable().size());
    }
    finally {
        // Always release the pool threads, even when an assertion above fails.
        executorService.shutdownNow();
    }
}
use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.
the class TestBTreeIndex method testLessThanEqualTo.
/**
 * Builds a B-tree index with keys 100..199 (value {@code "value" + i} for key {@code 100 + i}),
 * round-trips it through a file, and checks a {@code <= 120} lookup on the reloaded index.
 *
 * <p>The lookup must yield exactly the 21 values {@code value0}..{@code value20}, in the order
 * produced by sorting those strings with {@link Arrays#sort(Object[])}.
 *
 * @throws IOException if serializing, deserializing or closing fails
 * @throws IndexLookUpException if the lookup itself fails
 */
@Test
public void testLessThanEqualTo() throws IOException, IndexLookUpException {
    BTreeIndex index = new BTreeIndex();
    try {
        for (int i = 0; i < 100; i++) {
            List<Pair> pairs = new ArrayList<>();
            Long key = Long.valueOf(100 + i);
            String value = "value" + i;
            pairs.add(new Pair(key, value));
            Pair pair = new Pair("dummyCol", pairs);
            index.addKeyValues(Collections.singletonList(pair));
        }

        File file = getFile();
        try (FileOutputStream out = new FileOutputStream(file)) {
            index.serialize(out);
        }

        // NOTE(review): readIndex is never closed, as in the original test — confirm whether it should be.
        BTreeIndex readIndex = new BTreeIndex();
        // The input stream stays open for the whole lookup, as before, and is closed afterwards.
        try (FileInputStream in = new FileInputStream(file)) {
            readIndex.deserialize(in);

            RowExpression comparisonExpression = simplePredicate(OperatorType.LESS_THAN_OR_EQUAL, "dummyCol", BIGINT, 120L);
            Iterator<String> result = readIndex.lookUp(comparisonExpression);
            assertNotNull(result, "Result shouldn't be null");
            assertTrue(result.hasNext());

            // Keys 100..120 map to value0..value20; compare against their sorted string order.
            Object[] arr = IntStream.rangeClosed(0, 20).mapToObj(i -> "value" + i).toArray();
            Arrays.sort(arr);
            for (int i = 0; i <= 20; i++) {
                assertEquals(arr[i], result.next());
            }
            assertFalse(result.hasNext());
        }
    }
    finally {
        // Close the source index even when an assertion fails.
        index.close();
    }
}
use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.
the class TestBTreeIndex method testDeserialize.
/**
 * Builds a B-tree index with keys 100..1099 that all map to the same value, writes it to a
 * temporary file, reads it back into a fresh index, and checks that an equality lookup for key
 * 101 returns that value.
 *
 * @throws IOException if serializing, deserializing or closing fails
 * @throws IndexLookUpException if the lookup itself fails
 */
@Test
public void testDeserialize() throws IOException, IndexLookUpException {
    BTreeIndex index = new BTreeIndex();
    try {
        String value = "foo bar";
        for (int i = 0; i < 1000; i++) {
            List<Pair> pairs = new ArrayList<>();
            Long key = Long.valueOf(100 + i);
            pairs.add(new Pair(key, value));
            Pair pair = new Pair("dummyCol", pairs);
            index.addKeyValues(Collections.singletonList(pair));
        }

        File file = File.createTempFile("test-serialize-", UUID.randomUUID().toString());
        // Do not leave the temporary file behind after the test JVM exits.
        file.deleteOnExit();
        try (FileOutputStream out = new FileOutputStream(file)) {
            index.serialize(out);
        }

        // NOTE(review): readIndex is never closed, as in the original test — confirm whether it should be.
        Index readIndex = new BTreeIndex();
        // The input stream stays open for the whole lookup, as before, and is closed afterwards.
        try (FileInputStream in = new FileInputStream(file)) {
            readIndex.deserialize(in);

            // NOTE(review): the predicate names "column" while the data was added under "dummyCol";
            // the test still expects a hit, so presumably the lookup ignores the column name — confirm.
            RowExpression comparisonExpression = simplePredicate(OperatorType.EQUAL, "column", BIGINT, 101L);
            Iterator<String> result = readIndex.lookUp(comparisonExpression);
            assertNotNull(result, "Result shouldn't be null");
            assertTrue(result.hasNext());
            assertEquals(value, result.next());
        }
    }
    finally {
        // Close the source index even when an assertion fails.
        index.close();
    }
}
use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.
the class TestBTreeIndex method testLookup.
/**
 * Builds a B-tree index with keys 100..199 (value {@code "value" + i} for key {@code 100 + i}),
 * round-trips it through a file, and checks that an equality lookup for key 101 on the reloaded
 * index returns {@code "value1"}.
 *
 * @throws IOException if serializing, deserializing or closing fails
 * @throws IndexLookUpException if the lookup itself fails
 */
@Test
public void testLookup() throws IOException, IndexLookUpException {
    BTreeIndex index = new BTreeIndex();
    try {
        for (int i = 0; i < 100; i++) {
            List<Pair> pairs = new ArrayList<>();
            Long key = Long.valueOf(100 + i);
            String value = "value" + i;
            pairs.add(new Pair(key, value));
            Pair pair = new Pair("dummyCol", pairs);
            index.addKeyValues(Collections.singletonList(pair));
        }

        File file = getFile();
        try (FileOutputStream out = new FileOutputStream(file)) {
            index.serialize(out);
        }

        // NOTE(review): readIndex is never closed, as in the original test — confirm whether it should be.
        BTreeIndex readIndex = new BTreeIndex();
        // The input stream stays open for the whole lookup, as before, and is closed afterwards.
        try (FileInputStream in = new FileInputStream(file)) {
            readIndex.deserialize(in);

            RowExpression comparisonExpression = simplePredicate(OperatorType.EQUAL, "dummyCol", BIGINT, 101L);
            Iterator<String> result = readIndex.lookUp(comparisonExpression);
            assertNotNull(result, "Result shouldn't be null");
            assertTrue(result.hasNext());
            assertEquals("value1", result.next());
        }
    }
    finally {
        // Close the source index even when an assertion fails.
        index.close();
    }
}
use of io.prestosql.spi.heuristicindex.Pair in project hetu-core by openlookeng.
the class SplitFiltering method filterUsingInvertedIndex.
/**
 * Narrows {@code inputSplits} to the splits that the available inverted indices cannot rule out
 * for {@code expression}.
 *
 * <p>Splits are grouped by the partition key derived from their file path (falling back to
 * {@code TABLE_LEVEL_KEY}). For every referenced column, indices of each type in
 * {@code INVERTED_INDEX} are fetched from the index cache, the expression is looked up through an
 * {@link IndexFilter}, and each returned symbol is parsed into a file path plus a stripe range.
 * A split is kept when:
 * <ul>
 * <li>its partition has neither its own index nor a single table-level index, or</li>
 * <li>it was modified after the index time recorded for its partition, or</li>
 * <li>its [start, end] range overlaps a matching stripe of its file according to
 * {@code rangeSearch}.</li>
 * </ul>
 *
 * <p>This is best-effort: any {@link Throwable} raised along the way is logged at debug level
 * and the original {@code inputSplits} list is returned unchanged.
 *
 * @param expression predicate passed to the index filter's lookup
 * @param inputSplits splits to filter
 * @param fullQualifiedTableName table name passed to the index cache
 * @param referencedColumns columns for which indices are loaded
 * @param indexRecordKeyToRecordMap index records, passed through to the index cache
 * @param indexerManager source of the index client and of the {@link IndexFilter}
 * @return the retained splits, or {@code inputSplits} itself when no split could be grouped or
 *         when filtering failed
 */
private static List<Split> filterUsingInvertedIndex(RowExpression expression, List<Split> inputSplits, String fullQualifiedTableName, Set<String> referencedColumns, Map<String, IndexRecord> indexRecordKeyToRecordMap, HeuristicIndexerManager indexerManager) {
    try {
        Map<String, Long> inputMaxLastUpdated = new HashMap<>();
        Map<String, Long> indexMaxLastUpdated = new HashMap<>();
        Map<String, List<Split>> partitionSplitMap = new HashMap<>();

        // Group splits by partition key and track the newest modification time per partition.
        for (Split split : inputSplits) {
            String filePathStr = split.getConnectorSplit().getFilePath();
            String indexKey = getPartitionKeyOrElse(filePathStr, TABLE_LEVEL_KEY);
            long lastUpdated = split.getConnectorSplit().getLastModifiedTime();
            inputMaxLastUpdated.merge(indexKey, lastUpdated, Long::max);
            partitionSplitMap.computeIfAbsent(indexKey, key -> new ArrayList<>()).add(split);
        }

        // Split is not compliant to table structure. Return all the splits
        if (partitionSplitMap.isEmpty()) {
            return inputSplits;
        }

        // Newest modification time over all input splits; non-empty here because at least one split was grouped.
        Long newestInputModification = Collections.max(inputMaxLastUpdated.values());

        // col -> list of all indices on this column (all partitions)
        Map<String, List<IndexMetadata>> allIndices = new HashMap<>();

        // index loading and verification
        for (String column : referencedColumns) {
            List<IndexMetadata> indexMetadataList = new ArrayList<>();
            for (String indexType : INVERTED_INDEX) {
                indexMetadataList.addAll(getCache(indexerManager.getIndexClient()).getIndices(fullQualifiedTableName, column, indexType, partitionSplitMap.keySet(), newestInputModification, indexRecordKeyToRecordMap));
            }

            // Record, per partition key, the max modified time stored in the index properties.
            // It is compared against each split's modification time during filtering below.
            // A later index for the same partition key overwrites an earlier entry.
            for (IndexMetadata index : indexMetadataList) {
                String partitionKey = getPartitionKeyOrElse(index.getUri(), TABLE_LEVEL_KEY);
                long lastModifiedTime = Long.parseLong(index.getIndex().getProperties().getProperty(MAX_MODIFIED_TIME));
                indexMaxLastUpdated.put(partitionKey, lastModifiedTime);
            }

            allIndices.put(column, indexMetadataList);
        }

        // lookup index
        IndexFilter filter = indexerManager.getIndexFilter(allIndices);
        Iterator<String> iterator = filter.lookUp(expression);
        if (iterator == null) {
            throw new IndexLookUpException();
        }

        // all positioned looked up from index, organized by file path
        Map<String, List<Pair<Long, Long>>> lookUpResults = new HashMap<>();
        while (iterator.hasNext()) {
            SerializationUtils.LookUpResult parsedLookUpResult = deserializeStripeSymbol(iterator.next());
            lookUpResults.computeIfAbsent(parsedLookUpResult.filepath, key -> new ArrayList<>()).add(parsedLookUpResult.stripe);
        }

        // the only index found is a table-level one, so it covers every partition
        boolean onlyTableLevelIndex = indexMaxLastUpdated.size() == 1 && indexMaxLastUpdated.containsKey(TABLE_LEVEL_KEY);

        // filtering
        List<Split> filteredSplits = new ArrayList<>();
        for (Map.Entry<String, List<Split>> entry : partitionSplitMap.entrySet()) {
            String partitionKey = entry.getKey();
            List<Split> partitionSplits = entry.getValue();

            // the partition is indexed by its own partition's index
            boolean partitionHasOwnIndex = indexMaxLastUpdated.containsKey(partitionKey);

            if (!partitionHasOwnIndex && !onlyTableLevelIndex) {
                // no index covers this partition: nothing can be ruled out
                filteredSplits.addAll(partitionSplits);
                continue;
            }

            long indexLastModifiedTimeOfThisPartition = indexMaxLastUpdated.get(partitionHasOwnIndex ? partitionKey : TABLE_LEVEL_KEY);

            for (Split split : partitionSplits) {
                String filePathStr = new URI(split.getConnectorSplit().getFilePath()).getPath();

                if (split.getConnectorSplit().getLastModifiedTime() > indexLastModifiedTimeOfThisPartition) {
                    // data is newer than the index, so the index cannot be trusted for this split
                    filteredSplits.add(split);
                    continue;
                }

                List<Pair<Long, Long>> stripes = lookUpResults.get(filePathStr);
                if (stripes == null) {
                    // the index has no match in this file: drop the split
                    continue;
                }

                Pair<Long, Long> targetRange = new Pair<>(split.getConnectorSplit().getStartIndex(), split.getConnectorSplit().getEndIndex());

                // do stripe matching: check if [targetStart, targetEnd] has any overlapping with the matching stripes
                // first sort matching stripes, e.g. (5,10), (18,25), (30,35), (35, 40)
                // then do binary search for both start and end of the target
                stripes.sort(Comparator.comparingLong(Pair::getFirst));
                if (rangeSearch(stripes, targetRange)) {
                    filteredSplits.add(split);
                }
            }
        }

        return filteredSplits;
    }
    catch (Throwable e) {
        // Deliberate best-effort: index problems must never fail the query, so fall back to no filtering.
        LOG.debug("Exception occurred while filtering. Returning original splits", e);
        return inputSplits;
    }
}
Aggregations