Use of io.prestosql.spi.heuristicindex.IndexLookUpException in the hetu-core project by openlookeng.
Class TestBTreeIndex, method testLessThanEqualTo.
/**
 * Checks that a {@code LESS_THAN_OR_EQUAL} lookup works on an index that has been
 * serialized to a file and read back.
 * <p>
 * Keys 100..199 are inserted with values "value0".."value99", so the predicate
 * {@code dummyCol <= 120} must return exactly "value0".."value20".
 */
@Test
public void testLessThanEqualTo() throws IOException, IndexLookUpException {
    BTreeIndex index = new BTreeIndex();
    for (int i = 0; i < 100; i++) {
        List<Pair> pairs = new ArrayList<>();
        Long key = Long.valueOf(100 + i);
        String value = "value" + i;
        pairs.add(new Pair(key, value));
        Pair pair = new Pair("dummyCol", pairs);
        index.addKeyValues(Collections.singletonList(pair));
    }
    File file = getFile();
    // Close both streams deterministically so the file handles are released even
    // when serialize/deserialize throws.
    try (FileOutputStream out = new FileOutputStream(file)) {
        index.serialize(out);
    }
    BTreeIndex readIndex = new BTreeIndex();
    try (FileInputStream in = new FileInputStream(file)) {
        readIndex.deserialize(in);
    }
    RowExpression comparisonExpression = simplePredicate(OperatorType.LESS_THAN_OR_EQUAL, "dummyCol", BIGINT, 120L);
    Iterator<String> result = readIndex.lookUp(comparisonExpression);
    assertNotNull(result, "Result shouldn't be null");
    assertTrue(result.hasNext());
    // The expected values are sorted as strings before being compared against the
    // iterator output ("value0", "value1", "value10", ...).
    Object[] arr = IntStream.rangeClosed(0, 20).mapToObj(i -> "value" + i).toArray();
    Arrays.sort(arr);
    for (int i = 0; i <= 20; i++) {
        assertEquals(arr[i], result.next());
    }
    assertFalse(result.hasNext());
    index.close();
}
Use of io.prestosql.spi.heuristicindex.IndexLookUpException in the hetu-core project by openlookeng.
Class SplitFiltering, method filterUsingInvertedIndex.
/**
 * Narrows {@code inputSplits} down to the splits that may contain matching data,
 * according to the inverted indices found for the referenced columns.
 * <p>
 * Splits are grouped by partition key (or {@code TABLE_LEVEL_KEY}). A split is kept when
 * its partition has no applicable index, when the split was modified after the index,
 * or when its range overlaps one of the stripes returned by the index lookup.
 * <p>
 * This is best effort: any failure (including a {@code null} lookup result) is logged at
 * debug level and the unfiltered {@code inputSplits} are returned.
 *
 * @param expression predicate to look up in the indices
 * @param inputSplits candidate splits
 * @param fullQualifiedTableName table name passed to the index cache
 * @param referencedColumns columns for which indices are loaded
 * @param indexRecordKeyToRecordMap index records passed through to the index cache
 * @param indexerManager provides the index client and the index filter
 * @return the retained splits, or {@code inputSplits} itself when filtering is not possible
 */
private static List<Split> filterUsingInvertedIndex(RowExpression expression, List<Split> inputSplits, String fullQualifiedTableName, Set<String> referencedColumns, Map<String, IndexRecord> indexRecordKeyToRecordMap, HeuristicIndexerManager indexerManager) {
    try {
        Map<String, Long> inputMaxLastUpdated = new HashMap<>();
        Map<String, Long> indexMaxLastUpdated = new HashMap<>();
        Map<String, List<Split>> partitionSplitMap = new HashMap<>();

        // Group the splits per partition and remember the newest modification time of each group.
        for (Split split : inputSplits) {
            String groupKey = getPartitionKeyOrElse(split.getConnectorSplit().getFilePath(), TABLE_LEVEL_KEY);
            long modifiedAt = split.getConnectorSplit().getLastModifiedTime();
            inputMaxLastUpdated.merge(groupKey, modifiedAt, Long::max);
            partitionSplitMap.computeIfAbsent(groupKey, ignored -> new ArrayList<>()).add(split);
        }

        // Nothing could be grouped: hand back every split untouched.
        if (partitionSplitMap.isEmpty()) {
            return inputSplits;
        }

        // Load the indices: column -> every index on that column, across all partitions.
        Map<String, List<IndexMetadata>> allIndices = new HashMap<>();
        for (String column : referencedColumns) {
            List<IndexMetadata> columnIndices = new ArrayList<>();
            for (String indexType : INVERTED_INDEX) {
                columnIndices.addAll(
                        getCache(indexerManager.getIndexClient()).getIndices(
                                fullQualifiedTableName,
                                column,
                                indexType,
                                partitionSplitMap.keySet(),
                                Collections.max(inputMaxLastUpdated.values()),
                                indexRecordKeyToRecordMap));
            }
            // Record, per partition key, the modification time stored with the index so that
            // splits changed after that time can bypass filtering below.
            for (IndexMetadata index : columnIndices) {
                indexMaxLastUpdated.put(
                        getPartitionKeyOrElse(index.getUri(), TABLE_LEVEL_KEY),
                        Long.parseLong(index.getIndex().getProperties().getProperty(MAX_MODIFIED_TIME)));
            }
            allIndices.put(column, columnIndices);
        }

        // Run the predicate against the indices.
        Iterator<String> matches = indexerManager.getIndexFilter(allIndices).lookUp(expression);
        if (matches == null) {
            throw new IndexLookUpException();
        }

        // Matching stripes, bucketed by the file they belong to.
        Map<String, List<Pair<Long, Long>>> stripesByFile = new HashMap<>();
        while (matches.hasNext()) {
            SerializationUtils.LookUpResult hit = deserializeStripeSymbol(matches.next());
            stripesByFile.computeIfAbsent(hit.filepath, ignored -> new ArrayList<>()).add(hit.stripe);
        }

        // True when the only index found is the table-level one; independent of the partition
        // being visited, so it is computed once.
        boolean onlyTableLevelIndex = indexMaxLastUpdated.size() == 1 && indexMaxLastUpdated.containsKey(TABLE_LEVEL_KEY);

        List<Split> retained = new ArrayList<>();
        for (Map.Entry<String, List<Split>> group : partitionSplitMap.entrySet()) {
            String partitionKey = group.getKey();
            List<Split> groupSplits = group.getValue();

            long indexModifiedAt;
            if (indexMaxLastUpdated.containsKey(partitionKey)) {
                // the partition is covered by its own index
                indexModifiedAt = indexMaxLastUpdated.get(partitionKey);
            } else if (onlyTableLevelIndex) {
                // the partition is covered by the table-level index
                indexModifiedAt = indexMaxLastUpdated.get(TABLE_LEVEL_KEY);
            } else {
                // no index applies to this partition: keep all of its splits
                retained.addAll(groupSplits);
                continue;
            }

            for (Split split : groupSplits) {
                String path = new URI(split.getConnectorSplit().getFilePath()).getPath();
                if (split.getConnectorSplit().getLastModifiedTime() > indexModifiedAt) {
                    // data is newer than the index, so the index cannot be trusted for it
                    retained.add(split);
                    continue;
                }
                if (!stripesByFile.containsKey(path)) {
                    // the lookup returned nothing for this file
                    continue;
                }
                Pair<Long, Long> targetRange = new Pair<>(split.getConnectorSplit().getStartIndex(), split.getConnectorSplit().getEndIndex());
                // Stripe matching: sort the matching stripes by start position,
                // e.g. (5,10), (18,25), (30,35), (35,40), then let rangeSearch decide whether
                // [targetStart, targetEnd] overlaps any of them.
                List<Pair<Long, Long>> stripes = stripesByFile.get(path);
                stripes.sort(Comparator.comparingLong(Pair::getFirst));
                if (rangeSearch(stripes, targetRange)) {
                    retained.add(split);
                }
            }
        }
        return retained;
    } catch (Throwable e) {
        LOG.debug("Exception occurred while filtering. Returning original splits", e);
        return inputSplits;
    }
}
Use of io.prestosql.spi.heuristicindex.IndexLookUpException in the hetu-core project by openlookeng.
Class AbstractOrcRecordReader, method filterStripeUsingIndex.
/**
 * Decides whether a stripe can be skipped, based on the indices registered for its offset.
 * <p>
 * Conjunctive ({@code and}) domains are evaluated first. If any indexed column reports
 * that it does not match, the stripe is skipped. Otherwise the row iterators returned by
 * the indices are intersected and stored in {@code stripeMatchingRows} for this stripe.
 * Disjunctive ({@code or}) domains are only consulted when no conjunctive domain had a
 * usable index; the stripe is then skipped only if none of the indexed columns matches.
 * <p>
 * Columns without exactly one index in this stripe are ignored. Any lookup failure
 * ({@code UnsupportedOperationException} or {@code IndexLookUpException}) keeps the stripe.
 * <p>
 * NOTE(review): only the first domain of each {@code or} list is evaluated, and {@code or}
 * columns without an index are ignored when deciding to skip. Confirm this is intended.
 *
 * @param stripe stripe under consideration
 * @param stripeOffsetToIndex indices keyed by stripe offset
 * @param and column name to domain, for conjunctive predicates
 * @param or column name to domains, for disjunctive predicates
 * @return {@code true} if the stripe should be filtered out, {@code false} if it must be read
 */
private boolean filterStripeUsingIndex(StripeInformation stripe, Map<Long, List<IndexMetadata>> stripeOffsetToIndex, Map<String, Domain> and, Map<String, List<Domain>> or) {
    if (stripeOffsetToIndex.isEmpty()) {
        return false;
    }
    List<IndexMetadata> stripeIndex = stripeOffsetToIndex.get(Long.valueOf(stripe.getOffset()));
    // A stripe with no entry in the map has nothing to prune with. Keep it instead of
    // failing with a NullPointerException when the list is streamed below.
    if (stripeIndex == null || stripeIndex.isEmpty()) {
        return false;
    }

    Map<Index, Domain> andDomainMap = new HashMap<>();
    Map<Index, Domain> orDomainMap = new HashMap<>();
    for (Map.Entry<String, Domain> domainEntry : and.entrySet()) {
        IndexMetadata metadata = findUniqueColumnIndex(stripeIndex, domainEntry.getKey());
        if (metadata == null) {
            continue;
        }
        andDomainMap.put(metadata.getIndex(), domainEntry.getValue());
    }
    for (Map.Entry<String, List<Domain>> domainEntry : or.entrySet()) {
        List<Domain> columnDomain = domainEntry.getValue();
        // Guard against an empty domain list; get(0) would throw otherwise.
        if (columnDomain == null || columnDomain.isEmpty()) {
            continue;
        }
        IndexMetadata metadata = findUniqueColumnIndex(stripeIndex, domainEntry.getKey());
        if (metadata == null) {
            continue;
        }
        orDomainMap.put(metadata.getIndex(), columnDomain.get(0));
    }

    if (!andDomainMap.isEmpty()) {
        List<Iterator<Integer>> matchings = new ArrayList<>(andDomainMap.size());
        for (Map.Entry<Index, Domain> e : andDomainMap.entrySet()) {
            try {
                Iterator<Integer> lookUpRes = e.getKey().lookUp(e.getValue());
                if (lookUpRes != null) {
                    matchings.add(lookUpRes);
                } else if (!e.getKey().matches(e.getValue())) {
                    // one conjunct cannot match: the whole stripe can be skipped
                    return true;
                }
            } catch (UnsupportedOperationException | IndexLookUpException ex) {
                // the index cannot answer this predicate: keep the stripe
                return false;
            }
        }
        if (!matchings.isEmpty()) {
            Iterator<Integer> thisStripeMatchingRows = SequenceUtils.intersect(matchings);
            PeekingIterator<Integer> peekingIterator = Iterators.peekingIterator(thisStripeMatchingRows);
            this.stripeMatchingRows.put(stripe, peekingIterator);
        }
        return false;
    }

    if (!orDomainMap.isEmpty()) {
        for (Map.Entry<Index, Domain> e : orDomainMap.entrySet()) {
            try {
                Iterator<Integer> thisStripeMatchingRows = e.getKey().lookUp(e.getValue());
                if (thisStripeMatchingRows != null) {
                    if (thisStripeMatchingRows.hasNext()) {
                        /* any one matched; then include the stripe */
                        return false;
                    }
                } else if (e.getKey().matches(e.getValue())) {
                    return false;
                }
            } catch (UnsupportedOperationException | IndexLookUpException ex) {
                // the index cannot answer this predicate: keep the stripe
                return false;
            }
        }
        // none of the indexed disjuncts matched
        return true;
    }
    return false;
}

/**
 * Returns the single index in {@code stripeIndex} whose first column equals
 * {@code columnName} (case-insensitive), or {@code null} when there is none or more than one.
 */
private static IndexMetadata findUniqueColumnIndex(List<IndexMetadata> stripeIndex, String columnName) {
    List<IndexMetadata> candidates = stripeIndex.stream()
            .filter(p -> p.getColumns()[0].equalsIgnoreCase(columnName))
            .collect(Collectors.toList());
    // if the index exists, there should only be one index for this column within this stripe
    return candidates.size() == 1 ? candidates.get(0) : null;
}
Use of io.prestosql.spi.heuristicindex.IndexLookUpException in the hetu-core project by openlookeng.
Class HeuristicIndexFilter, method lookUp.
/**
 * Looks up the given expression in the available indices.
 * <p>
 * Call expressions and the {@code IN} / {@code BETWEEN} special forms are delegated to
 * {@code lookUpAll}. {@code AND} and {@code OR} recurse into their first two arguments:
 * <ul>
 * <li>{@code AND}: a {@code null} side is ignored and the other side is returned
 * ({@code null} if both are {@code null}); otherwise the two results are intersected.</li>
 * <li>{@code OR}: both sides must be non-null, and their union is returned.</li>
 * </ul>
 *
 * @param expression expression to evaluate
 * @return iterator over the matching entries, possibly {@code null}
 * @throws IndexLookUpException if the expression is of an unsupported kind, or if either
 *         side of an {@code OR} yields {@code null}
 */
@Override
public <I extends Comparable<I>> Iterator<I> lookUp(Object expression) throws IndexLookUpException {
    if (expression instanceof CallExpression) {
        return lookUpAll((RowExpression) expression);
    }
    if (!(expression instanceof SpecialForm)) {
        throw new IndexLookUpException();
    }

    SpecialForm form = (SpecialForm) expression;
    switch (form.getForm()) {
        case IN:
        case BETWEEN:
            return lookUpAll((RowExpression) expression);
        case AND: {
            Iterator<I> left = lookUp(form.getArguments().get(0));
            Iterator<I> right = lookUp(form.getArguments().get(1));
            if (left == null) {
                // also covers the case where both sides are null
                return right;
            }
            if (right == null) {
                return left;
            }
            return SequenceUtils.intersect(left, right);
        }
        case OR: {
            Iterator<I> left = lookUp(form.getArguments().get(0));
            Iterator<I> right = lookUp(form.getArguments().get(1));
            if (left != null && right != null) {
                return SequenceUtils.union(left, right);
            }
            throw new IndexLookUpException();
        }
        default:
            throw new IndexLookUpException();
    }
}
Use of io.prestosql.spi.heuristicindex.IndexLookUpException in the hetu-core project by openlookeng.
Class HeuristicIndexFilter, method lookUpAll.
/**
 * Evaluates the expression against every index selected for its column and unions the
 * individual results.
 * <p>
 * A {@code null} return value stands for the "universe" result U, i.e. for any other
 * result A: U \and A == A and U \or A == U. It is returned when the first argument of the
 * expression is not a variable reference, when no index is selected, or when a lookup
 * fails.
 * <p>
 * An {@code IndexLookUpException} thrown by any index is rethrown as a
 * {@code RuntimeException}; every {@code RuntimeException} raised during the lookup or the
 * union is caught and turned into {@code null}.
 *
 * @param expression a call expression, or an {@code IN} / {@code BETWEEN} special form
 * @return the union of the per-index results, or {@code null} as described above
 */
private <T extends Comparable<T>> Iterator<T> lookUpAll(RowExpression expression) {
    // Both supported shapes carry the column reference as their first argument.
    RowExpression firstArgument = null;
    if (expression instanceof CallExpression) {
        firstArgument = ((CallExpression) expression).getArguments().get(0);
    }
    if (expression instanceof SpecialForm) {
        SpecialForm specialForm = (SpecialForm) expression;
        SpecialForm.Form form = specialForm.getForm();
        if (form == SpecialForm.Form.BETWEEN || form == SpecialForm.Form.IN) {
            firstArgument = specialForm.getArguments().get(0);
        }
    }
    if (!(firstArgument instanceof VariableReferenceExpression)) {
        return null;
    }

    String columnName = ((VariableReferenceExpression) firstArgument).getName();
    List<IndexMetadata> selectedIndex = HeuristicIndexSelector.select(expression, indices.get(columnName));
    if (selectedIndex.isEmpty()) {
        return null;
    }

    try {
        List<Iterator<T>> perIndexResults = selectedIndex.parallelStream()
                .map(metadata -> {
                    try {
                        return (Iterator<T>) metadata.getIndex().lookUp(expression);
                    } catch (IndexLookUpException e) {
                        // checked exceptions cannot leave the lambda; unwrapped by the catch below
                        throw new RuntimeException(e);
                    }
                })
                .collect(Collectors.toList());
        return SequenceUtils.union(perIndexResults);
    } catch (RuntimeException re) {
        return null;
    }
}
Aggregations