Use of io.prestosql.spi.heuristicindex.Index in the hetu-core project by openLooKeng.
Example source: class TestIndexCacheRemoval, method testIndexCacheEviction.
@Test
public void testIndexCacheEviction() throws Exception {
synchronized (this) {
HiveSplit testHiveSplit;
testHiveSplit = mock(HiveSplit.class);
when(testHiveSplit.getPath()).thenReturn(testPath);
when(testHiveSplit.getLastModifiedTime()).thenReturn(testLastModifiedTime);
IndexCacheLoader indexCacheLoader = mock(IndexCacheLoader.class);
IndexCache indexCache = new IndexCache(indexCacheLoader, loadDelay, new NoOpIndexClient());
// get index for split1
IndexMetadata indexMetadata1 = mock(IndexMetadata.class);
when(indexMetadata1.getLastModifiedTime()).thenReturn(testLastModifiedTime);
Index index1 = mock(Index.class);
when(indexMetadata1.getIndex()).thenReturn(index1);
when(index1.getMemoryUsage()).thenReturn(new DataSize(2, KILOBYTE).toBytes());
List<IndexMetadata> expectedIndices1 = new LinkedList<>();
expectedIndices1.add(indexMetadata1);
when(indexCacheLoader.load(any())).then(new Returns(expectedIndices1));
// each index is has memory usage of 2, and limit is 2*types of idx, so all should be loaded
List<IndexMetadata> actualSplitIndex = indexCache.getIndices(catalog, table, testHiveSplit, effectivePredicate, testPartitions);
assertEquals(actualSplitIndex.size(), 0);
Thread.sleep(loadDelay + 2000);
actualSplitIndex = indexCache.getIndices(catalog, table, testHiveSplit, effectivePredicate, testPartitions);
assertEquals(actualSplitIndex.size(), numberOfIndexTypes);
assertEquals(actualSplitIndex.get(0), indexMetadata1);
assertEquals(indexCache.getCacheSize(), numberOfIndexTypes);
// get index for split2
when(testHiveSplit.getPath()).thenReturn(testPath2);
IndexMetadata indexMetadata2 = mock(IndexMetadata.class);
when(indexMetadata2.getLastModifiedTime()).thenReturn(testLastModifiedTime);
Index index2 = mock(Index.class);
when(indexMetadata2.getIndex()).thenReturn(index2);
when(index2.getMemoryUsage()).thenReturn(new DataSize(2, KILOBYTE).toBytes());
// previous indexes should be evicted bc cache was at max weight limit and new ones should be added
List<IndexMetadata> expectedIndices2 = new LinkedList<>();
expectedIndices2.add(indexMetadata2);
when(indexCacheLoader.load(any())).then(new Returns(expectedIndices2));
actualSplitIndex = indexCache.getIndices(catalog, table, testHiveSplit, effectivePredicate, testPartitions);
assertEquals(actualSplitIndex.size(), 0);
assertEquals(indexCache.getCacheSize(), numberOfIndexTypes);
Thread.sleep(loadDelay + 2000);
actualSplitIndex = indexCache.getIndices(catalog, table, testHiveSplit, effectivePredicate, testPartitions);
assertEquals(actualSplitIndex.size(), numberOfIndexTypes);
assertEquals(actualSplitIndex.get(0), indexMetadata2);
assertEquals(indexCache.getCacheSize(), numberOfIndexTypes);
// get index for split1
when(testHiveSplit.getPath()).thenReturn(testPath);
actualSplitIndex = indexCache.getIndices(catalog, table, testHiveSplit, effectivePredicate, testPartitions);
assertEquals(actualSplitIndex.size(), 0);
assertEquals(indexCache.getCacheSize(), numberOfIndexTypes);
}
}
Use of io.prestosql.spi.heuristicindex.Index in the hetu-core project by openLooKeng.
Example source: class BitmapIndex, method addValues.
@Override
public boolean addValues(List<Pair<String, List<Object>>> values) throws IOException {
checkClosed();
// values can only be added once
if (!updateAllowed.getAndSet(false)) {
throw new UnsupportedOperationException("Unable to update index. " + "An existing Btree index can not be updated because all values must be added together since the " + "position of the values is important.");
}
if (values.size() != 1) {
throw new UnsupportedOperationException("Only single column is supported.");
}
List<Object> columnValues = values.get(0).getSecond();
Map<Object, ArrayList<Integer>> positions = new HashMap<>();
for (int i = 0; i < columnValues.size(); i++) {
Object value = columnValues.get(i);
if (value != null) {
positions.computeIfAbsent(value, k -> new ArrayList<>()).add(i);
}
}
if (positions.isEmpty()) {
return true;
}
List<kotlin.Pair> bitmaps = new ArrayList<>(positions.size());
for (Map.Entry<Object, ArrayList<Integer>> e : positions.entrySet()) {
int[] valuePositions = ArrayUtils.toPrimitive(e.getValue().toArray(new Integer[0]));
RoaringBitmap rr = RoaringBitmap.bitmapOf(valuePositions);
rr.runOptimize();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(bos);
rr.serialize(dos);
dos.close();
Object value = convertToSupportedType(e.getKey());
bitmaps.add(new kotlin.Pair(value, bos.toByteArray()));
}
Collections.sort(bitmaps, (o1, o2) -> ((Comparable) o1.component1()).compareTo(o2.component1()));
getBtreeWriteOptimized(bitmaps.iterator().next().component1(), bitmaps.iterator());
return true;
}
Use of io.prestosql.spi.heuristicindex.Index in the hetu-core project by openLooKeng.
Example source: class BTreeIndex, method deserialize.
@Override
public Index deserialize(InputStream in) throws IOException {
try (OutputStream out = new FileOutputStream(dataFile)) {
IOUtils.copy(new SnappyInputStream(in), out);
}
setupDB();
Properties localProperties = getProperties();
if (localProperties.getProperty(PartitionIndexWriter.SYMBOL_TABLE_KEY_NAME) != null) {
this.symbolTable = SerializationUtils.deserializeMap(localProperties.getProperty(PartitionIndexWriter.SYMBOL_TABLE_KEY_NAME), s -> s, s -> s);
}
return this;
}
Use of io.prestosql.spi.heuristicindex.Index in the hetu-core project by openLooKeng.
Example source: class FileIndexWriter, method addData.
/**
* This method IS thread-safe. Multiple operators can add data to one writer in parallel.
*
* @param values values to be indexed
* @param connectorMetadata metadata for the index
*/
@Override
public void addData(Map<String, List<Object>> values, Properties connectorMetadata) throws IOException {
long stripeOffset = Long.parseLong(connectorMetadata.getProperty(DATASOURCE_STRIPE_OFFSET));
// Add values first
indexPages.computeIfAbsent(stripeOffset, k -> new ConcurrentHashMap<>());
for (Map.Entry<String, List<Object>> e : values.entrySet()) {
indexPages.get(stripeOffset).computeIfAbsent(e.getKey(), k -> Collections.synchronizedList(new LinkedList<>())).add(new AbstractMap.SimpleEntry(e.getValue(), Integer.parseInt(connectorMetadata.getProperty(DATASOURCE_PAGE_NUMBER))));
}
// Update page count
int current = pageCountExpected.computeIfAbsent(stripeOffset, k -> new AtomicInteger()).decrementAndGet();
if (connectorMetadata.getProperty(DATASOURCE_TOTAL_PAGES) != null) {
int expected = Integer.parseInt(connectorMetadata.getProperty(DATASOURCE_TOTAL_PAGES));
int updatedCurrent = pageCountExpected.get(stripeOffset).addAndGet(expected);
LOG.debug("offset %d finishing page received, expected page count: %d, actual received: %d, remaining: %d", stripeOffset, expected, -current, updatedCurrent);
}
// Check page count to know if all pages have been received for a stripe. Persist and delete values if true to save memory
if (pageCountExpected.get(stripeOffset).get() == 0) {
synchronized (pageCountExpected.get(stripeOffset)) {
if (indexPages.containsKey(stripeOffset)) {
LOG.debug("All pages for offset %d have been received. Persisting.", stripeOffset);
// sort the stripe's pages and collect the values into a single list
List<Pair<String, List<Object>>> columnValuesMap = new ArrayList<>();
// each entry represents a mapping from column name -> list<entry<page values, page number>>
for (Map.Entry<String, List<Map.Entry<List<Object>, Integer>>> entry : indexPages.get(stripeOffset).entrySet()) {
// sort the page values lists based on page numbers
entry.getValue().sort(Comparator.comparingInt(Map.Entry::getValue));
// collect all page values lists into a single list
List<Object> columnValues = entry.getValue().stream().map(Map.Entry::getKey).flatMap(Collection::stream).collect(Collectors.toList());
columnValuesMap.add(new Pair(entry.getKey(), columnValues));
}
persistStripe(stripeOffset, columnValuesMap);
indexPages.remove(stripeOffset);
} else {
LOG.debug("All pages for offset %d have been received, but the values are missing. " + "This stripe should have already been persisted by another thread.", stripeOffset);
}
}
}
}
Use of io.prestosql.spi.heuristicindex.Index in the hetu-core project by openLooKeng.
Example source: class HeuristicIndexClient, method readPartitionIndex.
@Override
public List<IndexMetadata> readPartitionIndex(String path) throws IOException {
Path indexKeyPath = Paths.get(path);
Path absolutePath = Paths.get(this.root.toString(), path);
String tableName = indexKeyPath.subpath(0, 1).toString();
String column = indexKeyPath.subpath(1, 2).toString();
List<IndexMetadata> result = new ArrayList<>();
if (fs.exists(absolutePath)) {
List<Path> paths = fs.walk(absolutePath).filter(p -> !fs.isDirectory(p)).collect(Collectors.toList());
for (Path filePath : paths) {
BTreeIndex index = new BTreeIndex();
InputStream inputStream = fs.newInputStream(filePath);
index.deserialize(inputStream);
IndexMetadata indexMetadata = new IndexMetadata(index, tableName, new String[] { column }, root.toString(), filePath.toString(), 0L, 0L);
result.add(indexMetadata);
}
return result;
} else {
LOG.debug("File path doesn't exists" + absolutePath);
return ImmutableList.of();
}
}
Aggregations