Example use of io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_OFFSET in the hetu-core project by openLooKeng.
Class OrcRecordReader, method nextPage.
/**
 * Produces the next page of this reader.
 *
 * <p>The blocks of the returned page are created through {@code blockFactory} with a loader
 * callback, so column data is read via {@code readBlock()} only when that callback is invoked.
 * When {@code pageMetadataEnabled} is set, the page additionally carries properties describing
 * its page number, the current stripe (number, offset, length), the index level and, if
 * {@code splitMetadata} is available, the file path and modification time.
 *
 * @return the next page, or {@code null} when {@code prepareNextBatch()} reports a negative batch size
 * @throws IOException declared by the method signature; presumably raised by the underlying batch preparation/read calls
 */
public Page nextPage() throws IOException {
    ColumnReader[] readers = getColumnReaders();
    int rowCount = prepareNextBatch();
    if (rowCount < 0) {
        return null;
    }

    for (int r = 0; r < readers.length; r++) {
        if (readers[r] == null) {
            continue;
        }
        readers[r].prepareNextRead(rowCount);
    }

    batchRead(rowCount);
    matchingRowsInBatchArray = null;
    validateWritePageChecksum(rowCount);

    // build the lazily-loaded blocks of the page
    blockFactory.nextPage();
    Arrays.fill(currentBytesPerCell, 0);
    Block[] lazyBlocks = new Block[readers.length];
    for (int columnIndex = 0; columnIndex < lazyBlocks.length; columnIndex++) {
        lazyBlocks[columnIndex] = lazyColumnBlock(readers, columnIndex, rowCount);
    }

    // without page metadata the page consists of the blocks only
    if (!pageMetadataEnabled) {
        return new Page(rowCount, lazyBlocks);
    }

    Properties metadata = new Properties();
    pageCount++;
    metadata.setProperty(DATASOURCE_PAGE_NUMBER, String.valueOf(pageCount));
    if (isCurrentStripeFinished()) {
        // The total page count is published only on the page that completes the stripe,
        // so its presence marks this page as the stripe's last one; the counter then restarts.
        metadata.setProperty(DATASOURCE_TOTAL_PAGES, String.valueOf(pageCount));
        pageCount = 0;
    }
    metadata.setProperty(DATASOURCE_STRIPE_NUMBER, String.valueOf(currentStripe));
    metadata.setProperty(DATASOURCE_STRIPE_OFFSET, String.valueOf(stripes.get(currentStripe).getOffset()));
    metadata.setProperty(DATASOURCE_STRIPE_LENGTH, String.valueOf(stripes.get(currentStripe).getTotalLength()));
    if (splitMetadata != null) {
        // splitMetadata is null in tests, in which case the file properties are left out
        metadata.setProperty(DATASOURCE_FILE_PATH, splitMetadata.getSplitIdentity());
        metadata.setProperty(DATASOURCE_FILE_MODIFICATION, String.valueOf(splitMetadata.getLastModifiedTime()));
    }
    metadata.setProperty(DATASOURCE_INDEX_LEVEL, "STRIPE");
    return new Page(rowCount, metadata, lazyBlocks);
}

// Creates the block for one column; the reader is looked up in the array only when the loader runs.
private Block lazyColumnBlock(ColumnReader[] readers, int columnIndex, int positionCount) {
    return blockFactory.createBlock(
            positionCount,
            () -> filterRows(readers[columnIndex].readBlock()),
            block -> blockLoaded(columnIndex, block));
}
Example use of io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_OFFSET in the hetu-core project by openLooKeng.
Class FileIndexWriter, method addData.
/**
 * Buffers the values of one page under the stripe named in {@code connectorMetadata} and, once
 * the page counter of that stripe reaches zero, sorts the buffered pages by page number,
 * persists them via {@code persistStripe} and drops them from memory.
 *
 * <p>This method IS thread-safe. Multiple operators can add data to one writer in parallel.
 *
 * <p>{@code connectorMetadata} must contain {@code DATASOURCE_STRIPE_OFFSET}, and
 * {@code DATASOURCE_PAGE_NUMBER} whenever {@code values} is non-empty. {@code DATASOURCE_TOTAL_PAGES}
 * is optional and is read as the number of pages expected for the stripe.
 *
 * @param values values to be indexed, keyed by column name
 * @param connectorMetadata metadata for the index
 * @throws IOException declared by the method signature; presumably raised while persisting a completed stripe
 * @throws NumberFormatException if a required numeric property is missing or malformed
 */
@Override
public void addData(Map<String, List<Object>> values, Properties connectorMetadata) throws IOException {
    long stripeOffset = Long.parseLong(connectorMetadata.getProperty(DATASOURCE_STRIPE_OFFSET));

    // Add values first. Keep the map returned by computeIfAbsent instead of looking it up again,
    // so a concurrent removal of the stripe cannot turn the follow-up lookup into a null.
    Map<String, List<Map.Entry<List<Object>, Integer>>> stripePages = indexPages.computeIfAbsent(stripeOffset, k -> new ConcurrentHashMap<>());
    if (!values.isEmpty()) {
        // The page number is the same for every column of this page: parse it once.
        int pageNumber = Integer.parseInt(connectorMetadata.getProperty(DATASOURCE_PAGE_NUMBER));
        for (Map.Entry<String, List<Object>> e : values.entrySet()) {
            Map.Entry<List<Object>, Integer> pageValues = new AbstractMap.SimpleEntry<>(e.getValue(), pageNumber);
            stripePages.computeIfAbsent(e.getKey(), k -> Collections.synchronizedList(new LinkedList<>())).add(pageValues);
        }
    }

    // Update page count: every received page decrements the counter, and the page carrying the
    // total adds the expected count, so the counter is zero once all pages have arrived.
    AtomicInteger pendingPages = pageCountExpected.computeIfAbsent(stripeOffset, k -> new AtomicInteger());
    int current = pendingPages.decrementAndGet();
    String totalPages = connectorMetadata.getProperty(DATASOURCE_TOTAL_PAGES);
    if (totalPages != null) {
        int expected = Integer.parseInt(totalPages);
        int updatedCurrent = pendingPages.addAndGet(expected);
        LOG.debug("offset %d finishing page received, expected page count: %d, actual received: %d, remaining: %d", stripeOffset, expected, -current, updatedCurrent);
    }

    // Check page count to know if all pages have been received for a stripe. Persist and delete values if true to save memory
    if (pendingPages.get() == 0) {
        synchronized (pendingPages) {
            // Re-read under the lock: another thread may have persisted and removed the stripe already.
            Map<String, List<Map.Entry<List<Object>, Integer>>> completedPages = indexPages.get(stripeOffset);
            if (completedPages != null) {
                LOG.debug("All pages for offset %d have been received. Persisting.", stripeOffset);
                // sort the stripe's pages and collect the values into a single list
                List<Pair<String, List<Object>>> columnValuesMap = new ArrayList<>();
                // each entry represents a mapping from column name -> list<entry<page values, page number>>
                for (Map.Entry<String, List<Map.Entry<List<Object>, Integer>>> entry : completedPages.entrySet()) {
                    // sort the page values lists based on page numbers
                    entry.getValue().sort(Comparator.comparingInt(Map.Entry::getValue));
                    // collect all page values lists into a single list
                    List<Object> columnValues = entry.getValue().stream().map(Map.Entry::getKey).flatMap(Collection::stream).collect(Collectors.toList());
                    columnValuesMap.add(new Pair<>(entry.getKey(), columnValues));
                }
                persistStripe(stripeOffset, columnValuesMap);
                indexPages.remove(stripeOffset);
            } else {
                LOG.debug("All pages for offset %d have been received, but the values are missing. " + "This stripe should have already been persisted by another thread.", stripeOffset);
            }
        }
    }
}
Aggregations