Search in sources :

Example 1 with DATASOURCE_STRIPE_OFFSET

Use of io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_OFFSET in the project hetu-core by openlookeng.

From the class OrcRecordReader, method nextPage.

/**
 * Prepares the next batch of rows and wraps it in a {@link Page}.
 *
 * <p>Each block of the page is obtained from {@code blockFactory} with a loader that calls the
 * matching column reader's {@code readBlock()} and passes the result through {@code filterRows}.
 * When {@code pageMetadataEnabled} is set, the page additionally carries a {@link Properties}
 * object holding the page number, stripe number/offset/length, the index level and, if
 * {@code splitMetadata} is available, the file path and modification time.
 *
 * @return the next page, or {@code null} when {@code prepareNextBatch()} returns a negative size
 * @throws IOException declared by this method; presumably raised by the underlying reads
 */
public Page nextPage() throws IOException {
    ColumnReader[] readers = getColumnReaders();
    int rowCount = prepareNextBatch();
    if (rowCount < 0) {
        return null;
    }

    for (int i = 0; i < readers.length; i++) {
        ColumnReader reader = readers[i];
        if (reader == null) {
            continue;
        }
        reader.prepareNextRead(rowCount);
    }
    batchRead(rowCount);
    matchingRowsInBatchArray = null;
    validateWritePageChecksum(rowCount);

    // create a lazy page
    blockFactory.nextPage();
    Arrays.fill(currentBytesPerCell, 0);
    Block[] pageBlocks = new Block[readers.length];
    for (int i = 0; i < pageBlocks.length; i++) {
        // lambdas need an effectively final copy of the loop index
        final int channel = i;
        pageBlocks[channel] = blockFactory.createBlock(
                rowCount,
                () -> filterRows(readers[channel].readBlock()),
                loaded -> blockLoaded(channel, loaded));
    }

    // only include page metadata if enabled
    if (!pageMetadataEnabled) {
        return new Page(rowCount, pageBlocks);
    }

    Properties metadata = new Properties();
    pageCount++;
    metadata.setProperty(DATASOURCE_PAGE_NUMBER, String.valueOf(pageCount));
    if (isCurrentStripeFinished()) {
        // The total page count is written only on the last page of a stripe, so its
        // presence in the metadata tells the consumer that the stripe is complete.
        metadata.setProperty(DATASOURCE_TOTAL_PAGES, String.valueOf(pageCount));
        // restart numbering for the next stripe
        pageCount = 0;
    }
    metadata.setProperty(DATASOURCE_STRIPE_NUMBER, String.valueOf(currentStripe));
    metadata.setProperty(DATASOURCE_STRIPE_OFFSET, String.valueOf(stripes.get(currentStripe).getOffset()));
    metadata.setProperty(DATASOURCE_STRIPE_LENGTH, String.valueOf(stripes.get(currentStripe).getTotalLength()));
    if (splitMetadata != null) {
        // splitMetadata is null in tests; file-level properties are skipped in that case
        metadata.setProperty(DATASOURCE_FILE_PATH, splitMetadata.getSplitIdentity());
        metadata.setProperty(DATASOURCE_FILE_MODIFICATION, String.valueOf(splitMetadata.getLastModifiedTime()));
    }
    metadata.setProperty(DATASOURCE_INDEX_LEVEL, "STRIPE");
    return new Page(rowCount, metadata, pageBlocks);
}
Also used : IntStream(java.util.stream.IntStream) StripeStatistics(io.prestosql.orc.metadata.statistics.StripeStatistics) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) ColumnReaders.createColumnReader(io.prestosql.orc.reader.ColumnReaders.createColumnReader) DATASOURCE_TOTAL_PAGES(io.prestosql.spi.HetuConstant.DATASOURCE_TOTAL_PAGES) Slice(io.airlift.slice.Slice) Logger(io.airlift.log.Logger) DATASOURCE_FILE_PATH(io.prestosql.spi.HetuConstant.DATASOURCE_FILE_PATH) PeekingIterator(com.google.common.collect.PeekingIterator) Function(java.util.function.Function) ArrayList(java.util.ArrayList) DATASOURCE_STRIPE_NUMBER(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_NUMBER) DATASOURCE_STRIPE_OFFSET(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_OFFSET) Slices(io.airlift.slice.Slices) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) Type(io.prestosql.spi.type.Type) Math.toIntExact(java.lang.Math.toIntExact) Block(io.prestosql.spi.block.Block) ColumnReaders(io.prestosql.orc.reader.ColumnReaders) Properties(java.util.Properties) ImmutableMap(com.google.common.collect.ImmutableMap) DATASOURCE_FILE_MODIFICATION(io.prestosql.spi.HetuConstant.DATASOURCE_FILE_MODIFICATION) OrcType(io.prestosql.orc.metadata.OrcType) HiveWriterVersion(io.prestosql.orc.metadata.PostScript.HiveWriterVersion) Page(io.prestosql.spi.Page) IOException(java.io.IOException) Maps(com.google.common.collect.Maps) ColumnMetadata(io.prestosql.orc.metadata.ColumnMetadata) DATASOURCE_INDEX_LEVEL(io.prestosql.spi.HetuConstant.DATASOURCE_INDEX_LEVEL) ColumnReader(io.prestosql.orc.reader.ColumnReader) DATASOURCE_PAGE_NUMBER(io.prestosql.spi.HetuConstant.DATASOURCE_PAGE_NUMBER) MetadataReader(io.prestosql.orc.metadata.MetadataReader) StripeInformation(io.prestosql.orc.metadata.StripeInformation) DataSize(io.airlift.units.DataSize) List(java.util.List) ClassLayout(org.openjdk.jol.info.ClassLayout) Domain(io.prestosql.spi.predicate.Domain) 
DATASOURCE_STRIPE_LENGTH(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_LENGTH) ColumnStatistics(io.prestosql.orc.metadata.statistics.ColumnStatistics) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) SplitMetadata(io.prestosql.spi.heuristicindex.SplitMetadata) Block(io.prestosql.spi.block.Block) Page(io.prestosql.spi.Page) ColumnReaders.createColumnReader(io.prestosql.orc.reader.ColumnReaders.createColumnReader) ColumnReader(io.prestosql.orc.reader.ColumnReader) Properties(java.util.Properties)

Example 2 with DATASOURCE_STRIPE_OFFSET

Use of io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_OFFSET in the project hetu-core by openlookeng.

From the class FileIndexWriter, method addData.

/**
 * Buffers one page of values for the stripe identified in {@code connectorMetadata} and, once
 * the page counter of that stripe reaches zero, persists the stripe and drops its buffered values.
 *
 * <p>This method IS thread-safe. Multiple operators can add data to one writer in parallel.
 *
 * <p>Page accounting: every call decrements the stripe's counter by one, and the call whose
 * metadata carries {@code DATASOURCE_TOTAL_PAGES} adds that total back. The counter is therefore
 * zero exactly when the number of received pages equals the announced total.
 *
 * @param values            values to be indexed
 * @param connectorMetadata metadata for the index; {@code DATASOURCE_STRIPE_OFFSET} and
 *                          {@code DATASOURCE_PAGE_NUMBER} are read on every call,
 *                          {@code DATASOURCE_TOTAL_PAGES} only when present
 * @throws IOException           declared by this method; presumably propagated from {@code persistStripe}
 * @throws NumberFormatException if a required numeric property is missing or not a valid number
 */
@Override
public void addData(Map<String, List<Object>> values, Properties connectorMetadata) throws IOException {
    long stripeOffset = Long.parseLong(connectorMetadata.getProperty(DATASOURCE_STRIPE_OFFSET));
    // The page number is the same for every column of this call, so parse it once up front.
    int pageNumber = Integer.parseInt(connectorMetadata.getProperty(DATASOURCE_PAGE_NUMBER));

    // Add values first. Keep the reference returned by computeIfAbsent instead of looking the
    // stripe up again for every column.
    Map<String, List<Map.Entry<List<Object>, Integer>>> stripePages =
            indexPages.computeIfAbsent(stripeOffset, k -> new ConcurrentHashMap<>());
    for (Map.Entry<String, List<Object>> e : values.entrySet()) {
        stripePages.computeIfAbsent(e.getKey(), k -> Collections.synchronizedList(new LinkedList<>()))
                .add(new AbstractMap.SimpleEntry<>(e.getValue(), pageNumber));
    }

    // Update page count
    AtomicInteger remainingPages = pageCountExpected.computeIfAbsent(stripeOffset, k -> new AtomicInteger());
    int current = remainingPages.decrementAndGet();
    String totalPages = connectorMetadata.getProperty(DATASOURCE_TOTAL_PAGES);
    if (totalPages != null) {
        int expected = Integer.parseInt(totalPages);
        int updatedCurrent = remainingPages.addAndGet(expected);
        LOG.debug("offset %d finishing page received, expected page count: %d, actual received: %d, remaining: %d", stripeOffset, expected, -current, updatedCurrent);
    }

    // Check page count to know if all pages have been received for a stripe. Persist and delete values if true to save memory
    if (remainingPages.get() == 0) {
        // Several threads may observe zero; the per-stripe lock plus the presence check below
        // make sure the stripe is persisted only once.
        synchronized (remainingPages) {
            Map<String, List<Map.Entry<List<Object>, Integer>>> bufferedPages = indexPages.get(stripeOffset);
            if (bufferedPages != null) {
                LOG.debug("All pages for offset %d have been received. Persisting.", stripeOffset);
                // sort the stripe's pages and collect the values into a single list
                List<Pair<String, List<Object>>> columnValuesMap = new ArrayList<>();
                // each entry represents a mapping from column name -> list<entry<page values, page number>>
                for (Map.Entry<String, List<Map.Entry<List<Object>, Integer>>> entry : bufferedPages.entrySet()) {
                    // sort the page values lists based on page numbers
                    entry.getValue().sort(Comparator.comparingInt(Map.Entry::getValue));
                    // collect all page values lists into a single list
                    List<Object> columnValues = entry.getValue().stream().map(Map.Entry::getKey).flatMap(Collection::stream).collect(Collectors.toList());
                    columnValuesMap.add(new Pair<>(entry.getKey(), columnValues));
                }
                persistStripe(stripeOffset, columnValuesMap);
                // Removed only after a successful persist, so a failure keeps the buffered values.
                indexPages.remove(stripeOffset);
            } else {
                LOG.debug("All pages for offset %d have been received, but the values are missing. " + "This stripe should have already been persisted by another thread.", stripeOffset);
            }
        }
    }
}
Also used : DATASOURCE_TOTAL_PAGES(io.prestosql.spi.HetuConstant.DATASOURCE_TOTAL_PAGES) Logger(io.airlift.log.Logger) HetuFileSystemClient(io.prestosql.spi.filesystem.HetuFileSystemClient) IndexConstants(io.hetu.core.heuristicindex.util.IndexConstants) IndexServiceUtils(io.hetu.core.heuristicindex.util.IndexServiceUtils) HetuLocalFileSystemClient(io.hetu.core.filesystem.HetuLocalFileSystemClient) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) DATASOURCE_STRIPE_OFFSET(io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_OFFSET) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) HetuConstant(io.prestosql.spi.HetuConstant) Locale(java.util.Locale) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) LinkedList(java.util.LinkedList) Path(java.nio.file.Path) OutputStream(java.io.OutputStream) Properties(java.util.Properties) IndexWriter(io.prestosql.spi.heuristicindex.IndexWriter) Files(java.nio.file.Files) Collection(java.util.Collection) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IOException(java.io.IOException) LocalConfig(io.hetu.core.filesystem.LocalConfig) Collectors(java.util.stream.Collectors) Pair(io.prestosql.spi.heuristicindex.Pair) DATASOURCE_PAGE_NUMBER(io.prestosql.spi.HetuConstant.DATASOURCE_PAGE_NUMBER) AbstractMap(java.util.AbstractMap) List(java.util.List) Paths(java.nio.file.Paths) CreateIndexMetadata(io.prestosql.spi.connector.CreateIndexMetadata) Comparator(java.util.Comparator) Index(io.prestosql.spi.heuristicindex.Index) Collections(java.util.Collections) ArrayList(java.util.ArrayList) AbstractMap(java.util.AbstractMap) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) AbstractMap(java.util.AbstractMap) Pair(io.prestosql.spi.heuristicindex.Pair)

Aggregations

Logger (io.airlift.log.Logger)2 DATASOURCE_PAGE_NUMBER (io.prestosql.spi.HetuConstant.DATASOURCE_PAGE_NUMBER)2 DATASOURCE_STRIPE_OFFSET (io.prestosql.spi.HetuConstant.DATASOURCE_STRIPE_OFFSET)2 DATASOURCE_TOTAL_PAGES (io.prestosql.spi.HetuConstant.DATASOURCE_TOTAL_PAGES)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Map (java.util.Map)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 Maps (com.google.common.collect.Maps)1 PeekingIterator (com.google.common.collect.PeekingIterator)1 Slice (io.airlift.slice.Slice)1 Slices (io.airlift.slice.Slices)1 DataSize (io.airlift.units.DataSize)1 HetuLocalFileSystemClient (io.hetu.core.filesystem.HetuLocalFileSystemClient)1 LocalConfig (io.hetu.core.filesystem.LocalConfig)1 IndexConstants (io.hetu.core.heuristicindex.util.IndexConstants)1 IndexServiceUtils (io.hetu.core.heuristicindex.util.IndexServiceUtils)1 AggregatedMemoryContext (io.prestosql.memory.context.AggregatedMemoryContext)1