Search in sources:

Example 1 with WriteIdInfo

Use of io.prestosql.plugin.hive.WriteIdInfo in project boostkit-bigdata by kunpengcompute.

The method isDeleted of the class OrcDeletedRows.

/**
 * Reports whether {@code sourcePageRowId} matches a row id found in the delete-delta files.
 *
 * <p>On the first call (while {@code sortedRowsIterator} is still {@code null}) a page source is
 * opened for every delete delta that has a file for {@code sourceFileName}, and the sources are
 * combined through {@code HiveUtil.getMergeSortedPages}. Every call then advances a cursor
 * ({@code currentPage} / {@code currentPageOffset}) through the merged pages until it reaches a
 * deleted row id that is equal to or greater than the probed one.
 *
 * <p>NOTE(review): the cursor never moves backwards, so this presumably relies on callers probing
 * row ids in ascending order - confirm against the call sites.
 *
 * @param sourcePageRowId row id of the source row being checked
 * @return {@code true} if an equal row id is found among the deleted rows, {@code false} if a
 *         greater one is reached first or the deleted rows are exhausted
 * @throws PrestoException if a delete-delta file cannot be opened or read
 */
private boolean isDeleted(OrcAcidRowId sourcePageRowId) {
    if (sortedRowsIterator == null) {
        for (WriteIdInfo deleteDeltaInfo : deleteDeltaLocations.getDeleteDeltas()) {
            Path path = createPath(deleteDeltaLocations.getPartitionLocation(), deleteDeltaInfo, sourceFileName);
            try {
                FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
                FileStatus fileStatus = hdfsEnvironment.doAs(sessionUser, () -> fileSystem.getFileStatus(path));
                pageSources.add(pageSourceFactory.createPageSource(fileStatus.getPath(), fileStatus.getLen(), fileStatus.getModificationTime()));
            } catch (FileNotFoundException ignored) {
                // Source file does not have a delete-delta file in this location; nothing to add.
            } catch (PrestoException e) {
                // Already carries an error code; rethrow so the RuntimeException handler below does not re-wrap it.
                throw e;
            } catch (OrcCorruptionException e) {
                throw new PrestoException(HiveErrorCode.HIVE_BAD_DATA, format("Failed to read ORC file: %s", path), e);
            } catch (RuntimeException | IOException e) {
                throw new PrestoException(HiveErrorCode.HIVE_CURSOR_ERROR, format("Failed to read ORC file: %s", path), e);
            }
        }
        // Three key columns, all sorted ascending with nulls first.
        // NOTE(review): presumably the ACID row-id components - confirm against OrcAcidRowId.
        List<Type> columnTypes = ImmutableList.of(BigintType.BIGINT, IntegerType.INTEGER, BigintType.BIGINT);
        List<Integer> sortFields = ImmutableList.of(0, 1, 2);
        List<SortOrder> sortOrders = ImmutableList.of(SortOrder.ASC_NULLS_FIRST, SortOrder.ASC_NULLS_FIRST, SortOrder.ASC_NULLS_FIRST);
        sortedRowsIterator = HiveUtil.getMergeSortedPages(pageSources, columnTypes, sortFields, sortOrders);
    }
    while (true) {
        if (currentPage == null || currentPageOffset >= currentPage.getPositionCount()) {
            currentPage = null;
            currentPageOffset = 0;
            if (!sortedRowsIterator.hasNext()) {
                // No more entries in deleted_delta
                return false;
            }
            currentPage = sortedRowsIterator.next();
            // Re-run the bounds check so a page without positions is skipped instead of being indexed.
            continue;
        }
        deletedRowId.set(currentPage, currentPageOffset);
        int comparison = deletedRowId.compareTo(sourcePageRowId);
        if (comparison == 0) {
            // Source row is deleted.
            return true;
        }
        if (comparison > 0) {
            // The deleted rows are already past the source row, so it is not deleted.
            return false;
        }
        currentPageOffset++;
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileNotFoundException(java.io.FileNotFoundException) SortOrder(io.prestosql.spi.block.SortOrder) PrestoException(io.prestosql.spi.PrestoException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) BigintType(io.prestosql.spi.type.BigintType) Type(io.prestosql.spi.type.Type) IntegerType(io.prestosql.spi.type.IntegerType) FileSystem(org.apache.hadoop.fs.FileSystem) WriteIdInfo(io.prestosql.plugin.hive.WriteIdInfo) OrcCorruptionException(io.prestosql.orc.OrcCorruptionException)

Example 2 with WriteIdInfo

Use of io.prestosql.plugin.hive.WriteIdInfo in project hetu-core by openlookeng.

The method isDeleted of the class OrcDeletedRows.

/**
 * Reports whether {@code sourcePageRowId} matches a row id found in the delete-delta files.
 *
 * <p>On the first call (while {@code sortedRowsIterator} is still {@code null}) a page source is
 * opened for every delete delta that has a file for {@code sourceFileName}, and the sources are
 * combined through {@code HiveUtil.getMergeSortedPages}. Every call then advances a cursor
 * ({@code currentPage} / {@code currentPageOffset}) through the merged pages until it reaches a
 * deleted row id that is equal to or greater than the probed one.
 *
 * <p>NOTE(review): the cursor never moves backwards, so this presumably relies on callers probing
 * row ids in ascending order - confirm against the call sites.
 *
 * @param sourcePageRowId row id of the source row being checked
 * @return {@code true} if an equal row id is found among the deleted rows, {@code false} if a
 *         greater one is reached first or the deleted rows are exhausted
 * @throws PrestoException if a delete-delta file cannot be opened or read
 */
private boolean isDeleted(OrcAcidRowId sourcePageRowId) {
    if (sortedRowsIterator == null) {
        for (WriteIdInfo deleteDeltaInfo : deleteDeltaLocations.getDeleteDeltas()) {
            Path path = createPath(deleteDeltaLocations.getPartitionLocation(), deleteDeltaInfo, sourceFileName);
            try {
                FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
                FileStatus fileStatus = hdfsEnvironment.doAs(sessionUser, () -> fileSystem.getFileStatus(path));
                pageSources.add(pageSourceFactory.createPageSource(fileStatus.getPath(), fileStatus.getLen(), fileStatus.getModificationTime()));
            } catch (FileNotFoundException ignored) {
                // Source file does not have a delete-delta file in this location; nothing to add.
            } catch (PrestoException e) {
                // Already carries an error code; rethrow so the RuntimeException handler below does not re-wrap it.
                throw e;
            } catch (OrcCorruptionException e) {
                throw new PrestoException(HiveErrorCode.HIVE_BAD_DATA, format("Failed to read ORC file: %s", path), e);
            } catch (RuntimeException | IOException e) {
                throw new PrestoException(HiveErrorCode.HIVE_CURSOR_ERROR, format("Failed to read ORC file: %s", path), e);
            }
        }
        // Three key columns, all sorted ascending with nulls first.
        // NOTE(review): presumably the ACID row-id components - confirm against OrcAcidRowId.
        List<Type> columnTypes = ImmutableList.of(BigintType.BIGINT, IntegerType.INTEGER, BigintType.BIGINT);
        List<Integer> sortFields = ImmutableList.of(0, 1, 2);
        List<SortOrder> sortOrders = ImmutableList.of(SortOrder.ASC_NULLS_FIRST, SortOrder.ASC_NULLS_FIRST, SortOrder.ASC_NULLS_FIRST);
        sortedRowsIterator = HiveUtil.getMergeSortedPages(pageSources, columnTypes, sortFields, sortOrders);
    }
    while (true) {
        if (currentPage == null || currentPageOffset >= currentPage.getPositionCount()) {
            currentPage = null;
            currentPageOffset = 0;
            if (!sortedRowsIterator.hasNext()) {
                // No more entries in deleted_delta
                return false;
            }
            currentPage = sortedRowsIterator.next();
            // Re-run the bounds check so a page without positions is skipped instead of being indexed.
            continue;
        }
        deletedRowId.set(currentPage, currentPageOffset);
        int comparison = deletedRowId.compareTo(sourcePageRowId);
        if (comparison == 0) {
            // Source row is deleted.
            return true;
        }
        if (comparison > 0) {
            // The deleted rows are already past the source row, so it is not deleted.
            return false;
        }
        currentPageOffset++;
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) FileNotFoundException(java.io.FileNotFoundException) SortOrder(io.prestosql.spi.block.SortOrder) PrestoException(io.prestosql.spi.PrestoException) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) BigintType(io.prestosql.spi.type.BigintType) Type(io.prestosql.spi.type.Type) IntegerType(io.prestosql.spi.type.IntegerType) FileSystem(org.apache.hadoop.fs.FileSystem) WriteIdInfo(io.prestosql.plugin.hive.WriteIdInfo) OrcCorruptionException(io.prestosql.orc.OrcCorruptionException)

Aggregations

OrcCorruptionException (io.prestosql.orc.OrcCorruptionException)2 WriteIdInfo (io.prestosql.plugin.hive.WriteIdInfo)2 PrestoException (io.prestosql.spi.PrestoException)2 SortOrder (io.prestosql.spi.block.SortOrder)2 BigintType (io.prestosql.spi.type.BigintType)2 IntegerType (io.prestosql.spi.type.IntegerType)2 Type (io.prestosql.spi.type.Type)2 FileNotFoundException (java.io.FileNotFoundException)2 IOException (java.io.IOException)2 UncheckedIOException (java.io.UncheckedIOException)2 FileStatus (org.apache.hadoop.fs.FileStatus)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 Path (org.apache.hadoop.fs.Path)2