Search in sources :

Example 21 with INTEGER

use of com.facebook.presto.common.type.IntegerType.INTEGER in project presto by prestodb.

The class TestSelectiveOrcReader, method testArrays.

/**
 * Round-trips array-typed columns (integer arrays, string arrays and nested integer arrays)
 * through {@code tester}, both as a single column and next to a plain INTEGER column, using
 * whole-column and subfield filters.
 *
 * <p>All data comes from one {@link Random} seeded with 0, so the order of the calls below
 * determines the generated values.
 */
@Test
public void testArrays() throws Exception {
    Random rng = new Random(0);

    // non-null integer arrays of varying sizes; some arrays may be empty
    tester.testRoundTrip(
            arrayType(INTEGER),
            createList(NUM_ROWS, row -> randomIntegers(rng.nextInt(10), rng)),
            IS_NULL,
            IS_NOT_NULL);

    // Ranges used by the element filters below: "<= 0" and ">= 0" respectively.
    BigintRange nonPositive = BigintRange.of(Integer.MIN_VALUE, 0, false);
    BigintRange nonNegative = BigintRange.of(0, Integer.MAX_VALUE, false);

    // arrays of strings, filtered on subfield c[1]
    tester.testRoundTrip(
            arrayType(VARCHAR),
            createList(1000, row -> randomStrings(5 + rng.nextInt(5), rng)),
            ImmutableList.of(
                    toSubfieldFilter("c[1]", IS_NULL),
                    toSubfieldFilter("c[1]", stringIn(true, "a", "b", "c", "d"))));

    // same filters over string arrays produced by randomStringsWithNulls
    tester.testRoundTrip(
            arrayType(VARCHAR),
            createList(10, row -> randomStringsWithNulls(5 + rng.nextInt(5), rng)),
            ImmutableList.of(
                    toSubfieldFilter("c[1]", IS_NULL),
                    toSubfieldFilter("c[1]", stringIn(true, "a", "b", "c", "d"))));

    // non-empty non-null arrays of varying sizes
    tester.testRoundTrip(
            arrayType(INTEGER),
            createList(NUM_ROWS, row -> randomIntegers(5 + rng.nextInt(5), rng)),
            ImmutableList.of(
                    toSubfieldFilter(IS_NULL),
                    toSubfieldFilter(IS_NOT_NULL),
                    // c[1] >= 0
                    toSubfieldFilter("c[1]", nonNegative),
                    // c[2] >= 0 AND c[4] >= 0
                    ImmutableMap.of(new Subfield("c[2]"), nonNegative, new Subfield("c[4]"), nonNegative)));

    // INTEGER column plus non-null arrays of varying sizes; some arrays may be empty
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, arrayType(INTEGER)),
            ImmutableList.of(
                    randomIntegers(NUM_ROWS, rng),
                    createList(NUM_ROWS, row -> randomIntegers(rng.nextInt(10), rng))),
            toSubfieldFilters(
                    ImmutableMap.of(0, nonNegative),
                    ImmutableMap.of(0, nonNegative, 1, IS_NULL),
                    ImmutableMap.of(0, nonNegative, 1, IS_NOT_NULL)));

    // INTEGER column plus non-empty non-null arrays of varying sizes
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, arrayType(INTEGER)),
            ImmutableList.of(
                    randomIntegers(NUM_ROWS, rng),
                    createList(NUM_ROWS, row -> randomIntegers(5 + rng.nextInt(5), rng))),
            ImmutableList.of(
                    // c[1] >= 0
                    ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, toSubfieldFilter("c[1]", nonNegative)),
                    // c[3] >= 0
                    ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, toSubfieldFilter("c[3]", nonNegative)),
                    // c[2] >= 0 AND c[4] <= 0
                    ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, ImmutableMap.of(new Subfield("c[2]"), nonNegative, new Subfield("c[4]"), nonPositive))));

    // nested arrays; outer and inner arrays may be empty
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, arrayType(arrayType(INTEGER))),
            ImmutableList.of(
                    randomIntegers(NUM_ROWS, rng),
                    createList(NUM_ROWS, row -> createList(rng.nextInt(10), inner -> randomIntegers(rng.nextInt(5), rng)))),
            toSubfieldFilters(
                    ImmutableMap.of(0, nonNegative),
                    ImmutableMap.of(1, IS_NULL),
                    ImmutableMap.of(1, IS_NOT_NULL),
                    ImmutableMap.of(0, nonNegative, 1, IS_NULL)));

    // nested arrays with at least three elements at both levels, filtered on subfields
    tester.testRoundTripTypes(
            ImmutableList.of(INTEGER, arrayType(arrayType(INTEGER))),
            ImmutableList.of(
                    randomIntegers(NUM_ROWS, rng),
                    createList(NUM_ROWS, row -> createList(3 + rng.nextInt(10), inner -> randomIntegers(3 + rng.nextInt(5), rng)))),
            ImmutableList.of(
                    // c[1] IS NULL
                    ImmutableMap.of(1, ImmutableMap.of(new Subfield("c[1]"), IS_NULL)),
                    // c[2] IS NOT NULL AND c[2][3] >= 0
                    ImmutableMap.of(1, ImmutableMap.of(new Subfield("c[2]"), IS_NOT_NULL, new Subfield("c[2][3]"), nonNegative)),
                    // column 0 >= 0 AND c[1] IS NULL
                    ImmutableMap.of(0, toSubfieldFilter(nonNegative), 1, ImmutableMap.of(new Subfield("c[1]"), IS_NULL))));
}
Also used : CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) Page(com.facebook.presto.common.Page) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) OrcTester.createCustomOrcSelectiveRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader) BigintRange(com.facebook.presto.common.predicate.TupleDomainFilter.BigintRange) Test(org.testng.annotations.Test) Random(java.util.Random) OrcTester.quickSelectiveOrcTester(com.facebook.presto.orc.OrcTester.quickSelectiveOrcTester) SESSION(com.facebook.presto.testing.TestingConnectorSession.SESSION) Iterables.concat(com.google.common.collect.Iterables.concat) Iterables.cycle(com.google.common.collect.Iterables.cycle) Slices(io.airlift.slice.Slices) Map(java.util.Map) HIVE_STORAGE_TIME_ZONE(com.facebook.presto.orc.OrcTester.HIVE_STORAGE_TIME_ZONE) FloatRange(com.facebook.presto.common.predicate.TupleDomainFilter.FloatRange) BigInteger(java.math.BigInteger) SqlDecimal(com.facebook.presto.common.type.SqlDecimal) BigintValuesUsingHashTable(com.facebook.presto.common.predicate.TupleDomainFilter.BigintValuesUsingHashTable) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) OrcTester.mapType(com.facebook.presto.orc.OrcTester.mapType) NONE(com.facebook.presto.orc.metadata.CompressionKind.NONE) Collections.nCopies(java.util.Collections.nCopies) BeforeClass(org.testng.annotations.BeforeClass) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Range(com.google.common.collect.Range) BooleanValue(com.facebook.presto.common.predicate.TupleDomainFilter.BooleanValue) Iterables.limit(com.google.common.collect.Iterables.limit) Assert.assertNotNull(org.testng.Assert.assertNotNull) Streams(com.google.common.collect.Streams) Assertions.assertBetweenInclusive(com.facebook.airlift.testing.Assertions.assertBetweenInclusive) List(java.util.List) 
ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) SqlTimestamp(com.facebook.presto.common.type.SqlTimestamp) IS_NOT_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NOT_NULL) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Optional(java.util.Optional) IS_NULL(com.facebook.presto.common.predicate.TupleDomainFilter.IS_NULL) IntStream(java.util.stream.IntStream) MAX_BLOCK_SIZE(com.facebook.presto.orc.OrcTester.MAX_BLOCK_SIZE) DecimalType(com.facebook.presto.common.type.DecimalType) ContiguousSet(com.google.common.collect.ContiguousSet) Slice(io.airlift.slice.Slice) Assert.assertNull(org.testng.Assert.assertNull) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) DateTimeTestingUtils.sqlTimestampOf(com.facebook.presto.testing.DateTimeTestingUtils.sqlTimestampOf) Assert.assertEquals(org.testng.Assert.assertEquals) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) Function(java.util.function.Function) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) BytesRange(com.facebook.presto.common.predicate.TupleDomainFilter.BytesRange) ArrayList(java.util.ArrayList) Strings(com.google.common.base.Strings) ZLIB(com.facebook.presto.orc.metadata.CompressionKind.ZLIB) SqlDate(com.facebook.presto.common.type.SqlDate) Subfield(com.facebook.presto.common.Subfield) ImmutableList(com.google.common.collect.ImmutableList) SqlVarbinary(com.facebook.presto.common.type.SqlVarbinary) DiscreteDomain(com.google.common.collect.DiscreteDomain) OrcTester.writeOrcColumnsPresto(com.facebook.presto.orc.OrcTester.writeOrcColumnsPresto) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CharType(com.facebook.presto.common.type.CharType) 
Type(com.facebook.presto.common.type.Type) MAX_BATCH_SIZE(com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) OrcTester.arrayType(com.facebook.presto.orc.OrcTester.arrayType) InvalidFunctionArgumentException(com.facebook.presto.common.InvalidFunctionArgumentException) Iterator(java.util.Iterator) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Assert.fail(org.testng.Assert.fail) AbstractIterator(com.google.common.collect.AbstractIterator) TupleDomainFilterUtils.toBigintValues(com.facebook.presto.common.predicate.TupleDomainFilterUtils.toBigintValues) VARBINARY(com.facebook.presto.common.type.VarbinaryType.VARBINARY) Maps(com.google.common.collect.Maps) Ints(com.google.common.primitives.Ints) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) DWRF(com.facebook.presto.orc.OrcTester.Format.DWRF) OrcReaderSettings(com.facebook.presto.orc.OrcTester.OrcReaderSettings) Collectors.toList(java.util.stream.Collectors.toList) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) OrcTester.rowType(com.facebook.presto.orc.OrcTester.rowType) TestingOrcPredicate.createOrcPredicate(com.facebook.presto.orc.TestingOrcPredicate.createOrcPredicate) Assert.assertTrue(org.testng.Assert.assertTrue) Block(com.facebook.presto.common.block.Block) BytesValues(com.facebook.presto.common.predicate.TupleDomainFilter.BytesValues) DoubleRange(com.facebook.presto.common.predicate.TupleDomainFilter.DoubleRange) Collections(java.util.Collections) ZSTD(com.facebook.presto.orc.metadata.CompressionKind.ZSTD) Random(java.util.Random) BigintRange(com.facebook.presto.common.predicate.TupleDomainFilter.BigintRange) Subfield(com.facebook.presto.common.Subfield) Test(org.testng.annotations.Test)

Example 22 with INTEGER

use of com.facebook.presto.common.type.IntegerType.INTEGER in project presto by prestodb.

The class TupleDomainParquetPredicate, method getDomain.

/**
 * Builds the {@link Domain} described by pairs of statistics: element {@code i} of
 * {@code minimums} and element {@code i} of {@code maximums} together define one closed range.
 * Both lists must have the same size.
 *
 * <p>Types without a dedicated branch below, integer-like or DATE statistics flagged by
 * {@code isStatisticsOverflow}, and REAL/DOUBLE statistics containing NaN all yield a domain
 * over every value of {@code type}.
 *
 * @param column not referenced by this method
 * @param type type the statistics are interpreted as
 * @param minimums lower bounds, one per range
 * @param maximums upper bounds, one per range
 * @param hasNullValue null-allowed flag passed to every {@code Domain.create} call
 * @return the domain derived from the statistics
 * @throws IllegalArgumentException if the list sizes differ, or a range-based type has no entries
 * @throws VerifyException if {@code type} is BOOLEAN and neither list contains any value
 */
private static Domain getDomain(ColumnDescriptor column, Type type, List<Object> minimums, List<Object> maximums, boolean hasNullValue) {
    checkArgument(minimums.size() == maximums.size(), "Expected minimums and maximums to have the same size");

    if (type.equals(BOOLEAN)) {
        boolean sawTrue = minimums.stream().anyMatch(value -> (boolean) value) || maximums.stream().anyMatch(value -> (boolean) value);
        boolean sawFalse = minimums.stream().anyMatch(value -> !(boolean) value) || maximums.stream().anyMatch(value -> !(boolean) value);
        if (!sawTrue && !sawFalse) {
            // All nulls case is handled earlier
            throw new VerifyException("Impossible boolean statistics");
        }
        if (sawTrue && sawFalse) {
            // NOTE(review): unlike the other "all values" fallbacks, this one does not pass
            // hasNullValue -- confirm that is intended.
            return Domain.all(type);
        }
        // Exactly one of the two flags is set here, so sawTrue is the single observed value.
        return Domain.create(ValueSet.of(type, sawTrue), hasNullValue);
    }

    int pairCount = minimums.size();
    List<Range> collected = new ArrayList<>();

    if (type.equals(BIGINT) || type.equals(TINYINT) || type.equals(SMALLINT) || type.equals(INTEGER) || type.equals(DATE)) {
        // Integer-like types and DATE share the same long-based handling.
        for (int index = 0; index < pairCount; index++) {
            long lower = asLong(minimums.get(index));
            long upper = asLong(maximums.get(index));
            if (isStatisticsOverflow(type, lower, upper)) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            collected.add(Range.range(type, lower, true, upper, true));
        }
    } else if (type.equals(REAL)) {
        for (int index = 0; index < pairCount; index++) {
            Float lower = (Float) minimums.get(index);
            Float upper = (Float) maximums.get(index);
            if (lower.isNaN() || upper.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            // REAL bounds are handed to Range as the raw int bits of the float, widened to long.
            collected.add(Range.range(type, (long) floatToRawIntBits(lower), true, (long) floatToRawIntBits(upper), true));
        }
    } else if (type.equals(DOUBLE)) {
        for (int index = 0; index < pairCount; index++) {
            Double lower = (Double) minimums.get(index);
            Double upper = (Double) maximums.get(index);
            if (lower.isNaN() || upper.isNaN()) {
                return Domain.create(ValueSet.all(type), hasNullValue);
            }
            collected.add(Range.range(type, lower, true, upper, true));
        }
    } else if (isVarcharType(type)) {
        for (int index = 0; index < pairCount; index++) {
            Slice lower = Slices.wrappedBuffer(((Binary) minimums.get(index)).toByteBuffer());
            Slice upper = Slices.wrappedBuffer(((Binary) maximums.get(index)).toByteBuffer());
            collected.add(Range.range(type, lower, true, upper, true));
        }
    } else {
        // No statistics-based narrowing for any other type.
        return Domain.create(ValueSet.all(type), hasNullValue);
    }

    checkArgument(!collected.isEmpty(), "cannot use empty ranges");
    return Domain.create(ValueSet.ofRanges(collected), hasNullValue);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) PredicateUtils.isStatisticsOverflow(com.facebook.presto.parquet.predicate.PredicateUtils.isStatisticsOverflow) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) ColumnIndex(org.apache.parquet.internal.column.columnindex.ColumnIndex) FilterApi(org.apache.parquet.filter2.predicate.FilterApi) ByteBuffer(java.nio.ByteBuffer) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Slices(io.airlift.slice.Slices) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) UserDefinedPredicate(org.apache.parquet.filter2.predicate.UserDefinedPredicate) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) BINARY(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) String.format(java.lang.String.format) Range(com.facebook.presto.common.predicate.Range) Binary(org.apache.parquet.io.api.Binary) Serializable(java.io.Serializable) INT64(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64) List(java.util.List) LITTLE_ENDIAN(java.nio.ByteOrder.LITTLE_ENDIAN) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) Optional(java.util.Optional) DictionaryPage(com.facebook.presto.parquet.DictionaryPage) ColumnPath(org.apache.parquet.hadoop.metadata.ColumnPath) Slice(io.airlift.slice.Slice) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) HashMap(java.util.HashMap) INT32(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32) Function(java.util.function.Function) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) 
ArrayList(java.util.ArrayList) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) ParquetDataSourceId(com.facebook.presto.parquet.ParquetDataSourceId) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) Type(com.facebook.presto.common.type.Type) VerifyException(com.google.common.base.VerifyException) Statistics(org.apache.parquet.column.statistics.Statistics) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Domain(com.facebook.presto.common.predicate.Domain) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) Dictionary(com.facebook.presto.parquet.dictionary.Dictionary) FLOAT(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ValueSet(com.facebook.presto.common.predicate.ValueSet) VerifyException(com.google.common.base.VerifyException) Slice(io.airlift.slice.Slice) ArrayList(java.util.ArrayList) Range(com.facebook.presto.common.predicate.Range)

Example 23 with INTEGER

use of com.facebook.presto.common.type.IntegerType.INTEGER in project presto by prestodb.

The class TestHivePageSink, method writeTestFile.

/**
 * Writes generated line item rows through a page sink into {@code outputPath}, reads the single
 * resulting data file back through a page source, and asserts that the data read equals the data
 * written and that the writer stats report the written page's retained size.
 *
 * <p>The row counter is incremented and checked before a row is added, so at most
 * {@code NUM_ROWS - 1} rows are written.
 *
 * @param config passed to page sink, page source and session creation
 * @param metastoreClientConfig passed to page sink and page source creation
 * @param metastore passed to page sink creation
 * @param outputPath directory the page sink writes to; after the write it must contain exactly
 *        one file whose name does not end with ".crc"
 * @return length in bytes of the written data file
 */
private static long writeTestFile(HiveClientConfig config, MetastoreClientConfig metastoreClientConfig, ExtendedHiveMetastore metastore, String outputPath) {
    HiveTransactionHandle transaction = new HiveTransactionHandle();
    HiveWriterStats stats = new HiveWriterStats();
    ConnectorPageSink pageSink = createPageSink(transaction, config, metastoreClientConfig, metastore, new Path("file:///" + outputPath), stats);
    List<LineItemColumn> columns = getTestColumns();
    List<Type> columnTypes = columns.stream().map(LineItemColumn::getType).map(TestHivePageSink::getHiveType).map(hiveType -> hiveType.getType(FUNCTION_AND_TYPE_MANAGER)).collect(toList());
    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    int rows = 0;
    for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
        rows++;
        if (rows >= NUM_ROWS) {
            break;
        }
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            LineItemColumn column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            switch(column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(lineItem));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(lineItem));
                    break;
                case DATE:
                    DATE.writeLong(blockBuilder, column.getDate(lineItem));
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(lineItem));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(lineItem)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
    }
    // Everything is written as one page so it can be compared directly with what is read back.
    Page page = pageBuilder.build();
    pageSink.appendPage(page);
    getFutureValue(pageSink.finish());

    // The sink must have produced exactly one data file; ".crc" files are ignored.
    File outputDir = new File(outputPath);
    List<File> files = ImmutableList.copyOf(outputDir.listFiles((dir, name) -> !name.endsWith(".crc")));
    File outputFile = getOnlyElement(files);
    long length = outputFile.length();

    List<Page> pages = new ArrayList<>();
    // try-with-resources releases the page source even if reading fails; the original never
    // closed it. Pages are fully loaded before the source is closed.
    try (ConnectorPageSource pageSource = createPageSource(transaction, config, metastoreClientConfig, outputFile)) {
        while (!pageSource.isFinished()) {
            Page nextPage = pageSource.getNextPage();
            if (nextPage != null) {
                pages.add(nextPage.getLoadedPage());
            }
        }
    } catch (java.io.IOException e) {
        // Keep the method signature free of checked exceptions, as before.
        throw new java.io.UncheckedIOException(e);
    }

    MaterializedResult expectedResults = toMaterializedResult(getSession(config), columnTypes, ImmutableList.of(page));
    MaterializedResult results = toMaterializedResult(getSession(config), columnTypes, pages);
    assertEquals(results, expectedResults);
    assertEquals(stats.getInputPageSizeInBytes().getAllTime().getMax(), page.getRetainedSizeInBytes());
    return length;
}
Also used : Path(org.apache.hadoop.fs.Path) Page(com.facebook.presto.common.Page) MetadataManager(com.facebook.presto.metadata.MetadataManager) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) HiveTestUtils.getDefaultHiveFileWriterFactories(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) NO_PREFERENCE(com.facebook.presto.spi.schedule.NodeSelectionStrategy.NO_PREFERENCE) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) Test(org.testng.annotations.Test) NO_CACHE_REQUIREMENT(com.facebook.presto.hive.CacheQuotaRequirement.NO_CACHE_REQUIREMENT) TpchColumnTypes(io.airlift.tpch.TpchColumnTypes) CacheConfig(com.facebook.presto.cache.CacheConfig) HiveTestUtils.getDefaultOrcFileWriterFactory(com.facebook.presto.hive.HiveTestUtils.getDefaultOrcFileWriterFactory) MoreFutures.getFutureValue(com.facebook.airlift.concurrent.MoreFutures.getFutureValue) DIRECT_TO_TARGET_NEW_DIRECTORY(com.facebook.presto.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY) PageBuilder(com.facebook.presto.common.PageBuilder) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) Slices(io.airlift.slice.Slices) ConnectorPageSink(com.facebook.presto.spi.ConnectorPageSink) Path(org.apache.hadoop.fs.Path) HIVE_LONG(com.facebook.presto.hive.HiveType.HIVE_LONG) TpchColumnType(io.airlift.tpch.TpchColumnType) METASTORE_CONTEXT(com.facebook.presto.hive.HiveQueryRunner.METASTORE_CONTEXT) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) ImmutableSet(com.google.common.collect.ImmutableSet) TestHiveUtil.createTestingFileHiveMetastore(com.facebook.presto.hive.TestHiveUtil.createTestingFileHiveMetastore) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) 
ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TestingNodeManager(com.facebook.presto.testing.TestingNodeManager) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) FeaturesConfig(com.facebook.presto.sql.analyzer.FeaturesConfig) LineItemGenerator(io.airlift.tpch.LineItemGenerator) PAGE_SORTER(com.facebook.presto.hive.HiveTestUtils.PAGE_SORTER) HIVE_STRING(com.facebook.presto.hive.HiveType.HIVE_STRING) List(java.util.List) Stream(java.util.stream.Stream) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) Assertions.assertGreaterThan(com.facebook.airlift.testing.Assertions.assertGreaterThan) Optional(java.util.Optional) ConnectorId(com.facebook.presto.spi.ConnectorId) HiveTestUtils.getDefaultHiveBatchPageSourceFactories(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveBatchPageSourceFactories) LineItem(io.airlift.tpch.LineItem) Assert.assertEquals(com.facebook.presto.testing.assertions.Assert.assertEquals) Column(com.facebook.presto.hive.metastore.Column) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) HivePageSinkMetadata(com.facebook.presto.hive.metastore.HivePageSinkMetadata) OptionalInt(java.util.OptionalInt) DATE(com.facebook.presto.common.type.DateType.DATE) ArrayList(java.util.ArrayList) HIVE_DATE(com.facebook.presto.hive.HiveType.HIVE_DATE) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) HIVE_INT(com.facebook.presto.hive.HiveType.HIVE_INT) HiveTestUtils.createTestHdfsEnvironment(com.facebook.presto.hive.HiveTestUtils.createTestHdfsEnvironment) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) HIVE_DOUBLE(com.facebook.presto.hive.HiveType.HIVE_DOUBLE) NON_CACHEABLE(com.facebook.presto.spi.SplitContext.NON_CACHEABLE) 
Files(com.google.common.io.Files) LineItemColumn(io.airlift.tpch.LineItemColumn) TableHandle(com.facebook.presto.spi.TableHandle) Type(com.facebook.presto.common.type.Type) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) Storage(com.facebook.presto.hive.metastore.Storage) HiveTestUtils.getDefaultHiveSelectivePageSourceFactories(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveSelectivePageSourceFactories) HIVE_CATALOG(com.facebook.presto.hive.HiveQueryRunner.HIVE_CATALOG) TEST_HIVE_PAGE_SINK_CONTEXT(com.facebook.presto.hive.AbstractTestHiveClient.TEST_HIVE_PAGE_SINK_CONTEXT) NEW(com.facebook.presto.hive.LocationHandle.TableType.NEW) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) MaterializedResult(com.facebook.presto.testing.MaterializedResult) NONE(com.facebook.presto.hive.HiveCompressionCodec.NONE) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) GroupByHashPageIndexerFactory(com.facebook.presto.GroupByHashPageIndexerFactory) SplitWeight(com.facebook.presto.spi.SplitWeight) JoinCompiler(com.facebook.presto.sql.gen.JoinCompiler) HiveTestUtils.getDefaultHiveRecordCursorProvider(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveRecordCursorProvider) LineItemColumn(io.airlift.tpch.LineItemColumn) ArrayList(java.util.ArrayList) LineItem(io.airlift.tpch.LineItem) Page(com.facebook.presto.common.Page) PageBuilder(com.facebook.presto.common.PageBuilder) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) 
TpchColumnType(io.airlift.tpch.TpchColumnType) Type(com.facebook.presto.common.type.Type) ConnectorPageSink(com.facebook.presto.spi.ConnectorPageSink) MaterializedResult(com.facebook.presto.testing.MaterializedResult) File(java.io.File) LineItemGenerator(io.airlift.tpch.LineItemGenerator) BlockBuilder(com.facebook.presto.common.block.BlockBuilder)

Example 24 with INTEGER

use of com.facebook.presto.common.type.IntegerType.INTEGER in project presto by prestodb.

The class HiveFileFormatBenchmark, method createTpchDataSet.

/**
 * Materializes rows of {@code tpchTable} into pages holding the requested {@code columns}.
 *
 * <p>When {@code format} does not support dates, DATE columns are declared and written as
 * unbounded VARCHAR. Generation stops once the completed pages total at least
 * {@code MIN_DATA_SIZE} bytes or the generator runs out of rows. Only pages that became full are
 * returned; rows still buffered in the page builder at that point are not emitted.
 *
 * @param format file format the data set is prepared for; consulted only for date support
 * @param tpchTable table whose generator supplies the rows
 * @param columns columns to materialize, in output order
 * @return the column names, the column types and the completed pages
 */
private static <E extends TpchEntity> TestData createTpchDataSet(FileFormat format, TpchTable<E> tpchTable, List<TpchColumn<E>> columns) {
    List<String> columnNames = columns.stream()
            .map(TpchColumn::getColumnName)
            .collect(toList());
    List<Type> columnTypes = columns.stream()
            .map(HiveFileFormatBenchmark::getColumnType)
            .map(type -> (format.supportsDate() || !DATE.equals(type)) ? type : createUnboundedVarcharType())
            .collect(toList());

    PageBuilder builder = new PageBuilder(columnTypes);
    ImmutableList.Builder<Page> completedPages = ImmutableList.builder();
    long accumulatedBytes = 0;

    for (E entity : tpchTable.createGenerator(10, 1, 1)) {
        builder.declarePosition();
        for (int channel = 0; channel < columns.size(); channel++) {
            TpchColumn<E> column = columns.get(channel);
            BlockBuilder output = builder.getBlockBuilder(channel);
            switch (column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(output, column.getIdentifier(entity));
                    break;
                case INTEGER:
                    INTEGER.writeLong(output, column.getInteger(entity));
                    break;
                case DATE:
                    // Must agree with the column type chosen above for this format.
                    if (format.supportsDate()) {
                        DATE.writeLong(output, column.getDate(entity));
                    } else {
                        createUnboundedVarcharType().writeString(output, column.getString(entity));
                    }
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(output, column.getDouble(entity));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(output, Slices.utf8Slice(column.getString(entity)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }

        if (!builder.isFull()) {
            continue;
        }

        // The builder is full: emit the page and start a new one.
        Page completed = builder.build();
        completedPages.add(completed);
        builder.reset();
        accumulatedBytes += completed.getSizeInBytes();
        if (accumulatedBytes >= MIN_DATA_SIZE) {
            break;
        }
    }

    return new TestData(columnNames, columnTypes, completedPages.build());
}
Also used : HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) Page(com.facebook.presto.common.Page) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) RunResult(org.openjdk.jmh.results.RunResult) LINE_ITEM(io.airlift.tpch.TpchTable.LINE_ITEM) Random(java.util.Random) CacheConfig(com.facebook.presto.cache.CacheConfig) Warmup(org.openjdk.jmh.annotations.Warmup) PageBuilder(com.facebook.presto.common.PageBuilder) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Slices(io.airlift.slice.Slices) HiveCompressionCodec(com.facebook.presto.hive.HiveCompressionCodec) TearDown(org.openjdk.jmh.annotations.TearDown) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) Setup(org.openjdk.jmh.annotations.Setup) BlockBuilder(com.facebook.presto.common.block.BlockBuilder) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) Param(org.openjdk.jmh.annotations.Param) Collection(java.util.Collection) UUID(java.util.UUID) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TpchTable(io.airlift.tpch.TpchTable) UncheckedIOException(java.io.UncheckedIOException) TpchEntity(io.airlift.tpch.TpchEntity) OptionsBuilder(org.openjdk.jmh.runner.options.OptionsBuilder) DataSize(io.airlift.units.DataSize) List(java.util.List) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HiveSessionProperties(com.facebook.presto.hive.HiveSessionProperties) Options(org.openjdk.jmh.runner.options.Options) TpchColumn(io.airlift.tpch.TpchColumn) Measurement(org.openjdk.jmh.annotations.Measurement) ORDERS(io.airlift.tpch.TpchTable.ORDERS) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Scope(org.openjdk.jmh.annotations.Scope) ParquetFileWriterConfig(com.facebook.presto.hive.ParquetFileWriterConfig) 
OrderColumn(io.airlift.tpch.OrderColumn) DATE(com.facebook.presto.common.type.DateType.DATE) ArrayList(java.util.ArrayList) Statistics(org.openjdk.jmh.util.Statistics) HiveTestUtils.createTestHdfsEnvironment(com.facebook.presto.hive.HiveTestUtils.createTestHdfsEnvironment) AuxCounters(org.openjdk.jmh.annotations.AuxCounters) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) METASTORE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.METASTORE_CLIENT_CONFIG) ArrayType(com.facebook.presto.common.type.ArrayType) HiveTestUtils.mapType(com.facebook.presto.hive.HiveTestUtils.mapType) Runner(org.openjdk.jmh.runner.Runner) Type(com.facebook.presto.common.type.Type) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) OrcFileWriterConfig(com.facebook.presto.hive.OrcFileWriterConfig) HadoopNative(com.facebook.presto.hadoop.HadoopNative) IOException(java.io.IOException) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) State(org.openjdk.jmh.annotations.State) Benchmark(org.openjdk.jmh.annotations.Benchmark) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Fork(org.openjdk.jmh.annotations.Fork) IntArrays(it.unimi.dsi.fastutil.ints.IntArrays) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) DATE(com.facebook.presto.common.type.DateType.DATE) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) Page(com.facebook.presto.common.Page) PageBuilder(com.facebook.presto.common.PageBuilder) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) ArrayType(com.facebook.presto.common.type.ArrayType) 
HiveTestUtils.mapType(com.facebook.presto.hive.HiveTestUtils.mapType) Type(com.facebook.presto.common.type.Type) BlockBuilder(com.facebook.presto.common.block.BlockBuilder)

Example 25 with INTEGER

Use of com.facebook.presto.common.type.IntegerType.INTEGER in the presto project by prestodb.

The createRecordReader method of the class HiveUtil.

/**
 * Opens a Hadoop {@code RecordReader} over the byte range {@code [start, start + length)} of {@code path}.
 *
 * <p>Only columns whose column type is {@code REGULAR} are registered as read columns. By default only
 * schema properties whose name starts with {@code "serialization."} are copied into the job
 * configuration; for Hudi realtime splits (as decided by {@code isHudiRealtimeSplit}) every schema
 * property is copied and the read column names are set as well. The Airlift LZO and LZOP codecs are
 * prepended to {@code io.compression.codecs} when they are not already listed.
 *
 * <p>Header rows are skipped only when the split starts at offset 0. When the schema declares footer
 * rows, the reader is wrapped in a {@code FooterAwareRecordReader}.
 *
 * <p>If the underlying reader was opened but a later step fails with an {@link IOException}, the reader
 * is closed before the failure is reported, so the open file handle is not leaked.
 *
 * @param configuration base Hadoop configuration; passed to {@code setReadColumns} and converted to a {@code JobConf}
 * @param path file to read
 * @param start offset of the split within the file
 * @param length length of the split
 * @param schema table/partition properties (input format name, serialization settings, header/footer counts)
 * @param columns all column handles of the split; non-{@code REGULAR} ones are ignored here
 * @param customSplitInfo extra split information; only consulted when non-empty
 * @return the opened record reader, positioned after any header rows
 * @throws PrestoException with {@code HIVE_BAD_DATA} when a text line exceeds the length limit, or
 *         {@code HIVE_CANNOT_OPEN_SPLIT} for any other {@link IOException}
 */
public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns, Map<String, String> customSplitInfo) {
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns = ImmutableList.copyOf(filter(columns, column -> column.getColumnType() == REGULAR));
    List<Integer> readHiveColumnIndexes = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));
    // Tell hive the columns we would like to read, this lets hive optimize reading column oriented files
    setReadColumns(configuration, readHiveColumnIndexes);
    // Only propagate serialization schema configs by default
    Predicate<String> schemaFilter = schemaProperty -> schemaProperty.startsWith("serialization.");
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, getInputFormatName(schema), true);
    JobConf jobConf = toJobConf(configuration);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);
    if (!customSplitInfo.isEmpty() && isHudiRealtimeSplit(customSplitInfo)) {
        fileSplit = recreateSplitWithCustomInfo(fileSplit, customSplitInfo);
        // Add additional column information for record reader
        List<String> readHiveColumnNames = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getName));
        jobConf.set(READ_COLUMN_NAMES_CONF_STR, Joiner.on(',').join(readHiveColumnNames));
        // Remove filter when using customSplitInfo as the record reader requires complete schema configs
        schemaFilter = schemaProperty -> true;
    }
    schema.stringPropertyNames().stream().filter(schemaFilter).forEach(name -> jobConf.set(name, schema.getProperty(name)));
    // add Airlift LZO and LZOP to head of codecs list so as to not override existing entries
    List<String> codecs = newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(jobConf.get("io.compression.codecs", "")));
    if (!codecs.contains(LzoCodec.class.getName())) {
        codecs.add(0, LzoCodec.class.getName());
    }
    if (!codecs.contains(LzopCodec.class.getName())) {
        codecs.add(0, LzopCodec.class.getName());
    }
    jobConf.set("io.compression.codecs", codecs.stream().collect(joining(",")));
    // Declared outside the try block so the catch clause can release it if a later step fails
    RecordReader<WritableComparable, Writable> recordReader = null;
    try {
        recordReader = (RecordReader<WritableComparable, Writable>) inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
        int headerCount = getHeaderCount(schema);
        // Only skip header rows when the split is at the beginning of the file
        if (start == 0 && headerCount > 0) {
            Utilities.skipHeader(recordReader, headerCount, recordReader.createKey(), recordReader.createValue());
        }
        int footerCount = getFooterCount(schema);
        if (footerCount > 0) {
            recordReader = new FooterAwareRecordReader<>(recordReader, footerCount, jobConf);
        }
        return recordReader;
    } catch (IOException e) {
        // The reader may already be open (failure while skipping headers or wrapping for footers);
        // close it so the underlying stream is not leaked. At this point recordReader still refers to
        // the unwrapped reader, because the wrapper is only assigned after its constructor succeeds.
        if (recordReader != null) {
            try {
                recordReader.close();
            } catch (IOException closeFailure) {
                // Keep the original failure as the primary cause; guard against self-suppression
                if (closeFailure != e) {
                    e.addSuppressed(closeFailure);
                }
            }
        }
        if (e instanceof TextLineLengthLimitExceededException) {
            throw new PrestoException(HIVE_BAD_DATA, "Line too long in text file: " + path, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length, getInputFormatName(schema), firstNonNull(e.getMessage(), e.getClass().getName())), e);
    }
}
Also used : HIVE_TABLE_BUCKETING_IS_IGNORED(com.facebook.presto.hive.HiveErrorCode.HIVE_TABLE_BUCKETING_IS_IGNORED) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) DECIMAL_TYPE_NAME(org.apache.hadoop.hive.serde.serdeConstants.DECIMAL_TYPE_NAME) GENERIC_INTERNAL_ERROR(com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) Writable(org.apache.hadoop.io.Writable) Short.parseShort(java.lang.Short.parseShort) TypeSignature(com.facebook.presto.common.type.TypeSignature) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) MAX_PARTITION_KEY_COLUMN_INDEX(com.facebook.presto.hive.HiveColumnHandle.MAX_PARTITION_KEY_COLUMN_INDEX) BigDecimal(java.math.BigDecimal) FileSplit(org.apache.hadoop.mapred.FileSplit) Matcher(java.util.regex.Matcher) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Varchars.isVarcharType(com.facebook.presto.common.type.Varchars.isVarcharType) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveColumnHandle.isFileModifiedTimeColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isFileModifiedTimeColumnHandle) Double.parseDouble(java.lang.Double.parseDouble) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) PageInputFormat(com.facebook.presto.hive.pagefile.PageInputFormat) LzoCodec(io.airlift.compress.lzo.LzoCodec) NullableValue(com.facebook.presto.common.predicate.NullableValue) HIVE_INVALID_METADATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HIVE_SERDE_NOT_FOUND(com.facebook.presto.hive.HiveErrorCode.HIVE_SERDE_NOT_FOUND) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) 
HiveColumnHandle.bucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle) Collectors.joining(java.util.stream.Collectors.joining) InvocationTargetException(java.lang.reflect.InvocationTargetException) TypeUtils.isEnumType(com.facebook.presto.common.type.TypeUtils.isEnumType) UncheckedIOException(java.io.UncheckedIOException) Decimals.isShortDecimal(com.facebook.presto.common.type.Decimals.isShortDecimal) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) FooterAwareRecordReader(com.facebook.presto.hive.util.FooterAwareRecordReader) Predicate(com.google.common.base.Predicate) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) DateTimePrinter(org.joda.time.format.DateTimePrinter) RecordReader(org.apache.hadoop.mapred.RecordReader) Iterables.filter(com.google.common.collect.Iterables.filter) Joiner(com.google.common.base.Joiner) StandardTypes(com.facebook.presto.common.type.StandardTypes) DecimalType(com.facebook.presto.common.type.DecimalType) Table(com.facebook.presto.hive.metastore.Table) Slice(io.airlift.slice.Slice) DateTimeFormatterBuilder(org.joda.time.format.DateTimeFormatterBuilder) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TypeSignatureParameter(com.facebook.presto.common.type.TypeSignatureParameter) Chars.isCharType(com.facebook.presto.common.type.Chars.isCharType) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) TINYINT(com.facebook.presto.common.type.TinyintType.TINYINT) WritableComparable(org.apache.hadoop.io.WritableComparable) TIMESTAMP(com.facebook.presto.common.type.TimestampType.TIMESTAMP) DATE(com.facebook.presto.common.type.DateType.DATE) REAL(com.facebook.presto.common.type.RealType.REAL) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) Byte.parseByte(java.lang.Byte.parseByte) ConfigurationUtils.toJobConf(com.facebook.presto.hive.util.ConfigurationUtils.toJobConf) COLLECTION_DELIM(org.apache.hadoop.hive.serde.serdeConstants.COLLECTION_DELIM) 
BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) HIVE_BAD_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA) CharType(com.facebook.presto.common.type.CharType) Nullable(javax.annotation.Nullable) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) Properties(java.util.Properties) HiveColumnHandle.fileSizeColumnHandle(com.facebook.presto.hive.HiveColumnHandle.fileSizeColumnHandle) Reporter(org.apache.hadoop.mapred.Reporter) HiveColumnHandle.pathColumnHandle(com.facebook.presto.hive.HiveColumnHandle.pathColumnHandle) IOException(java.io.IOException) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Field(java.lang.reflect.Field) HiveColumnHandle.isBucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isBucketColumnHandle) Chars.trimTrailingSpaces(com.facebook.presto.common.type.Chars.trimTrailingSpaces) SMALLINT(com.facebook.presto.common.type.SmallintType.SMALLINT) HiveColumnHandle.fileModifiedTimeColumnHandle(com.facebook.presto.hive.HiveColumnHandle.fileModifiedTimeColumnHandle) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) CustomSplitConversionUtils.recreateSplitWithCustomInfo(com.facebook.presto.hive.util.CustomSplitConversionUtils.recreateSplitWithCustomInfo) TextLineLengthLimitExceededException(com.facebook.presto.hadoop.TextLineLengthLimitExceededException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) Long.parseLong(java.lang.Long.parseLong) ReflectionUtils(org.apache.hadoop.util.ReflectionUtils) ZstdInputStreamNoFinalizer(com.github.luben.zstd.ZstdInputStreamNoFinalizer) DateTimeParser(org.joda.time.format.DateTimeParser) HIVE_INVALID_VIEW_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_VIEW_DATA) HiveColumnHandle.isFileSizeColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isFileSizeColumnHandle) 
HoodieParquetRealtimeInputFormat(org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat) READ_COLUMN_NAMES_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR) Float.parseFloat(java.lang.Float.parseFloat) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ByteArrayInputStream(java.io.ByteArrayInputStream) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) ConfigurationUtils.copy(com.facebook.presto.hive.util.ConfigurationUtils.copy) Splitter(com.google.common.base.Splitter) Method(java.lang.reflect.Method) SliceUtf8(io.airlift.slice.SliceUtf8) DateTimeFormat(org.joda.time.format.DateTimeFormat) ISODateTimeFormat(org.joda.time.format.ISODateTimeFormat) HIVE_UNSUPPORTED_FORMAT(com.facebook.presto.hive.HiveErrorCode.HIVE_UNSUPPORTED_FORMAT) ZstdOutputStreamNoFinalizer(com.github.luben.zstd.ZstdOutputStreamNoFinalizer) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) VarcharType(com.facebook.presto.common.type.VarcharType) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Codec(com.facebook.airlift.json.Codec) Decimals.isLongDecimal(com.facebook.presto.common.type.Decimals.isLongDecimal) String.format(java.lang.String.format) PrestoAvroSerDe(com.facebook.presto.hive.avro.PrestoAvroSerDe) RecordCursor(com.facebook.presto.spi.RecordCursor) Base64(java.util.Base64) List(java.util.List) ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) Annotation(java.lang.annotation.Annotation) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) 
READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) Pattern(java.util.regex.Pattern) PARTITION_KEY(com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) Column(com.facebook.presto.hive.metastore.Column) ROUND_UNNECESSARY(java.math.BigDecimal.ROUND_UNNECESSARY) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) Lists.transform(com.google.common.collect.Lists.transform) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) HiveColumnHandle.isPathColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isPathColumnHandle) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) LzopCodec(io.airlift.compress.lzo.LzopCodec) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) MapredParquetInputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat) ImmutableList(com.google.common.collect.ImmutableList) HoodieTableMetaClient(org.apache.hudi.common.table.HoodieTableMetaClient) TypeManager(com.facebook.presto.common.type.TypeManager) HIVE_FILE_MISSING_COLUMN_NAMES(com.facebook.presto.hive.HiveErrorCode.HIVE_FILE_MISSING_COLUMN_NAMES) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcType(com.facebook.presto.orc.metadata.OrcType) HoodieParquetInputFormat(org.apache.hudi.hadoop.HoodieParquetInputFormat) Type(com.facebook.presto.common.type.Type) NamedTypeSignature(com.facebook.presto.common.type.NamedTypeSignature) VerifyException(com.google.common.base.VerifyException) Storage(com.facebook.presto.hive.metastore.Storage) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) UTF_8(java.nio.charset.StandardCharsets.UTF_8) 
HIVE_DEFAULT_DYNAMIC_PARTITION(com.facebook.presto.hive.metastore.MetastoreUtil.HIVE_DEFAULT_DYNAMIC_PARTITION) Decimals(com.facebook.presto.common.type.Decimals) Integer.parseInt(java.lang.Integer.parseInt) JavaUtils(org.apache.hadoop.hive.common.JavaUtils) JobConf(org.apache.hadoop.mapred.JobConf) TimeUnit(java.util.concurrent.TimeUnit) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) RowFieldName(com.facebook.presto.common.type.RowFieldName) MetastoreUtil.checkCondition(com.facebook.presto.hive.metastore.MetastoreUtil.checkCondition) InputStream(java.io.InputStream) DecimalType.createDecimalType(com.facebook.presto.common.type.DecimalType.createDecimalType) FooterAwareRecordReader(com.facebook.presto.hive.util.FooterAwareRecordReader) RecordReader(org.apache.hadoop.mapred.RecordReader) Writable(org.apache.hadoop.io.Writable) LzoCodec(io.airlift.compress.lzo.LzoCodec) PrestoException(com.facebook.presto.spi.PrestoException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) BigInteger(java.math.BigInteger) LzopCodec(io.airlift.compress.lzo.LzopCodec) TextLineLengthLimitExceededException(com.facebook.presto.hadoop.TextLineLengthLimitExceededException) WritableComparable(org.apache.hadoop.io.WritableComparable) ConfigurationUtils.toJobConf(com.facebook.presto.hive.util.ConfigurationUtils.toJobConf) JobConf(org.apache.hadoop.mapred.JobConf)

Aggregations

INTEGER (com.facebook.presto.common.type.IntegerType.INTEGER)28 ImmutableList (com.google.common.collect.ImmutableList)26 List (java.util.List)25 Type (com.facebook.presto.common.type.Type)23 Optional (java.util.Optional)22 BIGINT (com.facebook.presto.common.type.BigintType.BIGINT)19 Map (java.util.Map)19 Test (org.testng.annotations.Test)19 BOOLEAN (com.facebook.presto.common.type.BooleanType.BOOLEAN)18 ImmutableMap (com.google.common.collect.ImmutableMap)18 Page (com.facebook.presto.common.Page)17 DOUBLE (com.facebook.presto.common.type.DoubleType.DOUBLE)17 ArrayList (java.util.ArrayList)17 Assert.assertEquals (org.testng.Assert.assertEquals)17 REAL (com.facebook.presto.common.type.RealType.REAL)15 SMALLINT (com.facebook.presto.common.type.SmallintType.SMALLINT)15 Block (com.facebook.presto.common.block.Block)14 DATE (com.facebook.presto.common.type.DateType.DATE)14 VARCHAR (com.facebook.presto.common.type.VarcharType.VARCHAR)14 Collectors.toList (java.util.stream.Collectors.toList)14