Search in sources :

Example 1 with INTEGER

use of com.facebook.presto.spi.type.IntegerType.INTEGER in project presto by prestodb.

the class TestHivePageSink method writeTestFile.

private static long writeTestFile(HiveClientConfig config, ExtendedHiveMetastore metastore, String outputPath) {
    HiveTransactionHandle transaction = new HiveTransactionHandle();
    ConnectorPageSink pageSink = createPageSink(transaction, config, metastore, new Path("file:///" + outputPath));
    List<LineItemColumn> columns = getTestColumns();
    List<Type> columnTypes = columns.stream().map(LineItemColumn::getType).map(TestHivePageSink::getHiveType).map(hiveType -> hiveType.getType(TYPE_MANAGER)).collect(toList());
    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    int rows = 0;
    for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
        rows++;
        if (rows >= NUM_ROWS) {
            break;
        }
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            LineItemColumn column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            switch(column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(lineItem));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(lineItem));
                    break;
                case DATE:
                    DATE.writeLong(blockBuilder, column.getDate(lineItem));
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(lineItem));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(lineItem)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
    }
    Page page = pageBuilder.build();
    pageSink.appendPage(page);
    getFutureValue(pageSink.finish());
    File outputDir = new File(outputPath);
    List<File> files = ImmutableList.copyOf(outputDir.listFiles((dir, name) -> !name.endsWith(".crc")));
    File outputFile = getOnlyElement(files);
    long length = outputFile.length();
    ConnectorPageSource pageSource = createPageSource(transaction, config, outputFile);
    List<Page> pages = new ArrayList<>();
    while (!pageSource.isFinished()) {
        Page nextPage = pageSource.getNextPage();
        if (nextPage != null) {
            nextPage.assureLoaded();
            pages.add(nextPage);
        }
    }
    MaterializedResult expectedResults = toMaterializedResult(getSession(config), columnTypes, ImmutableList.of(page));
    MaterializedResult results = toMaterializedResult(getSession(config), columnTypes, pages);
    assertEquals(results, expectedResults);
    return length;
}
Also used : Path(org.apache.hadoop.fs.Path) Page(com.facebook.presto.spi.Page) HiveTestUtils.getDefaultHiveDataStreamFactories(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveDataStreamFactories) HiveTestUtils.getDefaultHiveFileWriterFactories(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) Test(org.testng.annotations.Test) TpchColumnTypes(io.airlift.tpch.TpchColumnTypes) BIGINT(com.facebook.presto.spi.type.BigintType.BIGINT) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ExtendedHiveMetastore(com.facebook.presto.hive.metastore.ExtendedHiveMetastore) Slices(io.airlift.slice.Slices) ConnectorPageSink(com.facebook.presto.spi.ConnectorPageSink) Path(org.apache.hadoop.fs.Path) HIVE_LONG(com.facebook.presto.hive.HiveType.HIVE_LONG) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) TpchColumnType(io.airlift.tpch.TpchColumnType) FileUtils(io.airlift.testing.FileUtils) ImmutableMap(com.google.common.collect.ImmutableMap) BlockBuilder(com.facebook.presto.spi.block.BlockBuilder) String.format(java.lang.String.format) ConnectorSession(com.facebook.presto.spi.ConnectorSession) LineItemGenerator(io.airlift.tpch.LineItemGenerator) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) HIVE_STRING(com.facebook.presto.hive.HiveType.HIVE_STRING) List(java.util.List) Stream(java.util.stream.Stream) TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER) Optional(java.util.Optional) INTEGER(com.facebook.presto.spi.type.IntegerType.INTEGER) Joiner(com.google.common.base.Joiner) JsonCodec(io.airlift.json.JsonCodec) DOUBLE(com.facebook.presto.spi.type.DoubleType.DOUBLE) LineItem(io.airlift.tpch.LineItem) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) HivePageSinkMetadata(com.facebook.presto.hive.metastore.HivePageSinkMetadata) Assert.assertEquals(org.testng.Assert.assertEquals) OptionalInt(java.util.OptionalInt) ArrayList(java.util.ArrayList) HIVE_DATE(com.facebook.presto.hive.HiveType.HIVE_DATE) HIVE_INT(com.facebook.presto.hive.HiveType.HIVE_INT) HiveTestUtils.createTestHdfsEnvironment(com.facebook.presto.hive.HiveTestUtils.createTestHdfsEnvironment) TestingHiveMetastore(com.facebook.presto.hive.metastore.TestingHiveMetastore) ImmutableList(com.google.common.collect.ImmutableList) HIVE_DOUBLE(com.facebook.presto.hive.HiveType.HIVE_DOUBLE) Files(com.google.common.io.Files) Type(com.facebook.presto.spi.type.Type) LineItemColumn(io.airlift.tpch.LineItemColumn) Properties(java.util.Properties) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) File(java.io.File) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) MaterializedResult(com.facebook.presto.testing.MaterializedResult) NONE(com.facebook.presto.hive.HiveCompressionCodec.NONE) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) PageBuilder(com.facebook.presto.spi.PageBuilder) DATE(com.facebook.presto.spi.type.DateType.DATE) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) GroupByHashPageIndexerFactory(com.facebook.presto.GroupByHashPageIndexerFactory) JoinCompiler(com.facebook.presto.sql.gen.JoinCompiler) HiveTestUtils.getDefaultHiveRecordCursorProvider(com.facebook.presto.hive.HiveTestUtils.getDefaultHiveRecordCursorProvider) LineItemColumn(io.airlift.tpch.LineItemColumn) ArrayList(java.util.ArrayList) LineItem(io.airlift.tpch.LineItem) Page(com.facebook.presto.spi.Page) PageBuilder(com.facebook.presto.spi.PageBuilder) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) TpchColumnType(io.airlift.tpch.TpchColumnType) Type(com.facebook.presto.spi.type.Type) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) ConnectorPageSink(com.facebook.presto.spi.ConnectorPageSink) MaterializedResult(com.facebook.presto.testing.MaterializedResult) File(java.io.File) LineItemGenerator(io.airlift.tpch.LineItemGenerator) BlockBuilder(com.facebook.presto.spi.block.BlockBuilder)

Example 2 with INTEGER

use of com.facebook.presto.spi.type.IntegerType.INTEGER in project presto by prestodb.

the class TestRaptorIntegrationSmokeTest method testBucketNumberHiddenColumn.

@Test
public void testBucketNumberHiddenColumn() throws Exception {
    assertUpdate("" + "CREATE TABLE test_bucket_number " + "WITH (bucket_count = 50, bucketed_on = ARRAY ['orderkey']) " + "AS SELECT * FROM orders", "SELECT count(*) FROM orders");
    MaterializedResult actualResults = computeActual("SELECT DISTINCT \"$bucket_number\" FROM test_bucket_number");
    assertEquals(actualResults.getTypes(), ImmutableList.of(INTEGER));
    Set<Object> actual = actualResults.getMaterializedRows().stream().map(row -> row.getField(0)).collect(toSet());
    assertEquals(actual, IntStream.range(0, 50).boxed().collect(toSet()));
}
Also used : IntStream(java.util.stream.IntStream) RaptorQueryRunner.createRaptorQueryRunner(com.facebook.presto.raptor.RaptorQueryRunner.createRaptorQueryRunner) SHARD_UUID_COLUMN_TYPE(com.facebook.presto.raptor.RaptorColumnHandle.SHARD_UUID_COLUMN_TYPE) Assertions.assertInstanceOf(io.airlift.testing.Assertions.assertInstanceOf) ArrayType(com.facebook.presto.type.ArrayType) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) BIGINT(com.facebook.presto.spi.type.BigintType.BIGINT) HashMultimap(com.google.common.collect.HashMultimap) ImmutableList(com.google.common.collect.ImmutableList) BOOLEAN(com.facebook.presto.spi.type.BooleanType.BOOLEAN) Arrays.asList(java.util.Arrays.asList) Map(java.util.Map) Assertions.assertLessThan(io.airlift.testing.Assertions.assertLessThan) Assertions.assertGreaterThanOrEqual(io.airlift.testing.Assertions.assertGreaterThanOrEqual) Collectors.toSet(java.util.stream.Collectors.toSet) Assert.assertNotEquals(org.testng.Assert.assertNotEquals) ImmutableMap(com.google.common.collect.ImmutableMap) Language(org.intellij.lang.annotations.Language) Timestamp(java.sql.Timestamp) Collection(java.util.Collection) Set(java.util.Set) VARCHAR(com.facebook.presto.spi.type.VarcharType.VARCHAR) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) UUID(java.util.UUID) Assert.assertNotNull(org.testng.Assert.assertNotNull) SetMultimap(com.google.common.collect.SetMultimap) String.format(java.lang.String.format) Date(java.sql.Date) MaterializedResult(com.facebook.presto.testing.MaterializedResult) DATE(com.facebook.presto.spi.type.DateType.DATE) MaterializedRow(com.facebook.presto.testing.MaterializedRow) StringJoiner(java.util.StringJoiner) AbstractTestIntegrationSmokeTest(com.facebook.presto.tests.AbstractTestIntegrationSmokeTest) ImmutableCollectors(com.facebook.presto.util.ImmutableCollectors) INTEGER(com.facebook.presto.spi.type.IntegerType.INTEGER) MaterializedResult(com.facebook.presto.testing.MaterializedResult) Test(org.testng.annotations.Test) AbstractTestIntegrationSmokeTest(com.facebook.presto.tests.AbstractTestIntegrationSmokeTest)

Example 3 with INTEGER

use of com.facebook.presto.spi.type.IntegerType.INTEGER in project presto by prestodb.

the class HiveUtil method createRecordReader.

public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns) {
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns = ImmutableList.copyOf(filter(columns, column -> column.getColumnType() == REGULAR));
    List<Integer> readHiveColumnIndexes = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));
    // Tell hive the columns we would like to read, this lets hive optimize reading column oriented files
    setReadColumns(configuration, readHiveColumnIndexes);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true);
    JobConf jobConf = new JobConf(configuration);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);
    // propagate serialization configuration to getRecordReader
    schema.stringPropertyNames().stream().filter(name -> name.startsWith("serialization.")).forEach(name -> jobConf.set(name, schema.getProperty(name)));
    try {
        return retry().stopOnIllegalExceptions().run("createRecordReader", () -> inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL));
    } catch (Exception e) {
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length, getInputFormatName(schema), e.getMessage()), e);
    }
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) FileSystem(org.apache.hadoop.fs.FileSystem) HIVE_DEFAULT_DYNAMIC_PARTITION(com.facebook.presto.hive.HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION) DECIMAL_TYPE_NAME(org.apache.hadoop.hive.serde.serdeConstants.DECIMAL_TYPE_NAME) Short.parseShort(java.lang.Short.parseShort) BIGINT(com.facebook.presto.spi.type.BigintType.BIGINT) BigDecimal(java.math.BigDecimal) FileSplit(org.apache.hadoop.mapred.FileSplit) Matcher(java.util.regex.Matcher) BOOLEAN(com.facebook.presto.spi.type.BooleanType.BOOLEAN) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) BigInteger(java.math.BigInteger) StandardTypes(com.facebook.presto.spi.type.StandardTypes) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Double.parseDouble(java.lang.Double.parseDouble) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) Decimals(com.facebook.presto.spi.type.Decimals) TINYINT(com.facebook.presto.spi.type.TinyintType.TINYINT) HIVE_INVALID_METADATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA) HIVE_SERDE_NOT_FOUND(com.facebook.presto.hive.HiveErrorCode.HIVE_SERDE_NOT_FOUND) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) HiveColumnHandle.bucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle) InvocationTargetException(java.lang.reflect.InvocationTargetException) DecimalType.createDecimalType(com.facebook.presto.spi.type.DecimalType.createDecimalType) DateTimePrinter(org.joda.time.format.DateTimePrinter) RecordReader(org.apache.hadoop.mapred.RecordReader) Iterables.filter(com.google.common.collect.Iterables.filter) Joiner(com.google.common.base.Joiner) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) DOUBLE(com.facebook.presto.spi.type.DoubleType.DOUBLE) Table(com.facebook.presto.hive.metastore.Table) Slice(io.airlift.slice.Slice) DateTimeFormatterBuilder(org.joda.time.format.DateTimeFormatterBuilder) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) Byte.parseByte(java.lang.Byte.parseByte) Type(com.facebook.presto.spi.type.Type) TIMESTAMP(com.facebook.presto.spi.type.TimestampType.TIMESTAMP) Nullable(javax.annotation.Nullable) Properties(java.util.Properties) Reporter(org.apache.hadoop.mapred.Reporter) HiveColumnHandle.pathColumnHandle(com.facebook.presto.hive.HiveColumnHandle.pathColumnHandle) Throwables(com.google.common.base.Throwables) HiveColumnHandle.isBucketColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isBucketColumnHandle) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) VarcharType(com.facebook.presto.spi.type.VarcharType) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) Long.parseLong(java.lang.Long.parseLong) ReflectionUtils(org.apache.hadoop.util.ReflectionUtils) DateTimeParser(org.joda.time.format.DateTimeParser) HIVE_INVALID_VIEW_DATA(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_VIEW_DATA) Float.parseFloat(java.lang.Float.parseFloat) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) SchemaTableName(com.facebook.presto.spi.SchemaTableName) DecimalType(com.facebook.presto.spi.type.DecimalType) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) NullableValue(com.facebook.presto.spi.predicate.NullableValue) Method(java.lang.reflect.Method) SliceUtf8(io.airlift.slice.SliceUtf8) DateTimeFormat(org.joda.time.format.DateTimeFormat) ISODateTimeFormat(org.joda.time.format.ISODateTimeFormat) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) RetryDriver.retry(com.facebook.presto.hive.RetryDriver.retry) String.format(java.lang.String.format) RecordCursor(com.facebook.presto.spi.RecordCursor) Base64(java.util.Base64) List(java.util.List) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) Chars.trimSpaces(com.facebook.presto.spi.type.Chars.trimSpaces) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) Pattern(java.util.regex.Pattern) INTEGER(com.facebook.presto.spi.type.IntegerType.INTEGER) PARTITION_KEY(com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) FileUtils.unescapePathName(org.apache.hadoop.hive.common.FileUtils.unescapePathName) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) Column(com.facebook.presto.hive.metastore.Column) ROUND_UNNECESSARY(java.math.BigDecimal.ROUND_UNNECESSARY) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) Lists.transform(com.google.common.collect.Lists.transform) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) HiveColumnHandle.isPathColumnHandle(com.facebook.presto.hive.HiveColumnHandle.isPathColumnHandle) HIVE_CANNOT_OPEN_SPLIT(com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) MapredParquetInputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) UTF_8(java.nio.charset.StandardCharsets.UTF_8) SMALLINT(com.facebook.presto.spi.type.SmallintType.SMALLINT) Integer.parseInt(java.lang.Integer.parseInt) JavaUtils(org.apache.hadoop.hive.common.JavaUtils) CharType(com.facebook.presto.spi.type.CharType) JobConf(org.apache.hadoop.mapred.JobConf) TimeUnit(java.util.concurrent.TimeUnit) DATE(com.facebook.presto.spi.type.DateType.DATE) REAL(com.facebook.presto.spi.type.RealType.REAL) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) MetastoreUtil.getHiveSchema(com.facebook.presto.hive.metastore.MetastoreUtil.getHiveSchema) Chars.isCharType(com.facebook.presto.spi.type.Chars.isCharType) BigInteger(java.math.BigInteger) PrestoException(com.facebook.presto.spi.PrestoException) FileSplit(org.apache.hadoop.mapred.FileSplit) JobConf(org.apache.hadoop.mapred.JobConf) InvocationTargetException(java.lang.reflect.InvocationTargetException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) PrestoException(com.facebook.presto.spi.PrestoException)

Aggregations

BIGINT (com.facebook.presto.spi.type.BigintType.BIGINT)3 DATE (com.facebook.presto.spi.type.DateType.DATE)3 INTEGER (com.facebook.presto.spi.type.IntegerType.INTEGER)3 ImmutableList (com.google.common.collect.ImmutableList)3 REGULAR (com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR)2 SchemaTableName (com.facebook.presto.spi.SchemaTableName)2 BOOLEAN (com.facebook.presto.spi.type.BooleanType.BOOLEAN)2 DOUBLE (com.facebook.presto.spi.type.DoubleType.DOUBLE)2 String.format (java.lang.String.format)2 GroupByHashPageIndexerFactory (com.facebook.presto.GroupByHashPageIndexerFactory)1 PARTITION_KEY (com.facebook.presto.hive.HiveColumnHandle.ColumnType.PARTITION_KEY)1 HiveColumnHandle.bucketColumnHandle (com.facebook.presto.hive.HiveColumnHandle.bucketColumnHandle)1 HiveColumnHandle.isBucketColumnHandle (com.facebook.presto.hive.HiveColumnHandle.isBucketColumnHandle)1 HiveColumnHandle.isPathColumnHandle (com.facebook.presto.hive.HiveColumnHandle.isPathColumnHandle)1 HiveColumnHandle.pathColumnHandle (com.facebook.presto.hive.HiveColumnHandle.pathColumnHandle)1 NONE (com.facebook.presto.hive.HiveCompressionCodec.NONE)1 HIVE_CANNOT_OPEN_SPLIT (com.facebook.presto.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT)1 HIVE_INVALID_METADATA (com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA)1 HIVE_INVALID_PARTITION_VALUE (com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE)1 HIVE_INVALID_VIEW_DATA (com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_VIEW_DATA)1