Search in sources :

Example 11 with INTEGER

use of io.prestosql.spi.type.IntegerType.INTEGER in project hetu-core by openlookeng.

the class ElasticsearchMetadata method toPrestoType.

private Type toPrestoType(IndexMetadata.Field metaDataField, boolean isArray) {
    IndexMetadata.Type type = metaDataField.getType();
    if (isArray) {
        Type elementType = toPrestoType(metaDataField, false);
        return new ArrayType(elementType);
    }
    if (type instanceof PrimitiveType) {
        switch(((PrimitiveType) type).getName()) {
            case "float":
                return REAL;
            case "double":
                return DOUBLE;
            case "byte":
                return TINYINT;
            case "short":
                return SMALLINT;
            case "integer":
                return INTEGER;
            case "long":
                return BIGINT;
            case "string":
            case "text":
            case "keyword":
                return VARCHAR;
            case "ip":
                return ipAddressType;
            case "boolean":
                return BOOLEAN;
            case "binary":
                return VARBINARY;
            default:
                break;
        }
    } else if (type instanceof DateTimeType) {
        if (((DateTimeType) type).getFormats().isEmpty()) {
            return TIMESTAMP;
        }
    // otherwise, skip -- we don't support custom formats, yet
    } else if (type instanceof ObjectType) {
        ObjectType objectType = (ObjectType) type;
        List<RowType.Field> fields = objectType.getFields().stream().map(field -> RowType.field(field.getName(), toPrestoType(field))).collect(toImmutableList());
        return RowType.from(fields);
    }
    return null;
}
Also used : ArrayType(io.prestosql.spi.type.ArrayType) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) StandardTypes(io.prestosql.spi.type.StandardTypes) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) DateTimeType(io.prestosql.elasticsearch.client.IndexMetadata.DateTimeType) PrimitiveType(io.prestosql.elasticsearch.client.IndexMetadata.PrimitiveType) Inject(javax.inject.Inject) ObjectType(io.prestosql.elasticsearch.client.IndexMetadata.ObjectType) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) VARCHAR(io.prestosql.spi.type.VarcharType.VARCHAR) ImmutableList(com.google.common.collect.ImmutableList) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) IndexMetadata(io.prestosql.elasticsearch.client.IndexMetadata) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) RowType(io.prestosql.spi.type.RowType) BOOLEAN(io.prestosql.spi.type.BooleanType.BOOLEAN) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) Type(io.prestosql.spi.type.Type) REAL(io.prestosql.spi.type.RealType.REAL) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) ENGLISH(java.util.Locale.ENGLISH) Constraint(io.prestosql.spi.connector.Constraint) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) ImmutableMap(com.google.common.collect.ImmutableMap) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TupleDomain(io.prestosql.spi.predicate.TupleDomain) ArrayType(io.prestosql.spi.type.ArrayType) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) TypeManager(io.prestosql.spi.type.TypeManager) TIMESTAMP(io.prestosql.spi.type.TimestampType.TIMESTAMP) TINYINT(io.prestosql.spi.type.TinyintType.TINYINT) Collectors(java.util.stream.Collectors) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) VARBINARY(io.prestosql.spi.type.VarbinaryType.VARBINARY) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) SMALLINT(io.prestosql.spi.type.SmallintType.SMALLINT) Optional(java.util.Optional) TypeSignature(io.prestosql.spi.type.TypeSignature) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) ObjectType(io.prestosql.elasticsearch.client.IndexMetadata.ObjectType) DateTimeType(io.prestosql.elasticsearch.client.IndexMetadata.DateTimeType) PrimitiveType(io.prestosql.elasticsearch.client.IndexMetadata.PrimitiveType) ObjectType(io.prestosql.elasticsearch.client.IndexMetadata.ObjectType) RowType(io.prestosql.spi.type.RowType) Type(io.prestosql.spi.type.Type) ArrayType(io.prestosql.spi.type.ArrayType) DateTimeType(io.prestosql.elasticsearch.client.IndexMetadata.DateTimeType) PrimitiveType(io.prestosql.elasticsearch.client.IndexMetadata.PrimitiveType) IndexMetadata(io.prestosql.elasticsearch.client.IndexMetadata)

Example 12 with INTEGER

use of io.prestosql.spi.type.IntegerType.INTEGER in project hetu-core by openlookeng.

the class TestMergingPageIterator method testMerging.

@Test
public void testMerging() {
    List<Type> types = ImmutableList.of(INTEGER, INTEGER);
    List<Integer> sortIndexes = ImmutableList.of(1);
    List<SortOrder> sortOrders = ImmutableList.of(SortOrder.ASC_NULLS_FIRST);
    List<List<Page>> pageLists = new ArrayList<>();
    PageBuilder pageBuilder = new PageBuilder(types);
    for (int i = 0; i < 10; i++) {
        Iterator<Integer> values = IntStream.range(0, 1000).map(ignored -> ThreadLocalRandom.current().nextInt(100_000)).mapToObj(n -> ((n % 100) == 0) ? null : n).sorted(nullsFirst(naturalOrder())).iterator();
        List<Page> pages = new ArrayList<>();
        for (int j = 0; j < 10; j++) {
            for (int k = 0; k < 100; k++) {
                Integer n = values.next();
                pageBuilder.declarePosition();
                if (n == null) {
                    pageBuilder.getBlockBuilder(0).appendNull();
                    pageBuilder.getBlockBuilder(1).appendNull();
                } else {
                    INTEGER.writeLong(pageBuilder.getBlockBuilder(0), n);
                    INTEGER.writeLong(pageBuilder.getBlockBuilder(1), n * 22L);
                }
            }
            pages.add(pageBuilder.build());
            pageBuilder.reset();
        }
        pageLists.add(pages);
        assertFalse(values.hasNext());
    }
    List<Iterator<Page>> pages = pageLists.stream().map(List::iterator).collect(toList());
    Iterator<Page> iterator = new MergingPageIterator(pages, types, sortIndexes, sortOrders);
    List<Long> values = new ArrayList<>();
    while (iterator.hasNext()) {
        Page page = iterator.next();
        for (int i = 0; i < page.getPositionCount(); i++) {
            if (page.getBlock(0).isNull(i)) {
                assertTrue(page.getBlock(1).isNull(i));
                values.add(null);
            } else {
                long x = INTEGER.getLong(page.getBlock(0), i);
                long y = INTEGER.getLong(page.getBlock(1), i);
                assertEquals(y, x * 22);
                values.add(x);
            }
        }
    }
    assertThat(values).isSortedAccordingTo(nullsFirst(naturalOrder()));
}
Also used : IntStream(java.util.stream.IntStream) Comparator.nullsFirst(java.util.Comparator.nullsFirst) Iterator(java.util.Iterator) Comparator.naturalOrder(java.util.Comparator.naturalOrder) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Assert.assertEquals(org.testng.Assert.assertEquals) Page(io.prestosql.spi.Page) Test(org.testng.annotations.Test) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) PageBuilder(io.prestosql.spi.PageBuilder) SortOrder(io.prestosql.spi.block.SortOrder) ArrayList(java.util.ArrayList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) ImmutableList(com.google.common.collect.ImmutableList) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) Assert.assertTrue(org.testng.Assert.assertTrue) Type(io.prestosql.spi.type.Type) Assert.assertFalse(org.testng.Assert.assertFalse) ArrayList(java.util.ArrayList) SortOrder(io.prestosql.spi.block.SortOrder) Page(io.prestosql.spi.Page) PageBuilder(io.prestosql.spi.PageBuilder) Type(io.prestosql.spi.type.Type) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) ImmutableList(com.google.common.collect.ImmutableList) Test(org.testng.annotations.Test)

Example 13 with INTEGER

use of io.prestosql.spi.type.IntegerType.INTEGER in project boostkit-bigdata by kunpengcompute.

the class HiveFileFormatBenchmark method createTpchDataSet.

private static <E extends TpchEntity> TestData createTpchDataSet(FileFormat format, TpchTable<E> tpchTable, List<TpchColumn<E>> columns) {
    List<String> columnNames = columns.stream().map(TpchColumn::getColumnName).collect(toList());
    List<Type> columnTypes = columns.stream().map(HiveFileFormatBenchmark::getColumnType).map(type -> format.supportsDate() || !DATE.equals(type) ? type : createUnboundedVarcharType()).collect(toList());
    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    long dataSize = 0;
    for (E row : tpchTable.createGenerator(10, 1, 1)) {
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            TpchColumn<E> column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            switch(column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(row));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(row));
                    break;
                case DATE:
                    if (format.supportsDate()) {
                        DATE.writeLong(blockBuilder, column.getDate(row));
                    } else {
                        createUnboundedVarcharType().writeString(blockBuilder, column.getString(row));
                    }
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(row));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(row)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
        if (pageBuilder.isFull()) {
            Page page = pageBuilder.build();
            pages.add(page);
            pageBuilder.reset();
            dataSize += page.getSizeInBytes();
            if (dataSize >= MIN_DATA_SIZE) {
                break;
            }
        }
    }
    return new TestData(columnNames, columnTypes, pages.build());
}
Also used : MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) RunResult(org.openjdk.jmh.results.RunResult) LINE_ITEM(io.airlift.tpch.TpchTable.LINE_ITEM) Random(java.util.Random) Warmup(org.openjdk.jmh.annotations.Warmup) ParquetFileWriterConfig(io.prestosql.plugin.hive.ParquetFileWriterConfig) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) HiveConfig(io.prestosql.plugin.hive.HiveConfig) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Slices(io.airlift.slice.Slices) HiveTestUtils(io.prestosql.plugin.hive.HiveTestUtils) TearDown(org.openjdk.jmh.annotations.TearDown) Type(io.prestosql.spi.type.Type) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Setup(org.openjdk.jmh.annotations.Setup) BlockBuilder(io.prestosql.spi.block.BlockBuilder) Param(org.openjdk.jmh.annotations.Param) ArrayType(io.prestosql.spi.type.ArrayType) Collection(java.util.Collection) UUID(java.util.UUID) PageBuilder(io.prestosql.spi.PageBuilder) OrcFileWriterConfig(io.prestosql.plugin.hive.OrcFileWriterConfig) String.format(java.lang.String.format) TpchTable(io.airlift.tpch.TpchTable) UncheckedIOException(java.io.UncheckedIOException) TpchEntity(io.airlift.tpch.TpchEntity) OptionsBuilder(org.openjdk.jmh.runner.options.OptionsBuilder) DataSize(io.airlift.units.DataSize) List(java.util.List) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) HiveCompressionCodec(io.prestosql.plugin.hive.HiveCompressionCodec) Options(org.openjdk.jmh.runner.options.Options) TpchColumn(io.airlift.tpch.TpchColumn) Measurement(org.openjdk.jmh.annotations.Measurement) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ORDERS(io.airlift.tpch.TpchTable.ORDERS) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Scope(org.openjdk.jmh.annotations.Scope) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) OrderColumn(io.airlift.tpch.OrderColumn) ArrayList(java.util.ArrayList) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment) Statistics(org.openjdk.jmh.util.Statistics) AuxCounters(org.openjdk.jmh.annotations.AuxCounters) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) DATE(io.prestosql.spi.type.DateType.DATE) Runner(org.openjdk.jmh.runner.Runner) HadoopNative(io.prestosql.hadoop.HadoopNative) Page(io.prestosql.spi.Page) IOException(java.io.IOException) State(org.openjdk.jmh.annotations.State) Benchmark(org.openjdk.jmh.annotations.Benchmark) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Collectors.toList(java.util.stream.Collectors.toList) Fork(org.openjdk.jmh.annotations.Fork) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) IntArrays(it.unimi.dsi.fastutil.ints.IntArrays) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) DATE(io.prestosql.spi.type.DateType.DATE) ImmutableList(com.google.common.collect.ImmutableList) Page(io.prestosql.spi.Page) PageBuilder(io.prestosql.spi.PageBuilder) Type(io.prestosql.spi.type.Type) ArrayType(io.prestosql.spi.type.ArrayType) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) BlockBuilder(io.prestosql.spi.block.BlockBuilder)

Example 14 with INTEGER

use of io.prestosql.spi.type.IntegerType.INTEGER in project boostkit-bigdata by kunpengcompute.

the class HiveUtil method createRecordReader.

public static RecordReader<?, ?> createRecordReader(Configuration configuration, Path path, long start, long length, Properties schema, List<HiveColumnHandle> columns, Map<String, String> customSplitInfo) {
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns = ImmutableList.copyOf(filter(columns, column -> column.getColumnType() == HiveColumnHandle.ColumnType.REGULAR));
    List<Integer> readHiveColumnIndexes = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));
    // Tell hive the columns we would like to read, this lets hive optimize reading column oriented files
    setReadColumns(configuration, readHiveColumnIndexes);
    // Only propagate serialization schema configs by default
    Predicate<String> schemaFilter = schemaProperty -> schemaProperty.startsWith("serialization.");
    JobConf jobConf = ConfigurationUtils.toJobConf(configuration);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true, jobConf);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);
    if (!customSplitInfo.isEmpty() && isHudiRealtimeSplit(customSplitInfo)) {
        fileSplit = recreateSplitWithCustomInfo(fileSplit, customSplitInfo);
        // Add additional column information for record reader
        List<String> readHiveColumnNames = ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getName));
        jobConf.set(READ_COLUMN_NAMES_CONF_STR, Joiner.on(',').join(readHiveColumnNames));
        // Remove filter when using customSplitInfo as the record reader requires complete schema configs
        schemaFilter = schemaProperty -> true;
    }
    schema.stringPropertyNames().stream().filter(schemaFilter).forEach(name -> jobConf.set(name, schema.getProperty(name)));
    // add Airlift LZO and LZOP to head of codecs list so as to not override existing entries
    List<String> codecs = newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(jobConf.get("io.compression.codecs", "")));
    if (!codecs.contains(LzoCodec.class.getName())) {
        codecs.add(0, LzoCodec.class.getName());
    }
    if (!codecs.contains(LzopCodec.class.getName())) {
        codecs.add(0, LzopCodec.class.getName());
    }
    jobConf.set("io.compression.codecs", codecs.stream().collect(joining(",")));
    try {
        RecordReader<WritableComparable, Writable> recordReader = (RecordReader<WritableComparable, Writable>) inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL);
        int headerCount = getHeaderCount(schema);
        if (headerCount > 0) {
            Utilities.skipHeader(recordReader, headerCount, recordReader.createKey(), recordReader.createValue());
        }
        int footerCount = getFooterCount(schema);
        if (footerCount > 0) {
            recordReader = new FooterAwareRecordReader<>(recordReader, footerCount, jobConf);
        }
        return recordReader;
    } catch (IOException e) {
        if (e instanceof TextLineLengthLimitExceededException) {
            throw new PrestoException(HiveErrorCode.HIVE_BAD_DATA, "Line too long in text file: " + path, e);
        }
        throw new PrestoException(HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT, format("Error opening Hive split %s (offset=%s, length=%s) using %s: %s", path, start, length, getInputFormatName(schema), firstNonNull(e.getMessage(), e.getClass().getName())), e);
    }
}
Also used : Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) DECIMAL_TYPE_NAME(org.apache.hadoop.hive.serde.serdeConstants.DECIMAL_TYPE_NAME) Writable(org.apache.hadoop.io.Writable) NullableValue(io.prestosql.spi.predicate.NullableValue) Short.parseShort(java.lang.Short.parseShort) SortOrder(io.prestosql.spi.block.SortOrder) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) BigDecimal(java.math.BigDecimal) ObjectMapperProvider(io.airlift.json.ObjectMapperProvider) HudiRealtimeSplitConverter(io.prestosql.plugin.hive.util.HudiRealtimeSplitConverter) FileSplit(org.apache.hadoop.mapred.FileSplit) Matcher(java.util.regex.Matcher) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Chars.isCharType(io.prestosql.spi.type.Chars.isCharType) Double.parseDouble(java.lang.Double.parseDouble) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) LzoCodec(io.airlift.compress.lzo.LzoCodec) CustomSplitConversionUtils.recreateSplitWithCustomInfo(io.prestosql.plugin.hive.util.CustomSplitConversionUtils.recreateSplitWithCustomInfo) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Decimals.isLongDecimal(io.prestosql.spi.type.Decimals.isLongDecimal) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) TIMESTAMP(io.prestosql.spi.type.TimestampType.TIMESTAMP) Collectors.joining(java.util.stream.Collectors.joining) InvocationTargetException(java.lang.reflect.InvocationTargetException) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) Table(io.prestosql.plugin.hive.metastore.Table) Predicate(com.google.common.base.Predicate) DateTimePrinter(org.joda.time.format.DateTimePrinter) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) RecordReader(org.apache.hadoop.mapred.RecordReader) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) Iterables.filter(com.google.common.collect.Iterables.filter) Joiner(com.google.common.base.Joiner) Chars.trimTrailingSpaces(io.prestosql.spi.type.Chars.trimTrailingSpaces) Slice(io.airlift.slice.Slice) StandardTypes(io.prestosql.spi.type.StandardTypes) DateTimeFormatterBuilder(org.joda.time.format.DateTimeFormatterBuilder) CharType(io.prestosql.spi.type.CharType) WritableComparable(org.apache.hadoop.io.WritableComparable) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) Byte.parseByte(java.lang.Byte.parseByte) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) RecordCursor(io.prestosql.spi.connector.RecordCursor) Nullable(javax.annotation.Nullable) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) Properties(java.util.Properties) Reporter(org.apache.hadoop.mapred.Reporter) AbstractIterator(com.google.common.collect.AbstractIterator) MergingPageIterator(io.prestosql.plugin.hive.util.MergingPageIterator) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) Field(java.lang.reflect.Field) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) TableType(org.apache.hadoop.hive.metastore.TableType) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) Long.parseLong(java.lang.Long.parseLong) ReflectionUtils(org.apache.hadoop.util.ReflectionUtils) VarcharType(io.prestosql.spi.type.VarcharType) AbstractVariableWidthType(io.prestosql.spi.type.AbstractVariableWidthType) Varchars.isVarcharType(io.prestosql.spi.type.Varchars.isVarcharType) DateTimeParser(org.joda.time.format.DateTimeParser) DynamicFilter(io.prestosql.spi.dynamicfilter.DynamicFilter) DecimalType(io.prestosql.spi.type.DecimalType) READ_COLUMN_NAMES_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR) Float.parseFloat(java.lang.Float.parseFloat) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) InputFormat(org.apache.hadoop.mapred.InputFormat) Path(org.apache.hadoop.fs.Path) HiveBucketing.getHiveBucketHandle(io.prestosql.plugin.hive.HiveBucketing.getHiveBucketHandle) BOOLEAN(io.prestosql.spi.type.BooleanType.BOOLEAN) Type(io.prestosql.spi.type.Type) Splitter(com.google.common.base.Splitter) Method(java.lang.reflect.Method) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) DecimalType.createDecimalType(io.prestosql.spi.type.DecimalType.createDecimalType) SliceUtf8(io.airlift.slice.SliceUtf8) DateTimeFormat(org.joda.time.format.DateTimeFormat) ISODateTimeFormat(org.joda.time.format.ISODateTimeFormat) PrestoException(io.prestosql.spi.PrestoException) TINYINT(io.prestosql.spi.type.TinyintType.TINYINT) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) Decimals.isShortDecimal(io.prestosql.spi.type.Decimals.isShortDecimal) Base64(java.util.Base64) List(java.util.List) JsonCodecFactory(io.airlift.json.JsonCodecFactory) Annotation(java.lang.annotation.Annotation) Optional(java.util.Optional) MoreObjects.firstNonNull(com.google.common.base.MoreObjects.firstNonNull) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) Pattern(java.util.regex.Pattern) FileUtils.unescapePathName(org.apache.hadoop.hive.common.FileUtils.unescapePathName) JsonCodec(io.airlift.json.JsonCodec) ErrorCodeSupplier(io.prestosql.spi.ErrorCodeSupplier) TextInputFormat(org.apache.hadoop.mapred.TextInputFormat) Logger(io.airlift.log.Logger) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) ROUND_UNNECESSARY(java.math.BigDecimal.ROUND_UNNECESSARY) Decimals(io.prestosql.spi.type.Decimals) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) IOConstants(org.apache.hadoop.hive.ql.io.IOConstants) Lists.transform(com.google.common.collect.Lists.transform) PrestoAvroSerDe(io.prestosql.plugin.hive.avro.PrestoAvroSerDe) OptionalInt(java.util.OptionalInt) LzopCodec(io.airlift.compress.lzo.LzopCodec) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) Utilities(org.apache.hadoop.hive.ql.exec.Utilities) HashSet(java.util.HashSet) Category(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category) MapredParquetInputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat) ImmutableList(com.google.common.collect.ImmutableList) Objects.requireNonNull(java.util.Objects.requireNonNull) BloomFilterDynamicFilter(io.prestosql.spi.dynamicfilter.BloomFilterDynamicFilter) HiveColumnHandle.bucketColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle) DATE(io.prestosql.spi.type.DateType.DATE) REAL(io.prestosql.spi.type.RealType.REAL) VerifyException(com.google.common.base.VerifyException) TextLineLengthLimitExceededException(io.prestosql.hadoop.TextLineLengthLimitExceededException) Iterator(java.util.Iterator) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Page(io.prestosql.spi.Page) Integer.parseInt(java.lang.Integer.parseInt) FooterAwareRecordReader(io.prestosql.plugin.hive.util.FooterAwareRecordReader) JavaUtils(org.apache.hadoop.hive.common.JavaUtils) JobConf(org.apache.hadoop.mapred.JobConf) TimeUnit(java.util.concurrent.TimeUnit) Collectors.toList(java.util.stream.Collectors.toList) SMALLINT(io.prestosql.spi.type.SmallintType.SMALLINT) Column(io.prestosql.plugin.hive.metastore.Column) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) VisibleForTesting(com.google.common.annotations.VisibleForTesting) HiveBucketing.bucketedOnTimestamp(io.prestosql.plugin.hive.HiveBucketing.bucketedOnTimestamp) RecordReader(org.apache.hadoop.mapred.RecordReader) FooterAwareRecordReader(io.prestosql.plugin.hive.util.FooterAwareRecordReader) Writable(org.apache.hadoop.io.Writable) LzoCodec(io.airlift.compress.lzo.LzoCodec) PrestoException(io.prestosql.spi.PrestoException) IOException(java.io.IOException) FileSplit(org.apache.hadoop.mapred.FileSplit) HoodieRealtimeFileSplit(org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit) BigInteger(java.math.BigInteger) LzopCodec(io.airlift.compress.lzo.LzopCodec) TextLineLengthLimitExceededException(io.prestosql.hadoop.TextLineLengthLimitExceededException) WritableComparable(org.apache.hadoop.io.WritableComparable) JobConf(org.apache.hadoop.mapred.JobConf)

Example 15 with INTEGER

use of io.prestosql.spi.type.IntegerType.INTEGER in project boostkit-bigdata by kunpengcompute.

the class TestHivePageSink method writeTestFile.

private static long writeTestFile(HiveConfig config, HiveMetastore metastore, String outputPath) {
    HiveTransactionHandle transaction = new HiveTransactionHandle();
    HiveWriterStats stats = new HiveWriterStats();
    ConnectorPageSink pageSink = createPageSink(transaction, config, metastore, new Path("file:///" + outputPath), stats);
    List<LineItemColumn> columns = getTestColumns();
    List<Type> columnTypes = columns.stream().map(LineItemColumn::getType).map(TestHivePageSink::getHiveType).map(hiveType -> hiveType.getType(HiveTestUtils.TYPE_MANAGER)).collect(toList());
    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    int rows = 0;
    for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
        rows++;
        if (rows >= NUM_ROWS) {
            break;
        }
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            LineItemColumn column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            switch(column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(lineItem));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(lineItem));
                    break;
                case DATE:
                    DATE.writeLong(blockBuilder, column.getDate(lineItem));
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(lineItem));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(lineItem)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
    }
    Page page = pageBuilder.build();
    pageSink.appendPage(page);
    getFutureValue(pageSink.finish());
    File outputDir = new File(outputPath);
    List<File> files = ImmutableList.copyOf(outputDir.listFiles((dir, name) -> !name.endsWith(".crc")));
    File outputFile = getOnlyElement(files);
    long length = outputFile.length();
    ConnectorPageSource pageSource = createPageSource(transaction, config, outputFile);
    List<Page> pages = new ArrayList<>();
    while (!pageSource.isFinished()) {
        Page nextPage = pageSource.getNextPage();
        if (nextPage != null) {
            pages.add(nextPage.getLoadedPage());
        }
    }
    MaterializedResult expectedResults = toMaterializedResult(getSession(config), columnTypes, ImmutableList.of(page));
    MaterializedResult results = toMaterializedResult(getSession(config), columnTypes, pages);
    assertEquals(results, expectedResults);
    assertEquals(round(stats.getInputPageSizeInBytes().getAllTime().getMax()), page.getRetainedSizeInBytes());
    return length;
}
Also used : Path(org.apache.hadoop.fs.Path) NONE(io.prestosql.plugin.hive.HiveCompressionCodec.NONE) HiveTestUtils.getDefaultHiveSelectiveFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveSelectiveFactories) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink) Test(org.testng.annotations.Test) TpchColumnTypes(io.airlift.tpch.TpchColumnTypes) MaterializedResult(io.prestosql.testing.MaterializedResult) Assert.assertEquals(io.prestosql.testing.assertions.Assert.assertEquals) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Math.round(java.lang.Math.round) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Slices(io.airlift.slice.Slices) HIVE_STRING(io.prestosql.plugin.hive.HiveType.HIVE_STRING) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Path(org.apache.hadoop.fs.Path) Matchers.anyInt(org.mockito.Matchers.anyInt) Type(io.prestosql.spi.type.Type) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) HIVE_INT(io.prestosql.plugin.hive.HiveType.HIVE_INT) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) TpchColumnType(io.airlift.tpch.TpchColumnType) PageIndexerFactory(io.prestosql.spi.PageIndexerFactory) HIVE_LONG(io.prestosql.plugin.hive.HiveType.HIVE_LONG) ImmutableMap(com.google.common.collect.ImmutableMap) BlockBuilder(io.prestosql.spi.block.BlockBuilder) MetadataManager.createTestMetadataManager(io.prestosql.metadata.MetadataManager.createTestMetadataManager) PageBuilder(io.prestosql.spi.PageBuilder) String.format(java.lang.String.format) LineItemGenerator(io.airlift.tpch.LineItemGenerator) PageIndexer(io.prestosql.spi.PageIndexer) List(java.util.List) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) Stream(java.util.stream.Stream) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Optional(java.util.Optional) TestingNodeManager(io.prestosql.testing.TestingNodeManager) Joiner(com.google.common.base.Joiner) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) JsonCodec(io.airlift.json.JsonCodec) Mockito.mock(org.mockito.Mockito.mock) LineItem(io.airlift.tpch.LineItem) HivePageSinkMetadata(io.prestosql.plugin.hive.metastore.HivePageSinkMetadata) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) OptionalInt(java.util.OptionalInt) DIRECT_TO_TARGET_NEW_DIRECTORY(io.prestosql.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY) ArrayList(java.util.ArrayList) GroupByHashPageIndexerFactory(io.prestosql.GroupByHashPageIndexerFactory) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) REGULAR(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) GenericExceptionAction(io.prestosql.plugin.hive.authentication.GenericExceptionAction) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) Matchers.anyObject(org.mockito.Matchers.anyObject) LineItemColumn(io.airlift.tpch.LineItemColumn) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) DATE(io.prestosql.spi.type.DateType.DATE) FileHiveMetastore.createTestingFileHiveMetastore(io.prestosql.plugin.hive.metastore.file.FileHiveMetastore.createTestingFileHiveMetastore) HiveMetastore(io.prestosql.plugin.hive.metastore.HiveMetastore) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) Properties(java.util.Properties) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TypeManager(io.prestosql.spi.type.TypeManager) Page(io.prestosql.spi.Page) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Mockito.when(org.mockito.Mockito.when) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) File(java.io.File) Collectors.toList(java.util.stream.Collectors.toList) IntArrayBlock(io.prestosql.spi.block.IntArrayBlock) HIVE_DATE(io.prestosql.plugin.hive.HiveType.HIVE_DATE) HIVE_DOUBLE(io.prestosql.plugin.hive.HiveType.HIVE_DOUBLE) JoinCompiler(io.prestosql.sql.gen.JoinCompiler) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) Collections(java.util.Collections) LineItemColumn(io.airlift.tpch.LineItemColumn) ArrayList(java.util.ArrayList) LineItem(io.airlift.tpch.LineItem) Page(io.prestosql.spi.Page) PageBuilder(io.prestosql.spi.PageBuilder) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) Type(io.prestosql.spi.type.Type) TpchColumnType(io.airlift.tpch.TpchColumnType) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink) MaterializedResult(io.prestosql.testing.MaterializedResult) File(java.io.File) LineItemGenerator(io.airlift.tpch.LineItemGenerator) BlockBuilder(io.prestosql.spi.block.BlockBuilder)

Aggregations

INTEGER (io.prestosql.spi.type.IntegerType.INTEGER)24 Type (io.prestosql.spi.type.Type)23 List (java.util.List)23 ImmutableList (com.google.common.collect.ImmutableList)22 Optional (java.util.Optional)19 BIGINT (io.prestosql.spi.type.BigintType.BIGINT)17 ImmutableMap (com.google.common.collect.ImmutableMap)15 Page (io.prestosql.spi.Page)15 IOException (java.io.IOException)15 String.format (java.lang.String.format)13 ArrayList (java.util.ArrayList)13 Collectors.toList (java.util.stream.Collectors.toList)13 ConnectorSession (io.prestosql.spi.connector.ConnectorSession)12 Map (java.util.Map)12 Objects.requireNonNull (java.util.Objects.requireNonNull)12 Properties (java.util.Properties)12 Path (org.apache.hadoop.fs.Path)12 ConnectorPageSource (io.prestosql.spi.connector.ConnectorPageSource)11 DOUBLE (io.prestosql.spi.type.DoubleType.DOUBLE)11 TypeManager (io.prestosql.spi.type.TypeManager)11