
Example 6 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

Source: class TestOrcReaderMemoryUsage, method createSingleColumnVarcharFile.

/**
 * Write a file that contains {@code count} rows with a single VARCHAR column; every value
 * is a non-null string of {@code length} zeros.
 */
private static TempFile createSingleColumnVarcharFile(int count, int length) throws Exception {
    Serializer serde = new OrcSerde();
    TempFile tempFile = new TempFile();
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, VARCHAR);
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", VARCHAR);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < count; i++) {
        objectInspector.setStructFieldData(row, field, Strings.repeat("0", length));
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
    return tempFile;
}
Also used: SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector), OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde), Writable (org.apache.hadoop.io.Writable), Serializer (org.apache.hadoop.hive.serde2.Serializer)
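
A minimal read-back sketch, not part of the Presto test: it reopens the file written above with Hive's own ORC reader and counts rows, which is a quick way to sanity-check a helper like this. The readBackRowCount name is ours; it assumes the org.apache.hadoop.hive.ql.io.orc reader classes (OrcFile, Reader, RecordReader) are on the classpath, as they are in this test module.

/**
 * Count the rows in an ORC file by iterating it with the Hive ORC reader.
 */
private static long readBackRowCount(File file) throws IOException {
    Reader reader = OrcFile.createReader(new Path(file.toURI()), OrcFile.readerOptions(new Configuration()));
    RecordReader rows = reader.rows();
    try {
        long count = 0;
        Object row = null;
        while (rows.hasNext()) {
            // next() reuses the previous row object when possible
            row = rows.next(row);
            count++;
        }
        return count;
    } finally {
        rows.close();
    }
}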

Example 7 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

Source: class TestOrcReaderMemoryUsage, method createSingleColumnMapFileWithNullValues.

/**
 * Write a file that contains {@code rows} rows with a single map column, where each map
 * has 10 entries in total: eight non-null entries, one with a null key, and one with a
 * null value.
 */
private static TempFile createSingleColumnMapFileWithNullValues(Type mapType, int rows) throws IOException, ReflectiveOperationException, SerDeException {
    Serializer serde = new OrcSerde();
    TempFile tempFile = new TempFile();
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, mapType);
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", mapType);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 1; i <= rows; i++) {
        HashMap<Long, Long> map = new HashMap<>();
        for (int j = 1; j <= 8; j++) {
            Long value = (long) j;
            map.put(value, value);
        }
        // Add null values so that the StreamReader nullVectors are not empty.
        map.put(null, 0L);
        map.put(0L, null);
        objectInspector.setStructFieldData(row, field, map);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
    return tempFile;
}
Also used: FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), HashMap (java.util.HashMap), Writable (org.apache.hadoop.io.Writable), SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector), OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde), Serializer (org.apache.hadoop.hive.serde2.Serializer)
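
A hedged usage sketch of the helper above, not taken verbatim from TestOrcReaderMemoryUsage: the mapType(...) helper producing a Presto map&lt;bigint,bigint&gt; Type, the static BIGINT import, and TempFile being Closeable are assumptions about the surrounding test harness.

@Test
public void testMapColumnWithNullEntries() throws Exception {
    // mapType(BIGINT, BIGINT) is assumed to build the Presto Type for map<bigint,bigint>
    try (TempFile tempFile = createSingleColumnMapFileWithNullValues(mapType(BIGINT, BIGINT), 10_000)) {
        assertTrue(tempFile.getFile().length() > 0);
        // a real memory-tracking test would now scan the file with Presto's OrcReader
        // and assert on the retained sizes reported by the stream readers
    }
}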

Example 8 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

Source: class AbstractTestHiveFileFormats, method createTestFile.

public static FileSplit createTestFile(String filePath, HiveStorageFormat storageFormat, HiveCompressionCodec compressionCodec, List<TestColumn> testColumns, int numRows) throws Exception {
    HiveOutputFormat<?, ?> outputFormat = newInstance(storageFormat.getOutputFormat(), HiveOutputFormat.class);
    Serializer serializer = newInstance(storageFormat.getSerDe(), Serializer.class);
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serializer.initialize(new Configuration(), tableProperties);
    JobConf jobConf = configureCompression(new JobConf(), compressionCodec);
    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(jobConf, new Path(filePath), Text.class, compressionCodec != HiveCompressionCodec.NONE, tableProperties, () -> {
    });
    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(ImmutableList.copyOf(transform(testColumns, TestColumn::getName)), ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));
        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }
            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    } finally {
        recordWriter.close(false);
    }
    // TODO: to test with compression, the file must be renamed with the compression extension
    Path path = new Path(filePath);
    path.getFileSystem(new Configuration()).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
Also used: Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), Writable (org.apache.hadoop.io.Writable), Properties (java.util.Properties), FileSplit (org.apache.hadoop.mapred.FileSplit), SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector), RecordWriter (org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), Slice (io.airlift.slice.Slice), SerDeUtils.serializeObject (com.facebook.presto.hive.util.SerDeUtils.serializeObject), JobConf (org.apache.hadoop.mapred.JobConf), File (java.io.File), Serializer (org.apache.hadoop.hive.serde2.Serializer)
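
A hedged sketch of the read side: the FileSplit returned above can be fed straight back to the matching Hadoop InputFormat. The countRows helper is ours; the InputFormat instance would come from storageFormat.getInputFormat() via the same reflective newInstance factory used in createTestFile.

private static <K, V extends Writable> long countRows(FileSplit split, InputFormat<K, V> inputFormat, JobConf jobConf) throws IOException {
    org.apache.hadoop.mapred.RecordReader<K, V> reader = inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
    try {
        K key = reader.createKey();
        V value = reader.createValue();
        long rows = 0;
        while (reader.next(key, value)) {
            rows++;
        }
        return rows;
    } finally {
        reader.close();
    }
}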

Example 9 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

Source: class TestOrcBatchPageSourceMemoryTracking, method createTestFile.

public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat, Serializer serializer, String compressionCodec, List<TestColumn> testColumns, int numRows, int stripeRows) throws Exception {
    // filter out partition keys, which are not written to the file
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serializer.initialize(CONFIGURATION, tableProperties);
    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }
    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);
    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(ImmutableList.copyOf(transform(testColumns, TestColumn::getName)), ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));
        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }
            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
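            // force a stripe boundary every stripeRows rows so the file has a
            // predictable stripe layout for the memory-tracking assertions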
            if (rowNumber % stripeRows == stripeRows - 1) {
                flushStripe(recordWriter);
            }
        }
    } finally {
        recordWriter.close(false);
    }
    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
Also used: Path (org.apache.hadoop.fs.Path), Writable (org.apache.hadoop.io.Writable), Properties (java.util.Properties), FileSplit (org.apache.hadoop.mapred.FileSplit), SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector), RecordWriter (org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), CompressionCodecFactory (org.apache.hadoop.io.compress.CompressionCodecFactory), Slice (io.airlift.slice.Slice), CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec), JobConf (org.apache.hadoop.mapred.JobConf), SequenceFile (org.apache.hadoop.io.SequenceFile), File (java.io.File), OrcFile (org.apache.hadoop.hive.ql.io.orc.OrcFile)
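
The flushStripe call above forces a stripe boundary mid-file. A hedged sketch of how such a helper can be built with java.lang.reflect, assuming the Hive ORC record writer keeps its internal writer in a private "writer" field and that the writer class declares a private flushStripe() method; the actual Presto helper may differ across Hive versions.

private static void flushStripe(RecordWriter recordWriter) {
    try {
        // reach into the ORC RecordWriter for the underlying ORC writer (assumed field name)
        Field writerField = recordWriter.getClass().getDeclaredField("writer");
        writerField.setAccessible(true);
        Object orcWriter = writerField.get(recordWriter);
        // invoke the private flushStripe() to close out the current stripe (assumed method name)
        Method flushStripeMethod = orcWriter.getClass().getDeclaredMethod("flushStripe");
        flushStripeMethod.setAccessible(true);
        flushStripeMethod.invoke(orcWriter);
    } catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
    }
}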

Example 10 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

Source: class OrcTester, method writeOrcFileColumnOld.

public static DataSize writeOrcFileColumnOld(File outputFile, Format format, RecordWriter recordWriter, ObjectInspector columnObjectInspector, Iterator<?> values) throws Exception {
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    int i = 0;
    TypeInfo typeInfo = getTypeInfoFromTypeString(columnObjectInspector.getTypeName());
    while (values.hasNext()) {
        Object value = values.next();
        value = preprocessWriteValueOld(typeInfo, value);
        objectInspector.setStructFieldData(row, fields.get(0), value);
        @SuppressWarnings("deprecation") Serializer serde;
        if (DWRF == format) {
            serde = new com.facebook.hive.orc.OrcSerde();
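            // partway through the file, flip the DWRF writer into its low-memory
            // mode so that code path is exercised as well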
            if (i == 142_345) {
                setDwrfLowMemoryFlag(recordWriter);
            }
        } else {
            serde = new OrcSerde();
        }
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
        i++;
    }
    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
Also used: Writable (org.apache.hadoop.io.Writable), StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), CharTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo), SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde), Serializer (org.apache.hadoop.hive.serde2.Serializer)
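
A hedged usage sketch of writeOrcFileColumnOld (the createOrcRecordWriter, ORC_12, and CompressionKind names are those already shown on this page; the BIGINT column choice, its static import, and javaLongObjectInspector from PrimitiveObjectInspectorFactory are our assumptions):

// write 100 BIGINT values through writeOrcFileColumnOld and report the file size
File outputFile = File.createTempFile("single-bigint", ".orc");
RecordWriter recordWriter = createOrcRecordWriter(outputFile, ORC_12, CompressionKind.NONE, BIGINT);
Iterator<Long> values = LongStream.range(0, 100).boxed().iterator();
DataSize written = writeOrcFileColumnOld(outputFile, ORC_12, recordWriter,
        PrimitiveObjectInspectorFactory.javaLongObjectInspector, values);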

Aggregations

StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 21 usages
SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector): 18 usages
Writable (org.apache.hadoop.io.Writable): 16 usages
Serializer (org.apache.hadoop.hive.serde2.Serializer): 12 usages
OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde): 9 usages
OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector): 8 usages
RecordWriter (org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter): 8 usages
Path (org.apache.hadoop.fs.Path): 7 usages
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 7 usages
Properties (java.util.Properties): 5 usages
JobConf (org.apache.hadoop.mapred.JobConf): 5 usages
Slice (io.airlift.slice.Slice): 3 usages
File (java.io.File): 3 usages
AbstractIterator (com.google.common.collect.AbstractIterator): 2 usages
DataSize (io.airlift.units.DataSize): 2 usages
Iterator (java.util.Iterator): 2 usages
ParquetHiveSerDe (org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe): 2 usages
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable): 2 usages
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 2 usages
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 2 usages