
Example 1 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project hive by apache.

The class VectorDeserializeRow, method convertStructRowColumn:

private Object convertStructRowColumn(ColumnVector colVector, int batchIndex, Field field) throws IOException {
    final SettableStructObjectInspector structOI = (SettableStructObjectInspector) field.objectInspector;
    final List<? extends StructField> structFields = structOI.getAllStructFieldRefs();
    final StructComplexTypeHelper structHelper = (StructComplexTypeHelper) field.getComplexHelper();
    final Field[] fields = structHelper.getFields();
    final StructColumnVector structColumnVector = (StructColumnVector) colVector;
    // Allocate an empty struct, then populate it field by field from the column vector.
    final Object struct = structOI.create();
    for (int i = 0; i < fields.length; i++) {
        final Object fieldObject = convertComplexFieldRowColumn(structColumnVector.fields[i], batchIndex, fields[i]);
        structOI.setStructFieldData(struct, structFields.get(i), fieldObject);
    }
    // Signal the reader that all variable-length fields of this complex type have been consumed.
    deserializeRead.finishComplexVariableFieldsType();
    return struct;
}
Also used : SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField)
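
The create/set pattern above is the whole SettableStructObjectInspector contract: allocate a struct with create(), then fill it one field at a time through the inspector. A minimal standalone sketch using Hive's stock StandardStructObjectInspector (the class name StructInspectorSketch and the field names id and name are illustrative, not from the Hive source above):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector;

public class StructInspectorSketch {
    public static void main(String[] args) {
        // StandardStructObjectInspector is Hive's stock settable implementation.
        SettableStructObjectInspector oi = ObjectInspectorFactory.getStandardStructObjectInspector(
                Arrays.asList("id", "name"),
                Arrays.<ObjectInspector>asList(javaLongObjectInspector, javaStringObjectInspector));
        List<? extends StructField> refs = oi.getAllStructFieldRefs();
        // create() allocates an empty struct; setStructFieldData fills one field at a time.
        Object row = oi.create();
        oi.setStructFieldData(row, refs.get(0), 42L);
        oi.setStructFieldData(row, refs.get(1), "alice");
        // Field access goes back through the inspector, not through the row object itself.
        System.out.println(oi.getStructFieldData(row, refs.get(1)));
    }
}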

Example 2 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

The class ParquetTester, method writeParquetColumn:

private static DataSize writeParquetColumn(JobConf jobConf, File outputFile, CompressionCodecName compressionCodecName, Properties tableProperties, SettableStructObjectInspector objectInspector, Iterator<?>[] valuesByField, Optional<MessageType> parquetSchema, boolean singleLevelArray) throws Exception {
    RecordWriter recordWriter = new TestMapredParquetOutputFormat(parquetSchema, singleLevelArray).getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class, compressionCodecName != UNCOMPRESSED, tableProperties, () -> {
    });
    // Reuse a single row object; the inspector overwrites its fields on every iteration.
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    // The serde is stateless once initialized, so create it once rather than per row.
    ParquetHiveSerDe serde = new ParquetHiveSerDe();
    serde.initialize(jobConf, tableProperties, null);
    while (stream(valuesByField).allMatch(Iterator::hasNext)) {
        for (int field = 0; field < fields.size(); field++) {
            Object value = valuesByField[field].next();
            objectInspector.setStructFieldData(row, fields.get(field), value);
        }
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
    }
    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
Also used : Path(org.apache.hadoop.fs.Path) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) TestMapredParquetOutputFormat(com.facebook.presto.hive.parquet.write.TestMapredParquetOutputFormat) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) AbstractIterator(com.google.common.collect.AbstractIterator) Iterator(java.util.Iterator) Writable(org.apache.hadoop.io.Writable)
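
Note that ParquetHiveSerDe reads its column names and types from tableProperties during initialize, not from the object inspector passed to serialize. A hedged sketch of what the caller would put in those properties, assuming the usual Hive keys serdeConstants.LIST_COLUMNS ("columns") and serdeConstants.LIST_COLUMN_TYPES ("columns.types"); the column names and the colon-separated type list are made up for illustration:

import java.util.Properties;
import org.apache.hadoop.hive.serde.serdeConstants;

public class ParquetSchemaProperties {
    // Builds the table properties the serde derives its schema from.
    static Properties schemaProperties() {
        Properties props = new Properties();
        props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");            // key "columns"
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "bigint:string"); // key "columns.types"
        return props;
    }
}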

Example 3 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

The class OrcTester, method writeOrcFileColumnHive:

private static DataSize writeOrcFileColumnHive(File outputFile, Format format, RecordWriter recordWriter, List<Type> types, List<List<?>> values) throws Exception {
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector(types);
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    Serializer serializer = format.createSerializer();
    // values is column-major: values.get(column).get(row).
    for (int i = 0; i < values.get(0).size(); i++) {
        for (int j = 0; j < types.size(); j++) {
            Object value = preprocessWriteValueHive(types.get(j), values.get(j).get(i));
            objectInspector.setStructFieldData(row, fields.get(j), value);
        }
        // Exercise the DWRF writer's low-memory path partway through the file.
        if (DWRF == format && i == 142_345) {
            setDwrfLowMemoryFlag(recordWriter);
        }
        Writable record = serializer.serialize(row, objectInspector);
        recordWriter.write(record);
    }
    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
Also used : SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) LongWritable(org.apache.hadoop.io.LongWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) ByteWritable(org.apache.hadoop.io.ByteWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) OrcLazyObject(com.facebook.hive.orc.lazy.OrcLazyObject) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Example 4 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

The class AbstractTestOrcReader, method createTempFile:

private static TempFile createTempFile(int nRecords) throws IOException, SerDeException {
    TempFile file = new TempFile();
    RecordWriter writer = createOrcRecordWriter(file.getFile(), ORC_12, CompressionKind.NONE, BIGINT);
    @SuppressWarnings("deprecation") Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", BIGINT);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    objectInspector.setStructFieldData(row, field, 1L);
    // Every row is identical, so serialize once and write the same record nRecords times.
    Writable record = serde.serialize(row, objectInspector);
    for (int i = 0; i < nRecords; i++) {
        writer.write(record);
    }
    writer.close(false);
    return file;
}
Also used : SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) OrcTester.createOrcRecordWriter(com.facebook.presto.orc.OrcTester.createOrcRecordWriter) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Writable(org.apache.hadoop.io.Writable) Serializer(org.apache.hadoop.hive.serde2.Serializer)
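
Examples 4 and 5 import createSettableStructObjectInspector from OrcTester; its body is not shown on this page. A plausible single-column reconstruction, under the assumption that it delegates to Hive's standard factory (the real helper takes a Presto Type argument such as BIGINT; this sketch takes the Hive inspector directly as a simplification, with javaLongObjectInspector standing in for the BIGINT case):

import java.util.Collections;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector;

public class SingleColumnInspectorSketch {
    // Hypothetical stand-in for OrcTester.createSettableStructObjectInspector("test", BIGINT):
    // a one-field struct whose single column is a Java long (Hive BIGINT).
    static SettableStructObjectInspector createSettableStructObjectInspector(String name, ObjectInspector fieldInspector) {
        return ObjectInspectorFactory.getStandardStructObjectInspector(
                Collections.singletonList(name),
                Collections.singletonList(fieldInspector));
    }

    public static void main(String[] args) {
        // Usage mirroring the examples above.
        SettableStructObjectInspector oi = createSettableStructObjectInspector("test", javaLongObjectInspector);
        Object row = oi.create();
        oi.setStructFieldData(row, oi.getAllStructFieldRefs().get(0), 1L);
    }
}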

Example 5 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

The class TestOrcReaderMemoryUsage, method createSingleColumnFileWithNullValues:

/**
 * Write a file with a single BIGINT column where every tenth row is null.
 */
private static TempFile createSingleColumnFileWithNullValues(int rows) throws IOException, ReflectiveOperationException, SerDeException {
    Serializer serde = new OrcSerde();
    TempFile tempFile = new TempFile();
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, BIGINT);
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", BIGINT);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < rows; i++) {
        // Make every tenth row null; setStructFieldData accepts null as a field value.
        if (i % 10 == 0) {
            objectInspector.setStructFieldData(row, field, null);
        } else {
            objectInspector.setStructFieldData(row, field, (long) i);
        }
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
    return tempFile;
}
Also used : SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Writable(org.apache.hadoop.io.Writable) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Aggregations

StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)21 SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector)18 Writable (org.apache.hadoop.io.Writable)16 Serializer (org.apache.hadoop.hive.serde2.Serializer)12 OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde)9 OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector)8 RecordWriter (org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter)8 Path (org.apache.hadoop.fs.Path)7 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)7 Properties (java.util.Properties)5 JobConf (org.apache.hadoop.mapred.JobConf)5 Slice (io.airlift.slice.Slice)3 File (java.io.File)3 AbstractIterator (com.google.common.collect.AbstractIterator)2 DataSize (io.airlift.units.DataSize)2 Iterator (java.util.Iterator)2 ParquetHiveSerDe (org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe)2 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)2 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)2 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)2