Search in sources :

Example 11 with SettableStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

From the class TestOrcPageSourceMemoryTracking, the method createTestFile:

/**
 * Writes {@code numRows} rows of generated test data to {@code filePath} using the
 * supplied serde, then returns a {@link FileSplit} spanning the entire file.
 *
 * @param filePath         destination file path
 * @param outputFormat     Hive output format (present for API symmetry with callers)
 * @param serDe            serde used to serialize each row; initialized here
 * @param compressionCodec codec name to configure, or {@code null} for no compression
 * @param testColumns      columns to write; partition-key columns are ignored
 * @param numRows          number of rows to write
 * @return a split covering bytes {@code [0, file length)} of the written file
 */
public static FileSplit createTestFile(String filePath, HiveOutputFormat<?, ?> outputFormat, @SuppressWarnings("deprecation") SerDe serDe, String compressionCodec, List<TestColumn> testColumns, int numRows) throws Exception {
    // Partition keys live in the directory path, not the file body, so drop them first.
    testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));
    Properties schema = new Properties();
    schema.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
    schema.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
    serDe.initialize(CONFIGURATION, schema);
    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        // NOTE(review): these settings go into jobConf, but createRecordWriter below is
        // handed CONFIGURATION — confirm the writer actually picks up the codec config.
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }
    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);
    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                ImmutableList.copyOf(transform(testColumns, TestColumn::getName)),
                ImmutableList.copyOf(transform(testColumns, TestColumn::getObjectInspector)));
        Object row = objectInspector.create();
        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            int columnIndex = 0;
            for (TestColumn column : testColumns) {
                Object value = column.getWriteValue();
                // Slices are handed to the serde as raw byte arrays.
                if (value instanceof Slice) {
                    value = ((Slice) value).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(columnIndex), value);
                columnIndex++;
            }
            recordWriter.write(serDe.serialize(row, objectInspector));
            // Flush every STRIPE_ROWS rows so the file contains multiple stripes.
            if (rowNumber % STRIPE_ROWS == STRIPE_ROWS - 1) {
                flushStripe(recordWriter);
            }
        }
    } finally {
        recordWriter.close(false);
    }
    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    return new FileSplit(path, 0, new File(filePath).length(), new String[0]);
}
Also used : Path(org.apache.hadoop.fs.Path) Writable(org.apache.hadoop.io.Writable) Properties(java.util.Properties) FileSplit(org.apache.hadoop.mapred.FileSplit) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) CompressionCodecFactory(org.apache.hadoop.io.compress.CompressionCodecFactory) Slice(io.airlift.slice.Slice) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) JobConf(org.apache.hadoop.mapred.JobConf) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File)

Example 12 with SettableStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

From the class ParquetTester, the method writeParquetColumn:

/**
 * Writes every value from {@code values} as a single-column Parquet file and returns
 * the resulting file size.
 *
 * @param jobConf               Hadoop job configuration for the writer and serde
 * @param outputFile            destination file
 * @param compressionCodecName  Parquet compression codec; UNCOMPRESSED disables compression
 * @param columnObjectInspector inspector describing the single column's type
 * @param values                values to write, one row per value
 * @return the size of the written file
 */
private static DataSize writeParquetColumn(JobConf jobConf, File outputFile, CompressionCodecName compressionCodecName, ObjectInspector columnObjectInspector, Iterator<?> values) throws Exception {
    RecordWriter recordWriter = new MapredParquetOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class, compressionCodecName != UNCOMPRESSED, createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
    });
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    // The serde configuration does not change between rows, so create and
    // initialize it once instead of once per value (it was previously re-created
    // inside the loop).
    ParquetHiveSerDe serde = new ParquetHiveSerDe();
    serde.initialize(jobConf, createTableProperties("test", columnObjectInspector.getTypeName()), null);
    try {
        while (values.hasNext()) {
            objectInspector.setStructFieldData(row, fields.get(0), values.next());
            Writable record = serde.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    } finally {
        // Always close so the Parquet footer is written and file handles released,
        // even if serialization of a value fails.
        recordWriter.close(false);
    }
    return succinctBytes(outputFile.length());
}
Also used : Path(org.apache.hadoop.fs.Path) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MapredParquetOutputFormat(org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat) ParquetHiveSerDe(org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe) Writable(org.apache.hadoop.io.Writable)

Example 13 with SettableStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project nifi by apache.

From the class NiFiOrcUtils, the method createOrcStruct:

/**
 * Create an object of OrcStruct given a TypeInfo and a list of objects
 *
 * @param typeInfo The TypeInfo object representing the ORC record schema
 * @param objs     ORC objects/Writables, one per field of the schema, in field order
 * @return an OrcStruct containing the specified objects for the specified schema
 * @throws IllegalArgumentException if fewer values are supplied than the schema has fields
 */
public static OrcStruct createOrcStruct(TypeInfo typeInfo, Object... objs) {
    SettableStructObjectInspector oi = (SettableStructObjectInspector) OrcStruct.createObjectInspector(typeInfo);
    List<StructField> fields = (List<StructField>) oi.getAllStructFieldRefs();
    // Fail fast with a clear message instead of an ArrayIndexOutOfBoundsException
    // partway through populating the struct.
    if (objs.length < fields.size()) {
        throw new IllegalArgumentException(
                "Expected " + fields.size() + " values for schema " + typeInfo + " but got " + objs.length);
    }
    OrcStruct result = (OrcStruct) oi.create();
    result.setNumFields(fields.size());
    for (int i = 0; i < fields.size(); i++) {
        oi.setStructFieldData(result, fields.get(i), objs[i]);
    }
    return result;
}
Also used : SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ArrayList(java.util.ArrayList) List(java.util.List)

Example 14 with SettableStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project hive by apache.

From the class VectorExpressionWriterFactory, the method getSettableExpressionWriters:

/**
 * Builds one settable {@link VectorExpressionWriter} per field of the given struct
 * inspector, in field order.
 *
 * @param objInspector settable struct inspector whose fields determine the writers
 * @return an array with one writer per struct field
 * @throws HiveException if a writer cannot be generated for a field's inspector
 */
public static VectorExpressionWriter[] getSettableExpressionWriters(SettableStructObjectInspector objInspector) throws HiveException {
    List<? extends StructField> structFields = objInspector.getAllStructFieldRefs();
    int fieldCount = structFields.size();
    VectorExpressionWriter[] result = new VectorExpressionWriter[fieldCount];
    for (int index = 0; index < fieldCount; index++) {
        StructField field = structFields.get(index);
        // Wrap the per-field writer in one bound to this struct inspector and field.
        VectorExpressionWriter fieldWriter = genVectorExpressionWritable(field.getFieldObjectInspector());
        result[index] = genVectorExpressionWritable(objInspector, field, fieldWriter);
    }
    return result;
}
Also used : StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField)

Example 15 with SettableStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb.

From the class ParquetTestUtils, the method writeParquetColumnHive:

/**
 * Writes every value from {@code values} to {@code file} as a single-column Parquet
 * file using the Hive writer, with snappy compression and fixed block/page sizes.
 *
 * @param file       destination file
 * @param columnName name of the single column
 * @param nullable   whether the column may contain nulls
 * @param type       Presto type of the column
 * @param values     values to write, one row per value; {@code null} entries write null
 */
static void writeParquetColumnHive(File file, String columnName, boolean nullable, Type type, Iterator<?> values) throws Exception {
    JobConf jobConf = new JobConf();
    // Set this config to get around the issue of LocalFileSystem not getting registered when running the benchmarks using
    // the standalone jar with all dependencies
    jobConf.set("fs.file.impl", LocalFileSystem.class.getCanonicalName());
    jobConf.setLong(ParquetOutputFormat.BLOCK_SIZE, new DataSize(256, MEGABYTE).toBytes());
    jobConf.setLong(ParquetOutputFormat.PAGE_SIZE, new DataSize(100, KILOBYTE).toBytes());
    jobConf.set(ParquetOutputFormat.COMPRESSION, "snappy");
    Properties properties = new Properties();
    properties.setProperty("columns", columnName);
    properties.setProperty("columns.types", getHiveType(type));
    RecordWriter recordWriter = createParquetWriter(nullable, new Path(file.getAbsolutePath()), jobConf, properties, true);
    // Close in a finally so the writer is released even if serialization fails,
    // matching the try/finally pattern used by the other test writers.
    try {
        List<ObjectInspector> objectInspectors = getRowObjectInspectors(type);
        SettableStructObjectInspector tableObjectInspector = getStandardStructObjectInspector(ImmutableList.of(columnName), objectInspectors);
        Object row = tableObjectInspector.create();
        StructField structField = tableObjectInspector.getStructFieldRef(columnName);
        Setter setter = getSetter(type, tableObjectInspector, row, structField);
        Serializer serializer = initializeSerializer(jobConf, properties);
        while (values.hasNext()) {
            Object value = values.next();
            if (value == null) {
                // Null bypasses the typed setter and clears the field directly.
                tableObjectInspector.setStructFieldData(row, structField, null);
            } else {
                setter.set(value);
            }
            recordWriter.write(serializer.serialize(row, tableObjectInspector));
        }
    } finally {
        recordWriter.close(false);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) PrimitiveObjectInspectorFactory.writableTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableTimestampObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) PrimitiveObjectInspectorFactory.writableFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableFloatObjectInspector) 
PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) PrimitiveObjectInspectorFactory.writableBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableBooleanObjectInspector) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) PrimitiveObjectInspectorFactory.writableDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableDoubleObjectInspector) PrimitiveObjectInspectorFactory.writableLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableLongObjectInspector) PrimitiveObjectInspectorFactory.writableByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableByteObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector) PrimitiveObjectInspectorFactory.writableHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableHiveCharObjectInspector) PrimitiveObjectInspectorFactory.writableShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableShortObjectInspector) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) 
PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) PrimitiveObjectInspectorFactory.writableStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableStringObjectInspector) PrimitiveObjectInspectorFactory.writableIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.writableIntObjectInspector) Properties(java.util.Properties) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) LocalFileSystem(org.apache.hadoop.fs.LocalFileSystem) DataSize(io.airlift.units.DataSize) JobConf(org.apache.hadoop.mapred.JobConf) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Aggregations

StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)21 SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector)18 Writable (org.apache.hadoop.io.Writable)16 Serializer (org.apache.hadoop.hive.serde2.Serializer)12 OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde)9 OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector)8 RecordWriter (org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter)8 Path (org.apache.hadoop.fs.Path)7 FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator)7 Properties (java.util.Properties)5 JobConf (org.apache.hadoop.mapred.JobConf)5 Slice (io.airlift.slice.Slice)3 File (java.io.File)3 AbstractIterator (com.google.common.collect.AbstractIterator)2 DataSize (io.airlift.units.DataSize)2 Iterator (java.util.Iterator)2 ParquetHiveSerDe (org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe)2 DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable)2 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)2 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)2