Example 16 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb, from the class TestOrcReaderPositions, method createGrowingSequentialFile.

// Writes `count` varchar rows; the cell length is ((i / step) + 1) * initialLength,
// so it starts at initialLength characters and grows by initialLength every `step` rows.
private static void createGrowingSequentialFile(File file, int count, int step, int initialLength) throws IOException, SerDeException {
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, VARCHAR);
    Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", VARCHAR);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < initialLength; i++) {
        builder.append("0");
    }
    String seedString = builder.toString();
    // gradually grow the length of a cell
    int previousLength = initialLength;
    for (int i = 0; i < count; i++) {
        if ((i / step + 1) * initialLength > previousLength) {
            previousLength = (i / step + 1) * initialLength;
            builder.append(seedString);
        }
        objectInspector.setStructFieldData(row, field, builder.toString());
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
}
Also used: SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector), OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde), Writable (org.apache.hadoop.io.Writable), Serializer (org.apache.hadoop.hive.serde2.Serializer)
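
A minimal driver for the helper above, assuming it runs inside TestOrcReaderPositions (the helper is private); the count, step, and initial length are hypothetical values chosen for illustration, not ones taken from the test:

// Hypothetical driver: 1000 varchar rows whose cell starts at 5000 characters
// and gains another 5000 characters every 100 rows.
private static void writeGrowingFileDemo() throws Exception {
    File file = File.createTempFile("growing", ".orc");
    file.deleteOnExit();
    createGrowingSequentialFile(file, 1000, 100, 5000);
}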

Example 17 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb, from the class TestOrcReaderPositions, method createSequentialFile.

// Writes `count` rows containing the sequential bigint values 0 through count - 1.
private static void createSequentialFile(File file, int count) throws IOException, SerDeException {
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, BIGINT);
    Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", BIGINT);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < count; i++) {
        objectInspector.setStructFieldData(row, field, (long) i);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
}
Also used: SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector), OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde), Writable (org.apache.hadoop.io.Writable), Serializer (org.apache.hadoop.hive.serde2.Serializer)
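
A hypothetical call site, again assuming the same test class since the helper is private; the row count is illustrative:

// Hypothetical driver: an ORC file with the bigint values 0..9999 in one column.
private static void writeSequentialFileDemo() throws Exception {
    File file = File.createTempFile("sequential", ".orc");
    file.deleteOnExit();
    createSequentialFile(file, 10_000);
}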

Example 18 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb, from the class TestOrcReaderPositions, method createMultiStripeFile.

// write 5 stripes of 20 values each: (0,3,6,..,57), (60,..,117), .., (..297)
private static void createMultiStripeFile(File file) throws IOException, ReflectiveOperationException, SerDeException {
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, ImmutableList.of(BIGINT, VARCHAR));
    Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector(ImmutableList.of(BIGINT, VARCHAR));
    Object row = objectInspector.create();
    StructField bigintField = objectInspector.getAllStructFieldRefs().get(0);
    StructField varcharField = objectInspector.getAllStructFieldRefs().get(1);
    for (int i = 0; i < 300; i += 3) {
        if ((i > 0) && (i % 60 == 0)) {
            flushWriter(writer);
        }
        objectInspector.setStructFieldData(row, bigintField, (long) i);
        objectInspector.setStructFieldData(row, varcharField, String.valueOf(i));
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
}
Also used: SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector), OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde), Writable (org.apache.hadoop.io.Writable), Serializer (org.apache.hadoop.hive.serde2.Serializer)
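
The layout in the method's comment follows directly from the flush condition: with a step of 3 and a flush every 60 units of i, each stripe holds 20 rows. A standalone sanity check of that arithmetic (hypothetical, not part of the test class):

// Prints the first and last value of each of the 5 stripes:
// stripe 0: 0..57, stripe 1: 60..117, ..., stripe 4: 240..297
public static void main(String[] args) {
    for (int stripe = 0; stripe < 5; stripe++) {
        int first = stripe * 60;   // 0, 60, 120, 180, 240
        int last = first + 57;     // 57, 117, 177, 237, 297
        System.out.printf("stripe %d: %d..%d step 3 (20 rows)%n", stripe, first, last);
    }
}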

Example 19 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb, from the class TestOrcMapNullKey, method createSingleColumnMapFileWithNullValues.

// Writes a single-row ORC file whose only column holds the given map; the map
// may contain null values, which is what the surrounding test exercises.
private static TempFile createSingleColumnMapFileWithNullValues(Type mapType, Map<Long, Long> map) throws IOException {
    OrcSerde serde = new OrcSerde();
    TempFile tempFile = new TempFile();
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, mapType);
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", mapType);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    objectInspector.setStructFieldData(row, field, map);
    Writable record = serde.serialize(row, objectInspector);
    writer.write(record);
    writer.close(false);
    return tempFile;
}
Also used: SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector), OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator), OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde), Writable (org.apache.hadoop.io.Writable)
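
A hypothetical invocation: the Presto map(bigint, bigint) Type instance is taken as a parameter because its construction varies across Presto versions, and the java.util imports are assumed from the surrounding test class. Odd keys map to null to exercise the reader's null-value path:

// Hypothetical driver; HashMap is used because it permits null values.
private static TempFile writeMapWithNullsDemo(Type mapType) throws IOException {
    Map<Long, Long> map = new HashMap<>();
    for (long key = 0; key < 10; key++) {
        map.put(key, key % 2 == 0 ? key * 10 : null);
    }
    return createSingleColumnMapFileWithNullValues(mapType, map);
}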

Example 20 with SettableStructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector in project presto by prestodb, from the class ParquetTester, method nonHiveParquetWriter.

// Writes Parquet directly with parquet-mr's example API, bypassing the Hive record
// writer path; only bigint and timestamp fields are handled (see the switch below).
private static void nonHiveParquetWriter(JobConf jobConf, File outputFile, org.apache.parquet.hadoop.metadata.CompressionCodecName compressionCodecName, SettableStructObjectInspector objectInspector, Iterator<?>[] valuesByField, org.apache.parquet.schema.MessageType parquetSchema) throws Exception {
    GroupWriteSupport.setSchema(parquetSchema, jobConf);
    org.apache.parquet.hadoop.ParquetWriter writer = ExampleParquetWriter.builder(new Path(outputFile.toURI()))
            .withType(parquetSchema)
            .withCompressionCodec(compressionCodecName)
            .withConf(jobConf)
            .withDictionaryEncoding(true)
            .build();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    SimpleGroupFactory groupFactory = new SimpleGroupFactory(parquetSchema);
    while (stream(valuesByField).allMatch(Iterator::hasNext)) {
        Group group = groupFactory.newGroup();
        for (int field = 0; field < fields.size(); field++) {
            Object value = valuesByField[field].next();
            if (value == null) {
                continue;
            }
            String fieldName = fields.get(field).getFieldName();
            String typeName = fields.get(field).getFieldObjectInspector().getTypeName();
            switch(typeName) {
                case "timestamp":
                case "bigint":
                    group.add(fieldName, (long) value);
                    break;
                default:
                    throw new RuntimeException(String.format("unhandled type for column %s type %s", fieldName, typeName));
            }
        }
        writer.write(group);
    }
    writer.close();
}
Also used: Path (org.apache.hadoop.fs.Path), Group (org.apache.parquet.example.data.Group), SimpleGroupFactory (org.apache.parquet.example.data.simple.SimpleGroupFactory), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), AbstractIterator (com.google.common.collect.AbstractIterator), Iterator (java.util.Iterator)
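
A hedged sketch of a call site: the schema string, output path, and codec are hypothetical, and bigintObjectInspector stands in for an inspector whose two fields report type name "bigint" and match the schema's field names (its construction is omitted). MessageTypeParser is parquet-mr's standard schema parser:

// Hypothetical usage: two bigint columns with 100 sequential values each.
org.apache.parquet.schema.MessageType schema = org.apache.parquet.schema.MessageTypeParser.parseMessageType(
        "message demo { optional int64 a; optional int64 b; }");
Iterator<?>[] valuesByField = new Iterator<?>[] {
        java.util.stream.LongStream.range(0, 100).boxed().iterator(),
        java.util.stream.LongStream.range(100, 200).boxed().iterator()
};
nonHiveParquetWriter(new JobConf(), new File("demo.parquet"),
        org.apache.parquet.hadoop.metadata.CompressionCodecName.SNAPPY,
        bigintObjectInspector, // assumed: two bigint fields matching the schema
        valuesByField, schema);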

Aggregations

StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 21
SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector): 18
Writable (org.apache.hadoop.io.Writable): 16
Serializer (org.apache.hadoop.hive.serde2.Serializer): 12
OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde): 9
OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector): 8
RecordWriter (org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter): 8
Path (org.apache.hadoop.fs.Path): 7
FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 7
Properties (java.util.Properties): 5
JobConf (org.apache.hadoop.mapred.JobConf): 5
Slice (io.airlift.slice.Slice): 3
File (java.io.File): 3
AbstractIterator (com.google.common.collect.AbstractIterator): 2
DataSize (io.airlift.units.DataSize): 2
Iterator (java.util.Iterator): 2
ParquetHiveSerDe (org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe): 2
DateWritable (org.apache.hadoop.hive.serde2.io.DateWritable): 2
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 2
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 2