Search in sources:

Example 21 with Serializer

use of org.apache.hadoop.hive.serde2.Serializer in project presto by prestodb.

In class OrcTester, the method writeOrcFileColumnOld:

/**
 * Writes each value from {@code values} as a single-column struct row to an ORC file
 * through the legacy Hive {@link RecordWriter} path, then returns the resulting file size.
 *
 * <p>For the DWRF format, the writer is switched into low-memory mode after row
 * 142,345 to exercise that code path during large test runs.
 *
 * @param outputFile destination file; its length is measured after the writer is closed
 * @param format target format; DWRF selects the Facebook ORC serde
 * @param recordWriter already-open writer for {@code outputFile}
 * @param columnObjectInspector inspector describing the single column's type
 * @param values values to write, preprocessed via {@code preprocessWriteValueOld}
 * @return the size of the written file as a succinct {@link DataSize}
 */
public static DataSize writeOrcFileColumnOld(File outputFile, Format format, RecordWriter recordWriter, ObjectInspector columnObjectInspector, Iterator<?> values) throws Exception {
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    TypeInfo typeInfo = getTypeInfoFromTypeString(columnObjectInspector.getTypeName());
    // The serde is loop-invariant: create it once instead of re-allocating one per row.
    @SuppressWarnings("deprecation")
    Serializer serde = (DWRF == format) ? new com.facebook.hive.orc.OrcSerde() : new OrcSerde();
    int i = 0;
    while (values.hasNext()) {
        Object value = preprocessWriteValueOld(typeInfo, values.next());
        objectInspector.setStructFieldData(row, fields.get(0), value);
        // Flip the DWRF writer into low-memory mode partway through the data.
        if (DWRF == format && i == 142_345) {
            setDwrfLowMemoryFlag(recordWriter);
        }
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
        i++;
    }
    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
Also used : Writable(org.apache.hadoop.io.Writable) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) MapTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) ListTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Example 22 with Serializer

use of org.apache.hadoop.hive.serde2.Serializer in project presto by prestodb.

In class TestOrcReaderPositions, the method createMultiStripeFile:

/**
 * Writes an ORC file of 5 stripes with 20 long values each:
 * (0,3,6,..,57), (60,..,117), .., (..297).
 */
private static void createMultiStripeFile(File file) throws IOException, ReflectiveOperationException, SerDeException {
    FileSinkOperator.RecordWriter recordWriter = createOrcRecordWriter(file, ORC_12, OrcTester.Compression.NONE, javaLongObjectInspector);
    @SuppressWarnings("deprecation")
    Serializer serializer = new OrcSerde();
    SettableStructObjectInspector rowInspector = createSettableStructObjectInspector("test", javaLongObjectInspector);
    Object row = rowInspector.create();
    StructField valueField = rowInspector.getAllStructFieldRefs().get(0);
    for (int value = 0; value < 300; value += 3) {
        // Flush at every multiple of 60 (except 0) so each stripe holds exactly 20 rows.
        if (value > 0 && value % 60 == 0) {
            flushWriter(recordWriter);
        }
        rowInspector.setStructFieldData(row, valueField, (long) value);
        recordWriter.write(serializer.serialize(row, rowInspector));
    }
    recordWriter.close(false);
}
Also used : SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Writable(org.apache.hadoop.io.Writable) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Example 23 with Serializer

use of org.apache.hadoop.hive.serde2.Serializer in project presto by prestodb.

In class TestOrcReaderPositions, the method createSequentialFile:

/**
 * Writes an ORC file containing the longs 0, 1, .., count-1, one per row.
 */
private static void createSequentialFile(File file, int count) throws IOException, ReflectiveOperationException, SerDeException {
    FileSinkOperator.RecordWriter recordWriter = createOrcRecordWriter(file, ORC_12, OrcTester.Compression.NONE, javaLongObjectInspector);
    @SuppressWarnings("deprecation")
    Serializer serializer = new OrcSerde();
    SettableStructObjectInspector rowInspector = createSettableStructObjectInspector("test", javaLongObjectInspector);
    Object row = rowInspector.create();
    StructField valueField = rowInspector.getAllStructFieldRefs().get(0);
    for (int value = 0; value < count; value++) {
        rowInspector.setStructFieldData(row, valueField, (long) value);
        recordWriter.write(serializer.serialize(row, rowInspector));
    }
    recordWriter.close(false);
}
Also used : SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Writable(org.apache.hadoop.io.Writable) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Example 24 with Serializer

use of org.apache.hadoop.hive.serde2.Serializer in project presto by prestodb.

In class RcFileTester, the method writeRcFileColumnOld:

/**
 * Writes each value from {@code values} as a single-column struct row to an RCFile
 * using the format's legacy Hive serializer, then returns the resulting file size.
 *
 * @param outputFile destination file; its length is measured after the writer is closed
 * @param format format whose serializer is used to encode rows
 * @param compression compression codec for the RCFile writer
 * @param type presto type of the single column being written
 * @param values values to write, preprocessed via {@code preprocessWriteValueOld}
 * @return the size of the written file as the most succinct {@link DataSize}
 */
private static DataSize writeRcFileColumnOld(File outputFile, Format format, Compression compression, Type type, Iterator<?> values) throws Exception {
    ObjectInspector columnInspector = getJavaObjectInspector(type);
    RecordWriter recordWriter = createRcFileWriterOld(outputFile, compression, columnInspector);
    SettableStructObjectInspector rowInspector = createSettableStructObjectInspector("test", columnInspector);
    Object row = rowInspector.create();
    List<StructField> structFields = ImmutableList.copyOf(rowInspector.getAllStructFieldRefs());
    @SuppressWarnings("deprecation")
    Serializer serializer = format.createSerializer();
    // The serializer needs the column name/type table properties before use.
    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", rowInspector.getTypeName());
    serializer.initialize(new JobConf(false), tableProperties);
    while (values.hasNext()) {
        Object processed = preprocessWriteValueOld(type, values.next());
        rowInspector.setStructFieldData(row, structFields.get(0), processed);
        recordWriter.write(serializer.serialize(row, rowInspector));
    }
    recordWriter.close(false);
    return new DataSize(outputFile.length(), BYTE).convertToMostSuccinctDataSize();
}
Also used : SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) 
SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) DataSize(io.airlift.units.DataSize) DateWritable(org.apache.hadoop.hive.serde2.io.DateWritable) Writable(org.apache.hadoop.io.Writable) IntWritable(org.apache.hadoop.io.IntWritable) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) LongWritable(org.apache.hadoop.io.LongWritable) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) ByteWritable(org.apache.hadoop.io.ByteWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) TimestampWritable(org.apache.hadoop.hive.serde2.io.TimestampWritable) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) StructObject(org.apache.hadoop.hive.serde2.StructObject) Properties(java.util.Properties) JobConf(org.apache.hadoop.mapred.JobConf) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Aggregations

TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)11 LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters)10 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)10 Properties (java.util.Properties)9 Test (org.junit.Test)9 Mutation (org.apache.accumulo.core.data.Mutation)8 Configuration (org.apache.hadoop.conf.Configuration)8 ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)8 LazyStruct (org.apache.hadoop.hive.serde2.lazy.LazyStruct)8 JobConf (org.apache.hadoop.mapred.JobConf)8 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)7 Serializer (org.apache.hadoop.hive.serde2.Serializer)7 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)6 ArrayList (java.util.ArrayList)5 ColumnVisibility (org.apache.accumulo.core.security.ColumnVisibility)5 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)5 Text (org.apache.hadoop.io.Text)5 Entry (java.util.Map.Entry)4 Connector (org.apache.accumulo.core.client.Connector)4 Instance (org.apache.accumulo.core.client.Instance)4