Search in sources:

Example 1 with OrcSerde

use of org.apache.hadoop.hive.ql.io.orc.OrcSerde in project presto by prestodb.

the class OrcFileWriter method createSerializer.

/**
 * Builds an {@link OrcSerde} and initializes it with {@code CONFIGURATION} and the given properties.
 *
 * @param properties the properties handed to the serde's {@code initialize} call
 * @return the initialized serde
 */
private static OrcSerde createSerializer(Properties properties) {
    OrcSerde serializer = new OrcSerde();
    serializer.initialize(CONFIGURATION, properties);
    return serializer;
}
Also used : OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde)

Example 2 with OrcSerde

use of org.apache.hadoop.hive.ql.io.orc.OrcSerde in project presto by prestodb.

the class AbstractTestOrcReader method createTempFile.

/**
 * Creates a temp file and writes the same serialized row to it {@code nRecords} times.
 * The row is built from a "test"/BIGINT struct inspector with its first field set to {@code 1L}.
 *
 * @param nRecords how many times the row is written; values of zero or less write nothing
 * @return the temp file that was written and closed
 * @throws IOException declared for the writer calls
 * @throws SerDeException declared for the serialization call
 */
private static TempFile createTempFile(int nRecords) throws IOException, SerDeException {
    TempFile tempFile = new TempFile();
    RecordWriter orcWriter = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, BIGINT);
    @SuppressWarnings("deprecation") Serializer serializer = new OrcSerde();

    // Build the row once; it is serialized a single time and the result is reused for every write.
    SettableStructObjectInspector inspector = createSettableStructObjectInspector("test", BIGINT);
    Object singleRow = inspector.create();
    StructField firstField = inspector.getAllStructFieldRefs().get(0);
    inspector.setStructFieldData(singleRow, firstField, 1L);
    Writable serializedRow = serializer.serialize(singleRow, inspector);

    int remaining = nRecords;
    while (remaining > 0) {
        orcWriter.write(serializedRow);
        remaining--;
    }
    // close(false): the writer is closed with its boolean flag unset, as in the normal completion path.
    orcWriter.close(false);
    return tempFile;
}
Also used : SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) OrcTester.createOrcRecordWriter(com.facebook.presto.orc.OrcTester.createOrcRecordWriter) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Writable(org.apache.hadoop.io.Writable) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Example 3 with OrcSerde

use of org.apache.hadoop.hive.ql.io.orc.OrcSerde in project presto by prestodb.

the class TestOrcReaderMemoryUsage method createSingleColumnFileWithNullValues.

/**
 * Writes a temp file of {@code rows} rows with a single BIGINT column.
 * Every row whose index is a multiple of 10 (including row 0) holds null; all other rows hold
 * their own index as the value.
 *
 * @param rows number of rows to write
 * @return the temp file that was written and closed
 */
private static TempFile createSingleColumnFileWithNullValues(int rows) throws IOException, ReflectiveOperationException, SerDeException {
    Serializer serializer = new OrcSerde();
    TempFile outputFile = new TempFile();
    FileSinkOperator.RecordWriter orcWriter = createOrcRecordWriter(outputFile.getFile(), ORC_12, CompressionKind.NONE, BIGINT);
    SettableStructObjectInspector inspector = createSettableStructObjectInspector("test", BIGINT);
    Object reusedRow = inspector.create();
    StructField firstField = inspector.getAllStructFieldRefs().get(0);
    for (int rowIndex = 0; rowIndex < rows; rowIndex++) {
        // The same row object is overwritten and re-serialized on each iteration.
        Long cellValue = (rowIndex % 10 == 0) ? null : Long.valueOf((long) rowIndex);
        inspector.setStructFieldData(reusedRow, firstField, cellValue);
        orcWriter.write(serializer.serialize(reusedRow, inspector));
    }
    orcWriter.close(false);
    return outputFile;
}
Also used : SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Writable(org.apache.hadoop.io.Writable) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Example 4 with OrcSerde

use of org.apache.hadoop.hive.ql.io.orc.OrcSerde in project presto by prestodb.

the class TestOrcReaderMemoryUsage method createSingleColumnVarcharFile.

/**
 * Write a file that contains a number of rows with 1 VARCHAR column, and all values are not null.
 * Every row holds the same value: the character '0' repeated {@code length} times.
 *
 * @param count number of rows to write
 * @param length number of '0' characters in each row's value
 * @return the temp file that was written and closed
 * @throws Exception if creating the writer, serializing, or writing fails
 */
private static TempFile createSingleColumnVarcharFile(int count, int length) throws Exception {
    Serializer serde = new OrcSerde();
    TempFile tempFile = new TempFile();
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, VARCHAR);
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", VARCHAR);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    // The value is identical for every row and String is immutable, so build it once
    // instead of re-allocating a length-sized string on each iteration.
    String value = Strings.repeat("0", length);
    for (int i = 0; i < count; i++) {
        objectInspector.setStructFieldData(row, field, value);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
    return tempFile;
}
Also used : SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Writable(org.apache.hadoop.io.Writable) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Example 5 with OrcSerde

use of org.apache.hadoop.hive.ql.io.orc.OrcSerde in project presto by prestodb.

the class TestOrcReaderMemoryUsage method createSingleColumnMapFileWithNullValues.

/**
 * Writes a temp file of {@code rows} rows with a single map column of the given type.
 * Each row's map has 10 entries: keys 1 through 8 mapped to themselves, plus one entry with a
 * null key and one entry with a null value.
 *
 * @param mapType the type used both for the ORC writer and for the struct inspector's column
 * @param rows number of rows to write
 * @return the temp file that was written and closed
 */
private static TempFile createSingleColumnMapFileWithNullValues(Type mapType, int rows) throws IOException, ReflectiveOperationException, SerDeException {
    Serializer serializer = new OrcSerde();
    TempFile outputFile = new TempFile();
    FileSinkOperator.RecordWriter orcWriter = createOrcRecordWriter(outputFile.getFile(), ORC_12, CompressionKind.NONE, mapType);
    SettableStructObjectInspector inspector = createSettableStructObjectInspector("test", mapType);
    Object reusedRow = inspector.create();
    StructField firstField = inspector.getAllStructFieldRefs().get(0);
    for (int written = 0; written < rows; written++) {
        // A fresh map is built for every row.
        HashMap<Long, Long> entries = new HashMap<>();
        for (long key = 1; key <= 8; key++) {
            Long boxed = key;
            entries.put(boxed, boxed);
        }
        // Add null values so that the StreamReader nullVectors are not empty.
        entries.put(null, 0L);
        entries.put(0L, null);
        inspector.setStructFieldData(reusedRow, firstField, entries);
        orcWriter.write(serializer.serialize(reusedRow, inspector));
    }
    orcWriter.close(false);
    return outputFile;
}
Also used : FileSinkOperator(org.apache.hadoop.hive.ql.exec.FileSinkOperator) HashMap(java.util.HashMap) Writable(org.apache.hadoop.io.Writable) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) OrcTester.createSettableStructObjectInspector(com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Serializer(org.apache.hadoop.hive.serde2.Serializer)

Aggregations

OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde): 13; SettableStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector): 9; StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 9; Writable (org.apache.hadoop.io.Writable): 9; OrcTester.createSettableStructObjectInspector (com.facebook.presto.orc.OrcTester.createSettableStructObjectInspector): 8; Serializer (org.apache.hadoop.hive.serde2.Serializer): 8; FileSinkOperator (org.apache.hadoop.hive.ql.exec.FileSinkOperator): 7; DataXException (com.alibaba.datax.common.exception.DataXException): 2; IOException (java.io.IOException): 2; StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 2; Record (com.alibaba.datax.common.element.Record): 1; Configuration (com.alibaba.datax.common.util.Configuration): 1; ColumnEntry (com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry): 1; JSONObject (com.alibaba.fastjson.JSONObject): 1; OrcTester.createOrcRecordWriter (com.facebook.presto.orc.OrcTester.createOrcRecordWriter): 1; HashMap (java.util.HashMap): 1; Path (org.apache.hadoop.fs.Path): 1; RecordWriter (org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter): 1; RCFileRecordReader (org.apache.hadoop.hive.ql.io.RCFileRecordReader): 1; OrcInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat): 1