
Example 1 with DeserializerOrcWriter

Use of org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.DeserializerOrcWriter in project hive by apache.

From the class VectorDeserializeOrcWriter, the static factory method create(). It returns the vectorized writer only when the input is plain text read through LazySimpleSerDe and every column is a primitive type; on any failed check it falls back to the row-by-row DeserializerOrcWriter.

// TODO: if more writers are added, separate out an EncodingWriterFactory
public static EncodingWriter create(InputFormat<?, ?> sourceIf, Deserializer serDe,
        Map<Path, PartitionDesc> parts, Configuration daemonConf, Configuration jobConf,
        Path splitPath, StructObjectInspector sourceOi, List<Integer> sourceIncludes,
        boolean[] cacheIncludes, int allocSize) throws IOException {
    // Vector SerDe can be disabled both on client and server side.
    if (!HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED)
            || !HiveConf.getBoolVar(jobConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED)
            || !(sourceIf instanceof TextInputFormat) || !(serDe instanceof LazySimpleSerDe)) {
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Path path = splitPath.getFileSystem(daemonConf).makeQualified(splitPath);
    PartitionDesc partDesc = HiveFileFormatUtils.getFromPathRecursively(parts, path, null);
    if (partDesc == null) {
        LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter: no partition desc for " + path);
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Properties tblProps = partDesc.getTableDesc().getProperties();
    if ("true".equalsIgnoreCase(tblProps.getProperty(serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST))) {
        LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter due to "
            + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST);
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    for (StructField sf : sourceOi.getAllStructFieldRefs()) {
        Category c = sf.getFieldObjectInspector().getCategory();
        if (c != Category.PRIMITIVE) {
            LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter: " + c + " is not supported");
            return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
        }
    }
    LlapIoImpl.LOG.info("Creating VectorDeserializeOrcWriter for " + path);
    return new VectorDeserializeOrcWriter(daemonConf, tblProps, sourceOi, sourceIncludes,
        cacheIncludes, allocSize);
}
Also used: DeserializerOrcWriter (org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.DeserializerOrcWriter), Path (org.apache.hadoop.fs.Path), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category), TextInputFormat (org.apache.hadoop.mapred.TextInputFormat), LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe), PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc), Properties (java.util.Properties)
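
The shape of this factory is a chain of guard clauses: each prerequisite for the vectorized path is checked in turn, and the first failure falls back to the row-by-row DeserializerOrcWriter. Below is a minimal standalone sketch of that pattern; SimpleWriter, VectorizedWriter, and WriterFactory are hypothetical stand-ins for illustration, not the Hive classes.

import java.util.List;

interface EncodingWriter {
    void write(Object row);
}

final class SimpleWriter implements EncodingWriter {
    // Safe default: deserialize and encode one row at a time.
    public void write(Object row) { /* row-by-row encode */ }
}

final class VectorizedWriter implements EncodingWriter {
    // Fast path: encode whole batches of rows at once.
    public void write(Object row) { /* batch-oriented encode */ }
}

final class WriterFactory {
    // Mirrors the structure of create(): every failed check returns
    // the safe default immediately; the fast path is the last return.
    static EncodingWriter create(boolean vectorSerdeEnabled, boolean isTextInput,
            List<String> columnCategories) {
        if (!vectorSerdeEnabled || !isTextInput) {
            return new SimpleWriter(); // feature disabled or unsupported format
        }
        for (String category : columnCategories) {
            if (!"PRIMITIVE".equals(category)) {
                return new SimpleWriter(); // complex types not supported
            }
        }
        return new VectorizedWriter(); // all prerequisites hold
    }
}

The design choice worth noting: the default is always the safe, slower writer, so a misconfiguration or an unsupported schema degrades performance rather than correctness.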

Example 2 with DeserializerOrcWriter

Use of org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.DeserializerOrcWriter in project hive by apache.

From the class VectorDeserializeOrcWriter, the static factory method create(), in an overload that also accepts an ExecutorService for encoding work. The guard clauses are identical to Example 1; only the final constructor call differs, receiving jobConf and the executor.

// TODO: if more writers are added, separate out an EncodingWriterFactory
public static EncodingWriter create(InputFormat<?, ?> sourceIf, Deserializer serDe,
        Map<Path, PartitionDesc> parts, Configuration daemonConf, Configuration jobConf,
        Path splitPath, StructObjectInspector sourceOi, List<Integer> sourceIncludes,
        boolean[] cacheIncludes, int allocSize, ExecutorService encodeExecutor) throws IOException {
    // Vector SerDe can be disabled both on client and server side.
    if (!HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED)
            || !HiveConf.getBoolVar(jobConf, ConfVars.LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED)
            || !(sourceIf instanceof TextInputFormat) || !(serDe instanceof LazySimpleSerDe)) {
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Path path = splitPath.getFileSystem(daemonConf).makeQualified(splitPath);
    PartitionDesc partDesc = HiveFileFormatUtils.getFromPathRecursively(parts, path, null);
    if (partDesc == null) {
        LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter: no partition desc for " + path);
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    Properties tblProps = partDesc.getTableDesc().getProperties();
    if ("true".equalsIgnoreCase(tblProps.getProperty(serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST))) {
        LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter due to "
            + serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST);
        return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
    }
    for (StructField sf : sourceOi.getAllStructFieldRefs()) {
        Category c = sf.getFieldObjectInspector().getCategory();
        if (c != Category.PRIMITIVE) {
            LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter: " + c + " is not supported");
            return new DeserializerOrcWriter(serDe, sourceOi, allocSize);
        }
    }
    LlapIoImpl.LOG.info("Creating VectorDeserializeOrcWriter for " + path);
    return new VectorDeserializeOrcWriter(jobConf, tblProps, sourceOi, sourceIncludes,
        cacheIncludes, allocSize, encodeExecutor);
}
Also used: DeserializerOrcWriter (org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.DeserializerOrcWriter), Path (org.apache.hadoop.fs.Path), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category), TextInputFormat (org.apache.hadoop.mapred.TextInputFormat), LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe), PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc), Properties (java.util.Properties)
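
A plausible reading of the extra ExecutorService parameter is that the vectorized writer can hand encoding work to a shared pool instead of encoding inline on the reader thread. The sketch below illustrates that hand-off using only standard java.util.concurrent APIs; AsyncEncodingWriter and encodeAsync are hypothetical names for illustration, not Hive's actual internals.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

final class AsyncEncodingWriter {
    private final ExecutorService encodeExecutor;

    // The executor is injected by the caller, as in the second create()
    // overload, so one shared pool can serve many writers.
    AsyncEncodingWriter(ExecutorService encodeExecutor) {
        this.encodeExecutor = encodeExecutor;
    }

    // Submit a batch for background encoding; the caller can keep reading
    // input while the pool encodes earlier batches.
    Future<?> encodeAsync(byte[] batch) {
        return encodeExecutor.submit(() -> {
            // encode `batch` into column streams here (omitted in this sketch)
        });
    }
}

class Demo {
    public static void main(String[] args) {
        ExecutorService pool = Executors.newFixedThreadPool(2);
        AsyncEncodingWriter writer = new AsyncEncodingWriter(pool);
        writer.encodeAsync(new byte[1024]);
        pool.shutdown();
    }
}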

Aggregations

Properties (java.util.Properties): 2 uses
Path (org.apache.hadoop.fs.Path): 2 uses
DeserializerOrcWriter (org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.DeserializerOrcWriter): 2 uses
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 2 uses
LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe): 2 uses
Category (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category): 2 uses
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 2 uses
TextInputFormat (org.apache.hadoop.mapred.TextInputFormat): 2 uses