Example 26 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

The class Table, method getFields.

public ArrayList<StructField> getFields() {
    ArrayList<StructField> fields = new ArrayList<StructField>();
    try {
        Deserializer decoder = getDeserializer();
        // Expand out all the columns of the table
        StructObjectInspector structObjectInspector = (StructObjectInspector) decoder.getObjectInspector();
        List<? extends StructField> fld_lst = structObjectInspector.getAllStructFieldRefs();
        for (StructField field : fld_lst) {
            fields.add(field);
        }
    } catch (SerDeException e) {
        throw new RuntimeException(e);
    }
    return fields;
}
Also used: StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), Deserializer (org.apache.hadoop.hive.serde2.Deserializer), ArrayList (java.util.ArrayList), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
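
For context, here is a minimal, self-contained sketch of the same pattern outside of Table: initialize a LazySimpleSerDe by hand and walk its struct fields via getAllStructFieldRefs(), just as getFields() does. The column names and types are made up for illustration; any columns/columns.types pair works.

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class FieldWalk {
    public static void main(String[] args) throws Exception {
        // Illustrative schema; real code gets these from table metadata.
        Properties props = new Properties();
        props.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
        props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
        LazySimpleSerDe serde = new LazySimpleSerDe();
        serde.initialize(new Configuration(), props);
        // Same expansion step as Table.getFields() above.
        StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector();
        for (StructField field : soi.getAllStructFieldRefs()) {
            System.out.println(field.getFieldName() + " : "
                + field.getFieldObjectInspector().getTypeName());
        }
    }
}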

Example 27 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

The class HashTableDummyOperator, method initializeOp.

@Override
protected void initializeOp(Configuration hconf) throws HiveException {
    super.initializeOp(hconf);
    TableDesc tbl = this.getConf().getTbl();
    try {
        Deserializer serde = tbl.getDeserializerClass().newInstance();
        SerDeUtils.initializeSerDe(serde, hconf, tbl.getProperties(), null);
        this.outputObjInspector = serde.getObjectInspector();
    } catch (Exception e) {
        LOG.error("Generating output obj inspector from dummy object error", e);
        e.printStackTrace();
    }
}
Also used: Deserializer (org.apache.hadoop.hive.serde2.Deserializer), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
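
The reflective pattern above generalizes: given a serde class name from table metadata, instantiate it and hand it to SerDeUtils.initializeSerDe to obtain an ObjectInspector. A sketch of that pattern, where serdeClassName and tableProps are caller-supplied assumptions rather than values from any particular table:

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

public class SerDeBootstrap {
    // serdeClassName and tableProps come from table metadata in real code.
    static ObjectInspector inspectorFor(String serdeClassName, Configuration conf,
            Properties tableProps) throws Exception {
        Class<? extends Deserializer> clazz =
            Class.forName(serdeClassName).asSubclass(Deserializer.class);
        Deserializer serde = clazz.newInstance();
        // Same call initializeOp makes; null means no partition properties.
        SerDeUtils.initializeSerDe(serde, conf, tableProps, null);
        return serde.getObjectInspector();
    }
}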

Example 28 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

The class DDLTask, method createTableLike.

/**
   * Create a new table like an existing table.
   *
   * @param db
   *          The database in question.
   * @param crtTbl
   *          This is the table we're creating.
   * @return Returns 0 when execution succeeds and above 0 if it fails.
   * @throws HiveException
   *           Throws this exception if an unexpected error occurs.
   */
private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exception {
    // Get the existing table
    Table oldtbl = db.getTable(crtTbl.getLikeTableName());
    Table tbl;
    if (oldtbl.getTableType() == TableType.VIRTUAL_VIEW || oldtbl.getTableType() == TableType.MATERIALIZED_VIEW) {
        String targetTableName = crtTbl.getTableName();
        tbl = db.newTable(targetTableName);
        if (crtTbl.getTblProps() != null) {
            tbl.getTTable().getParameters().putAll(crtTbl.getTblProps());
        }
        tbl.setTableType(TableType.MANAGED_TABLE);
        if (crtTbl.isExternal()) {
            tbl.setProperty("EXTERNAL", "TRUE");
            tbl.setTableType(TableType.EXTERNAL_TABLE);
        }
        tbl.setFields(oldtbl.getCols());
        tbl.setPartCols(oldtbl.getPartCols());
        if (crtTbl.getDefaultSerName() == null) {
            LOG.info("Default to LazySimpleSerDe for table " + crtTbl.getTableName());
            tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
        } else {
            // let's validate that the serde exists
            validateSerDe(crtTbl.getDefaultSerName());
            tbl.setSerializationLib(crtTbl.getDefaultSerName());
        }
        if (crtTbl.getDefaultSerdeProps() != null) {
            Iterator<Entry<String, String>> iter = crtTbl.getDefaultSerdeProps().entrySet().iterator();
            while (iter.hasNext()) {
                Entry<String, String> m = iter.next();
                tbl.setSerdeParam(m.getKey(), m.getValue());
            }
        }
        tbl.setInputFormatClass(crtTbl.getDefaultInputFormat());
        tbl.setOutputFormatClass(crtTbl.getDefaultOutputFormat());
        tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
        tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
    } else {
        tbl = oldtbl;
        // find out database name and table name of target table
        String targetTableName = crtTbl.getTableName();
        String[] names = Utilities.getDbTableName(targetTableName);
        tbl.setDbName(names[0]);
        tbl.setTableName(names[1]);
        // using old table object, hence reset the owner to current user for new table.
        tbl.setOwner(SessionState.getUserFromAuthenticator());
        if (crtTbl.getLocation() != null) {
            tbl.setDataLocation(new Path(crtTbl.getLocation()));
        } else {
            tbl.unsetDataLocation();
        }
        Class<? extends Deserializer> serdeClass = oldtbl.getDeserializerClass();
        Map<String, String> params = tbl.getParameters();
        // We should copy only those table parameters that are specified in the config.
        SerDeSpec spec = AnnotationUtils.getAnnotation(serdeClass, SerDeSpec.class);
        String paramsStr = HiveConf.getVar(conf, HiveConf.ConfVars.DDL_CTL_PARAMETERS_WHITELIST);
        Set<String> retainer = new HashSet<String>();
        // for non-native table, property storage_handler should be retained
        retainer.add(META_TABLE_STORAGE);
        if (spec != null && spec.schemaProps() != null) {
            retainer.addAll(Arrays.asList(spec.schemaProps()));
        }
        if (paramsStr != null) {
            retainer.addAll(Arrays.asList(paramsStr.split(",")));
        }
        // Retain Parquet INT96 write zone property to keep Parquet timezone bugfixes.
        if (params.get(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY) != null) {
            retainer.add(ParquetTableUtils.PARQUET_INT96_WRITE_ZONE_PROPERTY);
        }
        if (!retainer.isEmpty()) {
            params.keySet().retainAll(retainer);
        } else {
            params.clear();
        }
        if (crtTbl.getTblProps() != null) {
            params.putAll(crtTbl.getTblProps());
        }
        if (crtTbl.isUserStorageFormat()) {
            tbl.setInputFormatClass(crtTbl.getDefaultInputFormat());
            tbl.setOutputFormatClass(crtTbl.getDefaultOutputFormat());
            tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
            tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
            if (crtTbl.getDefaultSerName() == null) {
                LOG.info("Default to LazySimpleSerDe for like table " + crtTbl.getTableName());
                tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
            } else {
                // let's validate that the serde exists
                validateSerDe(crtTbl.getDefaultSerName());
                tbl.setSerializationLib(crtTbl.getDefaultSerName());
            }
        }
        tbl.getTTable().setTemporary(crtTbl.isTemporary());
        if (crtTbl.isExternal()) {
            tbl.setProperty("EXTERNAL", "TRUE");
            tbl.setTableType(TableType.EXTERNAL_TABLE);
        } else {
            tbl.getParameters().remove("EXTERNAL");
        }
    }
    if (!Utilities.isDefaultNameNode(conf)) {
        // If location is specified - ensure that it is a full qualified name
        makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tbl.getTableName(), conf);
    }
    if (crtTbl.getLocation() == null && !tbl.isPartitioned() && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
        StatsSetupConst.setBasicStatsStateForCreateTable(tbl.getTTable().getParameters(), StatsSetupConst.TRUE);
    }
    // create the table
    db.createTable(tbl, crtTbl.getIfNotExists());
    addIfAbsentByName(new WriteEntity(tbl, WriteEntity.WriteType.DDL_NO_LOCK));
    return 0;
}
Also used: Path (org.apache.hadoop.fs.Path), Table (org.apache.hadoop.hive.ql.metadata.Table), LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe), SerDeSpec (org.apache.hadoop.hive.serde2.SerDeSpec), Entry (java.util.Map.Entry), WriteEntity (org.apache.hadoop.hive.ql.hooks.WriteEntity), HashSet (java.util.HashSet)
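
The parameter-copy step is easy to misread: params.keySet() is a live view of the map, so retainAll mutates the table parameters in place, dropping every key that is not whitelisted. A small standalone sketch of that idiom (the property names here are illustrative, not the actual whitelist):

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class RetainDemo {
    public static void main(String[] args) {
        Map<String, String> params = new HashMap<>();
        params.put("storage_handler", "h");        // whitelisted below
        params.put("transient_lastDdlTime", "1");  // dropped
        params.put("my.custom.prop", "x");         // dropped
        Set<String> retainer = new HashSet<>();
        retainer.add("storage_handler");
        // keySet() is a live view, so this removes entries from the map itself.
        params.keySet().retainAll(retainer);
        System.out.println(params); // prints {storage_handler=h}
    }
}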

Example 29 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

The class Vectorizer, method canSpecializeReduceSink.

private boolean canSpecializeReduceSink(ReduceSinkDesc desc, boolean isTezOrSpark, VectorizationContext vContext, VectorReduceSinkInfo vectorReduceSinkInfo) throws HiveException {
    // Allocate a VectorReduceSinkDesc initially with key type NONE so EXPLAIN can report this
    // operator was vectorized, but not native.  And, the conditions.
    VectorReduceSinkDesc vectorDesc = new VectorReduceSinkDesc();
    desc.setVectorDesc(vectorDesc);
    boolean isVectorizationReduceSinkNativeEnabled = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED);
    String engine = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    boolean hasBuckets = desc.getBucketCols() != null && !desc.getBucketCols().isEmpty();
    boolean hasTopN = desc.getTopN() >= 0;
    boolean useUniformHash = desc.getReducerTraits().contains(UNIFORM);
    boolean hasDistinctColumns = desc.getDistinctColumnIndices().size() > 0;
    TableDesc keyTableDesc = desc.getKeySerializeInfo();
    Class<? extends Deserializer> keySerializerClass = keyTableDesc.getDeserializerClass();
    boolean isKeyBinarySortable = (keySerializerClass == org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe.class);
    TableDesc valueTableDesc = desc.getValueSerializeInfo();
    Class<? extends Deserializer> valueDeserializerClass = valueTableDesc.getDeserializerClass();
    boolean isValueLazyBinary = (valueDeserializerClass == org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe.class);
    // Remember the condition variables for EXPLAIN regardless.
    vectorDesc.setIsVectorizationReduceSinkNativeEnabled(isVectorizationReduceSinkNativeEnabled);
    vectorDesc.setEngine(engine);
    vectorDesc.setHasBuckets(hasBuckets);
    vectorDesc.setHasTopN(hasTopN);
    vectorDesc.setUseUniformHash(useUniformHash);
    vectorDesc.setHasDistinctColumns(hasDistinctColumns);
    vectorDesc.setIsKeyBinarySortable(isKeyBinarySortable);
    vectorDesc.setIsValueLazyBinary(isValueLazyBinary);
    // Many restrictions.
    if (!isVectorizationReduceSinkNativeEnabled || !isTezOrSpark || hasBuckets || hasTopN || !useUniformHash || hasDistinctColumns || !isKeyBinarySortable || !isValueLazyBinary) {
        return false;
    }
    // We are doing work here we'd normally do in VectorGroupByCommonOperator's constructor.
    // So if we later decide not to specialize, we'll just waste any scratch columns allocated...
    List<ExprNodeDesc> keysDescs = desc.getKeyCols();
    VectorExpression[] allKeyExpressions = vContext.getVectorExpressions(keysDescs);
    // Since a key expression can be a calculation and the key will go into a scratch column,
    // we need the mapping and type information.
    int[] reduceSinkKeyColumnMap = new int[allKeyExpressions.length];
    TypeInfo[] reduceSinkKeyTypeInfos = new TypeInfo[allKeyExpressions.length];
    Type[] reduceSinkKeyColumnVectorTypes = new Type[allKeyExpressions.length];
    ArrayList<VectorExpression> groupByKeyExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] reduceSinkKeyExpressions;
    for (int i = 0; i < reduceSinkKeyColumnMap.length; i++) {
        VectorExpression ve = allKeyExpressions[i];
        reduceSinkKeyColumnMap[i] = ve.getOutputColumn();
        reduceSinkKeyTypeInfos[i] = keysDescs.get(i).getTypeInfo();
        reduceSinkKeyColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkKeyTypeInfos[i]);
        if (!IdentityExpression.isColumnOnly(ve)) {
            groupByKeyExpressionsList.add(ve);
        }
    }
    if (groupByKeyExpressionsList.size() == 0) {
        reduceSinkKeyExpressions = null;
    } else {
        reduceSinkKeyExpressions = groupByKeyExpressionsList.toArray(new VectorExpression[0]);
    }
    ArrayList<ExprNodeDesc> valueDescs = desc.getValueCols();
    VectorExpression[] allValueExpressions = vContext.getVectorExpressions(valueDescs);
    int[] reduceSinkValueColumnMap = new int[valueDescs.size()];
    TypeInfo[] reduceSinkValueTypeInfos = new TypeInfo[valueDescs.size()];
    Type[] reduceSinkValueColumnVectorTypes = new Type[valueDescs.size()];
    ArrayList<VectorExpression> reduceSinkValueExpressionsList = new ArrayList<VectorExpression>();
    VectorExpression[] reduceSinkValueExpressions;
    for (int i = 0; i < valueDescs.size(); ++i) {
        VectorExpression ve = allValueExpressions[i];
        reduceSinkValueColumnMap[i] = ve.getOutputColumn();
        reduceSinkValueTypeInfos[i] = valueDescs.get(i).getTypeInfo();
        reduceSinkValueColumnVectorTypes[i] = VectorizationContext.getColumnVectorTypeFromTypeInfo(reduceSinkValueTypeInfos[i]);
        if (!IdentityExpression.isColumnOnly(ve)) {
            reduceSinkValueExpressionsList.add(ve);
        }
    }
    if (reduceSinkValueExpressionsList.size() == 0) {
        reduceSinkValueExpressions = null;
    } else {
        reduceSinkValueExpressions = reduceSinkValueExpressionsList.toArray(new VectorExpression[0]);
    }
    vectorReduceSinkInfo.setReduceSinkKeyColumnMap(reduceSinkKeyColumnMap);
    vectorReduceSinkInfo.setReduceSinkKeyTypeInfos(reduceSinkKeyTypeInfos);
    vectorReduceSinkInfo.setReduceSinkKeyColumnVectorTypes(reduceSinkKeyColumnVectorTypes);
    vectorReduceSinkInfo.setReduceSinkKeyExpressions(reduceSinkKeyExpressions);
    vectorReduceSinkInfo.setReduceSinkValueColumnMap(reduceSinkValueColumnMap);
    vectorReduceSinkInfo.setReduceSinkValueTypeInfos(reduceSinkValueTypeInfos);
    vectorReduceSinkInfo.setReduceSinkValueColumnVectorTypes(reduceSinkValueColumnVectorTypes);
    vectorReduceSinkInfo.setReduceSinkValueExpressions(reduceSinkValueExpressions);
    return true;
}
Also used: ArrayList (java.util.ArrayList), VectorReduceSinkDesc (org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc), LazyBinarySerDe (org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe), UDFToString (org.apache.hadoop.hive.ql.udf.UDFToString), StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo), PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), InConstantType (org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType), HashTableImplementationType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType), HashTableKeyType (org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType), Type (org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type), VectorDeserializeType (org.apache.hadoop.hive.ql.plan.VectorPartitionDesc.VectorDeserializeType), OperatorType (org.apache.hadoop.hive.ql.plan.api.OperatorType), VectorExpression (org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression), TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)
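
One idiom worth calling out: both expression arrays are set to null, not to an empty array, when every key or value is a plain column reference. A tiny sketch of that convention, with illustrative names standing in for VectorExpression:

import java.util.ArrayList;
import java.util.List;

public class NullWhenEmptyDemo {
    // Mirrors canSpecializeReduceSink: an empty expression list becomes null
    // so downstream code can skip evaluation with a single null check.
    static String[] toArrayOrNull(List<String> exprs) {
        return exprs.isEmpty() ? null : exprs.toArray(new String[0]);
    }

    public static void main(String[] args) {
        System.out.println(toArrayOrNull(new ArrayList<String>())); // null
        List<String> one = new ArrayList<String>();
        one.add("col0");
        System.out.println(toArrayOrNull(one).length); // 1
    }
}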

Example 30 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

The class GenericColumnVectorProducer, method createReadPipeline.

@Override
public ReadPipeline createReadPipeline(Consumer<ColumnVectorBatch> consumer, FileSplit split, List<Integer> columnIds, SearchArgument sarg, String[] columnNames, QueryFragmentCounters counters, TypeDescription schema, InputFormat<?, ?> sourceInputFormat, Deserializer sourceSerDe, Reporter reporter, JobConf job, Map<Path, PartitionDesc> parts) throws IOException {
    cacheMetrics.incrCacheReadRequests();
    OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(consumer, columnIds.size(), false, counters, ioMetrics);
    SerDeFileMetadata fm;
    try {
        fm = new SerDeFileMetadata(sourceSerDe);
    } catch (SerDeException e) {
        throw new IOException(e);
    }
    edc.setFileMetadata(fm);
    // Note that we pass job config to the record reader, but use global config for LLAP IO.
    SerDeEncodedDataReader reader = new SerDeEncodedDataReader(cache, bufferManager, conf, split, columnIds, edc, job, reporter, sourceInputFormat, sourceSerDe, counters, fm.getSchema(), parts);
    edc.init(reader, reader);
    if (LlapIoImpl.LOG.isDebugEnabled()) {
        LlapIoImpl.LOG.debug("Ignoring schema: " + schema);
    }
    return edc;
}
Also used: SerDeEncodedDataReader (org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader), IOException (java.io.IOException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException)
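
The catch block shows the usual translation at an API boundary: createReadPipeline is declared to throw IOException, so the checked SerDeException is wrapped rather than widened in the method signature. A minimal sketch, with mayFail standing in for the SerDeFileMetadata constructor:

import java.io.IOException;
import org.apache.hadoop.hive.serde2.SerDeException;

public class WrapDemo {
    static void readPipeline() throws IOException {
        try {
            mayFail();
        } catch (SerDeException e) {
            throw new IOException(e); // keeps the original as the cause
        }
    }

    // Placeholder for the metadata extraction that can throw SerDeException.
    static void mayFail() throws SerDeException {
    }
}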

Aggregations

Deserializer (org.apache.hadoop.hive.serde2.Deserializer): 19
ArrayList (java.util.ArrayList): 14
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 14
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 13
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 10
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 10
IOException (java.io.IOException): 7
Properties (java.util.Properties): 7
Path (org.apache.hadoop.fs.Path): 6
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 6
Configuration (org.apache.hadoop.conf.Configuration): 5
HashMap (java.util.HashMap): 4
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 4
PartitionDesc (org.apache.hadoop.hive.ql.plan.PartitionDesc): 4
LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe): 4
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 4
List (java.util.List): 3
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 3
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 3
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 3