
Example 21 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class Table, method shouldStoreFieldsInMetastore.

public static boolean shouldStoreFieldsInMetastore(HiveConf conf, String serdeLib, Map<String, String> tableParams) {
    if (hasMetastoreBasedSchema(conf, serdeLib)) {
        return true;
    }
    if (HiveConf.getBoolVar(conf, ConfVars.HIVE_LEGACY_SCHEMA_FOR_ALL_SERDES)) {
        return true;
    }
    // Table may or may not be using metastore. Only the SerDe can tell us.
    AbstractSerDe deserializer = null;
    try {
        Class<?> clazz = conf.getClassByName(serdeLib);
        if (!AbstractSerDe.class.isAssignableFrom(clazz)) {
            // The default.
            return true;
        }
        deserializer = ReflectionUtil.newInstance(conf.getClassByName(serdeLib).asSubclass(AbstractSerDe.class), conf);
    } catch (Exception ex) {
        LOG.warn("Cannot initialize SerDe: " + serdeLib + ", ignoring", ex);
        return true;
    }
    return deserializer.shouldStoreFieldsInMetastore(tableParams);
}
Also used : AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) IOException(java.io.IOException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
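
A minimal sketch of how this helper might be called, assuming a HiveConf and a fully loaded org.apache.hadoop.hive.ql.metadata.Table; the wrapper class and method names below are illustrative, not part of Hive:

import java.util.Map;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.Table;

public class SchemaLocationCheck {
    // Illustrative caller: asks whether the column schema of a table is owned by the
    // metastore or derived from its SerDe (e.g. Avro reading an external schema).
    public static boolean schemaLivesInMetastore(HiveConf conf, Table tbl) {
        String serdeLib = tbl.getSd().getSerdeInfo().getSerializationLib();
        Map<String, String> params = tbl.getParameters();
        // The helper above defaults to true (metastore-owned) whenever the SerDe
        // class cannot be resolved or instantiated.
        return Table.shouldStoreFieldsInMetastore(conf, serdeLib, params);
    }
}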

Example 22 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class DDLTask, method describeTable.

/**
 * Write the description of a table to a file.
 *
 * @param db
 *          The database in question.
 * @param descTbl
 *          This is the table we're interested in.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 * @throws MetaException
 */
private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, MetaException {
    String colPath = descTbl.getColumnPath();
    String tableName = descTbl.getTableName();
    // describe the table - populate the output stream
    Table tbl = db.getTable(tableName, false);
    if (tbl == null) {
        throw new HiveException(ErrorMsg.INVALID_TABLE, tableName);
    }
    Partition part = null;
    if (descTbl.getPartSpec() != null) {
        part = db.getPartition(tbl, descTbl.getPartSpec(), false);
        if (part == null) {
            throw new HiveException(ErrorMsg.INVALID_PARTITION, StringUtils.join(descTbl.getPartSpec().keySet(), ','), tableName);
        }
        tbl = part.getTable();
    }
    DataOutputStream outStream = getOutputStream(descTbl.getResFile());
    try {
        LOG.debug("DDLTask: got data for {}", tableName);
        List<FieldSchema> cols = null;
        List<ColumnStatisticsObj> colStats = null;
        Deserializer deserializer = tbl.getDeserializer(true);
        if (deserializer instanceof AbstractSerDe) {
            String errorMsgs = ((AbstractSerDe) deserializer).getConfigurationErrors();
            if (errorMsgs != null && !errorMsgs.isEmpty()) {
                throw new SQLException(errorMsgs);
            }
        }
        if (colPath.equals(tableName)) {
            cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ? tbl.getCols() : part.getCols();
            if (!descTbl.isFormatted()) {
                cols.addAll(tbl.getPartCols());
            }
            if (tbl.isPartitioned() && part == null) {
                // No partition specified for a partitioned table, so fetch stats for all partitions.
                Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
                Map<String, Long> valueMap = new HashMap<>();
                Map<String, Boolean> stateMap = new HashMap<>();
                for (String stat : StatsSetupConst.supportedStats) {
                    valueMap.put(stat, 0L);
                    stateMap.put(stat, true);
                }
                PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                int numParts = 0;
                for (Partition partition : parts) {
                    Map<String, String> props = partition.getParameters();
                    Boolean state = StatsSetupConst.areBasicStatsUptoDate(props);
                    for (String stat : StatsSetupConst.supportedStats) {
                        stateMap.put(stat, stateMap.get(stat) && state);
                        if (props != null && props.get(stat) != null) {
                            valueMap.put(stat, valueMap.get(stat) + Long.parseLong(props.get(stat)));
                        }
                    }
                    numParts++;
                }
                for (String stat : StatsSetupConst.supportedStats) {
                    StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat)));
                    tblProps.put(stat, valueMap.get(stat).toString());
                }
                tblProps.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(numParts));
                tbl.setParameters(tblProps);
            }
        } else {
            if (descTbl.isFormatted()) {
                // when a column name is specified in the describe table DDL, colPath
                // will be table_name.column_name
                String colName = colPath.split("\\.")[1];
                String[] dbTab = Utilities.getDbTableName(tableName);
                List<String> colNames = new ArrayList<String>();
                colNames.add(colName.toLowerCase());
                if (null == part) {
                    if (tbl.isPartitioned()) {
                        Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
                        if (tbl.isPartitionKey(colNames.get(0))) {
                            FieldSchema partCol = tbl.getPartColByName(colNames.get(0));
                            cols = Collections.singletonList(partCol);
                            PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                            ColumnInfo ci = new ColumnInfo(partCol.getName(), TypeInfoUtils.getTypeInfoFromTypeString(partCol.getType()), null, false);
                            ColStatistics cs = StatsUtils.getColStatsForPartCol(ci, parts, conf);
                            ColumnStatisticsData data = new ColumnStatisticsData();
                            ColStatistics.Range r = cs.getRange();
                            StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(), cs.getNumNulls(), cs.getCountDistint(), null, cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
                            ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
                            colStats = Collections.singletonList(cso);
                            StatsSetupConst.setColumnStatsState(tblProps, colNames);
                        } else {
                            cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                            List<String> parts = db.getPartitionNames(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), (short) -1);
                            AggrStats aggrStats = db.getAggrColStatsFor(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames, parts);
                            colStats = aggrStats.getColStats();
                            if (parts.size() == aggrStats.getPartsFound()) {
                                StatsSetupConst.setColumnStatsState(tblProps, colNames);
                            } else {
                                StatsSetupConst.removeColumnStatsState(tblProps, colNames);
                            }
                        }
                        tbl.setParameters(tblProps);
                    } else {
                        cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                        colStats = db.getTableColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames);
                    }
                } else {
                    List<String> partitions = new ArrayList<String>();
                    partitions.add(part.getName());
                    cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                    colStats = db.getPartitionColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), partitions, colNames).get(part.getName());
                }
            } else {
                cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
            }
        }
        PrimaryKeyInfo pkInfo = null;
        ForeignKeyInfo fkInfo = null;
        UniqueConstraint ukInfo = null;
        NotNullConstraint nnInfo = null;
        DefaultConstraint dInfo = null;
        CheckConstraint cInfo = null;
        if (descTbl.isExt() || descTbl.isFormatted()) {
            pkInfo = db.getPrimaryKeys(tbl.getDbName(), tbl.getTableName());
            fkInfo = db.getForeignKeys(tbl.getDbName(), tbl.getTableName());
            ukInfo = db.getUniqueConstraints(tbl.getDbName(), tbl.getTableName());
            nnInfo = db.getNotNullConstraints(tbl.getDbName(), tbl.getTableName());
            dInfo = db.getDefaultConstraints(tbl.getDbName(), tbl.getTableName());
            cInfo = db.getCheckConstraints(tbl.getDbName(), tbl.getTableName());
        }
        fixDecimalColumnTypeName(cols);
        // In case the query is served by HiveServer2, don't pad it with spaces,
        // as HiveServer2 output is consumed by JDBC/ODBC clients.
        boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
        formatter.describeTable(outStream, colPath, tableName, tbl, part, cols, descTbl.isFormatted(), descTbl.isExt(), isOutputPadded, colStats, pkInfo, fkInfo, ukInfo, nnInfo, dInfo, cInfo);
        LOG.debug("DDLTask: written data for {}", tableName);
    } catch (SQLException e) {
        throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
    } finally {
        IOUtils.closeStream(outStream);
    }
    return 0;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SQLException(java.sql.SQLException) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) AggrStats(org.apache.hadoop.hive.metastore.api.AggrStats) DataOutputStream(java.io.DataOutputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) PrimaryKeyInfo(org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo) ForeignKeyInfo(org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo) ColStatistics(org.apache.hadoop.hive.ql.plan.ColStatistics) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) TextMetaDataTable(org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable) Table(org.apache.hadoop.hive.ql.metadata.Table) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) PartitionIterable(org.apache.hadoop.hive.ql.metadata.PartitionIterable) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)
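
One detail worth isolating from the partitioned, formatted-column branch above: aggregated column statistics are only marked up to date when the metastore found statistics for every partition that was requested. A condensed sketch of that check, using the same Hive APIs as the method (db, colNames and tblProps are the method's variables; dbName and tableName stand in for dbTab[0] and dbTab[1]):

// Compare the number of partitions whose column stats were found against the
// number of partitions requested before trusting the aggregate.
List<String> partNames = db.getPartitionNames(dbName, tableName, (short) -1);
AggrStats aggrStats = db.getAggrColStatsFor(dbName, tableName, colNames, partNames);
boolean statsCoverAllPartitions = partNames.size() == aggrStats.getPartsFound();
if (statsCoverAllPartitions) {
    StatsSetupConst.setColumnStatsState(tblProps, colNames);
} else {
    StatsSetupConst.removeColumnStatsState(tblProps, colNames);
}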

Example 23 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class ReduceRecordSource, method init.

void init(JobConf jconf, Operator<?> reducer, boolean vectorized, TableDesc keyTableDesc, TableDesc valueTableDesc, Reader reader, boolean handleGroupKey, byte tag, VectorizedRowBatchCtx batchContext, long vectorizedVertexNum, int vectorizedTestingReducerBatchSize) throws Exception {
    this.vectorizedVertexNum = vectorizedVertexNum;
    if (vectorizedTestingReducerBatchSize > VectorizedRowBatch.DEFAULT_SIZE) {
        // For now, we don't go higher than the default batch size unless we do more work
        // to verify every vectorized operator downstream can handle a larger batch size.
        vectorizedTestingReducerBatchSize = VectorizedRowBatch.DEFAULT_SIZE;
    }
    this.vectorizedTestingReducerBatchSize = vectorizedTestingReducerBatchSize;
    ObjectInspector keyObjectInspector;
    this.reducer = reducer;
    this.vectorized = vectorized;
    this.keyTableDesc = keyTableDesc;
    if (reader instanceof KeyValueReader) {
        this.reader = new KeyValuesFromKeyValue((KeyValueReader) reader);
    } else {
        this.reader = new KeyValuesFromKeyValues((KeyValuesReader) reader);
    }
    this.handleGroupKey = handleGroupKey;
    this.tag = tag;
    try {
        inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
        keyObjectInspector = inputKeyDeserializer.getObjectInspector();
        if (vectorized) {
            keyStructInspector = (StructObjectInspector) keyObjectInspector;
            firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
        }
        // We should initialize the SerDe with the TypeInfo when available.
        this.valueTableDesc = valueTableDesc;
        inputValueDeserializer = (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(inputValueDeserializer, null, valueTableDesc.getProperties(), null);
        valueObjectInspector = inputValueDeserializer.getObjectInspector();
        ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
        if (vectorized) {
            /* vectorization only works with struct object inspectors */
            valueStructInspectors = (StructObjectInspector) valueObjectInspector;
            final int totalColumns = firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size();
            rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors);
            batch = batchContext.createVectorizedRowBatch();
            // Setup vectorized deserialization for the key and value.
            BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
            keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(
                new BinarySortableDeserializeRead(
                    VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                    /* useExternalBuffer */ true,
                    binarySortableSerDe.getSortOrders(),
                    binarySortableSerDe.getNullMarkers(),
                    binarySortableSerDe.getNotNullMarkers()));
            keyBinarySortableDeserializeToRow.init(0);
            final int valuesSize = valueStructInspectors.getAllStructFieldRefs().size();
            if (valuesSize > 0) {
                valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(
                    new LazyBinaryDeserializeRead(
                        VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspectors),
                        /* useExternalBuffer */ true));
                valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
                // Create data buffers for value bytes column vectors.
                for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
                    ColumnVector colVector = batch.cols[i];
                    if (colVector instanceof BytesColumnVector) {
                        BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
                        bytesColumnVector.initBuffer();
                    }
                }
            }
        } else {
            ois.add(keyObjectInspector);
            ois.add(valueObjectInspector);
            rowObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
        }
    } catch (Throwable e) {
        abort = true;
        if (e instanceof OutOfMemoryError) {
            // Don't create a new object if we are already out of memory
            throw (OutOfMemoryError) e;
        } else {
            throw new RuntimeException("Reduce operator initialization failed", e);
        }
    }
    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.TEZ_INIT_OPERATORS);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) BinarySortableSerDe(org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe) KeyValueReader(org.apache.tez.runtime.library.api.KeyValueReader) ArrayList(java.util.ArrayList) BinarySortableDeserializeRead(org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) ColumnVector(org.apache.hadoop.hive.ql.exec.vector.ColumnVector) KeyValuesReader(org.apache.tez.runtime.library.api.KeyValuesReader) BytesColumnVector(org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector) LazyBinaryDeserializeRead(org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead)
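
Stripped of the vectorization branches, the core of the initialization above is a common pattern: instantiate each SerDe from its TableDesc, initialize it with the table properties, and combine the key and value object inspectors into the row inspector the reducer operates on. A minimal, self-contained sketch of just that pattern (the wrapper class and method are illustrative; null is passed for the Configuration, as in the code above):

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.util.ReflectionUtils;

public class ReduceRowInspectorSketch {
    // Build the non-vectorized reducer row inspector: a struct whose fields come from
    // the key SerDe's and value SerDe's object inspectors.
    static StructObjectInspector buildRowInspector(TableDesc keyTableDesc, TableDesc valueTableDesc)
            throws SerDeException {
        Deserializer keySerDe = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(keySerDe, null, keyTableDesc.getProperties(), null);
        AbstractSerDe valueSerDe =
                (AbstractSerDe) ReflectionUtils.newInstance(valueTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
        List<ObjectInspector> ois = new ArrayList<ObjectInspector>();
        ois.add(keySerDe.getObjectInspector());
        ois.add(valueSerDe.getObjectInspector());
        // Field names are the standard reducer field names from Utilities.reduceFieldNameList.
        return ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
    }
}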

Example 24 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class FlatRowContainer, method add.

/**
 * Called when loading the hashtable.
 */
public void add(MapJoinObjectSerDeContext context, BytesWritable value) throws HiveException {
    AbstractSerDe serde = context.getSerDe();
    // if the value carries a filter tag, the alias filter still has to be derived from the rows later
    isAliasFilterSet = !context.hasFilterTag();
    if (rowLength == UNKNOWN) {
        try {
            rowLength = ObjectInspectorUtils.getStructSize(serde.getObjectInspector());
        } catch (SerDeException ex) {
            throw new HiveException("Get structure size error", ex);
        }
        if (rowLength == 0) {
            array = EMPTY_OBJECT_ARRAY;
        }
    }
    if (rowLength > 0) {
        int rowCount = (array.length / rowLength);
        listRealloc(array.length + rowLength);
        read(serde, value, rowCount);
    } else {
        // see rowLength javadoc
        --rowLength;
    }
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)
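
The rowLength computed above is simply the number of top-level fields in a row produced by the value SerDe; the container then stores every row flattened into one Object[] in chunks of that size. A small sketch of the sizing step in isolation (assumes serde is an initialized AbstractSerDe whose object inspector is a struct):

// Determine how many slots one row occupies in the flat array.
StructObjectInspector rowInspector = (StructObjectInspector) serde.getObjectInspector();
int fieldsPerRow = rowInspector.getAllStructFieldRefs().size();
// ObjectInspectorUtils.getStructSize(...) used above derives the same count from the
// struct's field refs and rejects object inspectors that are not structs.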

Example 25 with AbstractSerDe

Use of org.apache.hadoop.hive.serde2.AbstractSerDe in project hive by apache.

From the class MapJoinEagerRowContainer, method read.

@SuppressWarnings("unchecked")
public void read(MapJoinObjectSerDeContext context, Writable currentValue) throws SerDeException {
    AbstractSerDe serde = context.getSerDe();
    List<Object> value = (List<Object>) ObjectInspectorUtils.copyToStandardObject(serde.deserialize(currentValue), serde.getObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
    if (value == null) {
        addRow(toList(EMPTY_OBJECT_ARRAY));
    } else {
        Object[] valuesArray = value.toArray();
        if (context.hasFilterTag()) {
            aliasFilter &= ((ShortWritable) valuesArray[valuesArray.length - 1]).get();
        }
        addRow(toList(valuesArray));
    }
}
Also used : AbstractList(java.util.AbstractList) ArrayList(java.util.ArrayList) List(java.util.List) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe)
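
The copyToStandardObject call is the important detail here: lazy SerDes typically return objects that still reference the bytes of the Writable they were deserialized from, and that buffer is reused for the next record. A brief sketch of the idea, assuming serde and currentValue as in the method above:

// Deserialize, then detach the row from the underlying byte buffer so it can be
// retained in the row container after currentValue is overwritten.
Object lazyRow = serde.deserialize(currentValue);
Object ownedRow = ObjectInspectorUtils.copyToStandardObject(
    lazyRow, serde.getObjectInspector(), ObjectInspectorCopyOption.WRITABLE);
// The WRITABLE copy option keeps primitives as Hadoop Writables (e.g. ShortWritable),
// which is why the filter tag above can be read back with a ShortWritable cast.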

Aggregations

AbstractSerDe (org.apache.hadoop.hive.serde2.AbstractSerDe): 43 uses
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 25 uses
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 17 uses
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 15 uses
ArrayList (java.util.ArrayList): 12 uses
Properties (java.util.Properties): 12 uses
BytesWritable (org.apache.hadoop.io.BytesWritable): 11 uses
IOException (java.io.IOException): 8 uses
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 8 uses
Writable (org.apache.hadoop.io.Writable): 8 uses
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 7 uses
InputSplit (org.apache.hadoop.mapred.InputSplit): 7 uses
Test (org.junit.Test): 7 uses
AbstractPrimitiveLazyObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector): 6 uses
LazyBinaryMapObjectInspector (org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector): 6 uses
JavaBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector): 6 uses
WritableBinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector): 6 uses
LinkedHashMap (java.util.LinkedHashMap): 5 uses
Path (org.apache.hadoop.fs.Path): 5 uses
TableDesc (org.apache.hadoop.hive.ql.plan.TableDesc): 5 uses