
Example 1 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

From the class MetaStoreUtils, method getFieldsFromDeserializer.

/**
   * @param tableName the table name, optionally followed by a dotted path into a nested field
   *                  (e.g. tbl.col.$elem$ descends into a list element)
   * @param deserializer the deserializer whose ObjectInspector describes the table's schema
   * @return the list of fields
   * @throws SerDeException
   * @throws MetaException
   */
public static List<FieldSchema> getFieldsFromDeserializer(String tableName, Deserializer deserializer) throws SerDeException, MetaException {
    ObjectInspector oi = deserializer.getObjectInspector();
    String[] names = tableName.split("\\.");
    String last_name = names[names.length - 1];
    for (int i = 1; i < names.length; i++) {
        if (oi instanceof StructObjectInspector) {
            StructObjectInspector soi = (StructObjectInspector) oi;
            StructField sf = soi.getStructFieldRef(names[i]);
            if (sf == null) {
                throw new MetaException("Invalid Field " + names[i]);
            } else {
                oi = sf.getFieldObjectInspector();
            }
        } else if (oi instanceof ListObjectInspector && names[i].equalsIgnoreCase("$elem$")) {
            ListObjectInspector loi = (ListObjectInspector) oi;
            oi = loi.getListElementObjectInspector();
        } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$key$")) {
            MapObjectInspector moi = (MapObjectInspector) oi;
            oi = moi.getMapKeyObjectInspector();
        } else if (oi instanceof MapObjectInspector && names[i].equalsIgnoreCase("$value$")) {
            MapObjectInspector moi = (MapObjectInspector) oi;
            oi = moi.getMapValueObjectInspector();
        } else {
            throw new MetaException("Unknown type for " + names[i]);
        }
    }
    ArrayList<FieldSchema> str_fields = new ArrayList<FieldSchema>();
    // rules on how to recurse the ObjectInspector based on its type
    if (oi.getCategory() != Category.STRUCT) {
        str_fields.add(new FieldSchema(last_name, oi.getTypeName(), FROM_SERIALIZER));
    } else {
        List<? extends StructField> fields = ((StructObjectInspector) oi).getAllStructFieldRefs();
        for (int i = 0; i < fields.size(); i++) {
            StructField structField = fields.get(i);
            String fieldName = structField.getFieldName();
            String fieldTypeName = structField.getFieldObjectInspector().getTypeName();
            String fieldComment = determineFieldComment(structField.getFieldComment());
            str_fields.add(new FieldSchema(fieldName, fieldTypeName, fieldComment));
        }
    }
    return str_fields;
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)
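
A minimal sketch of how this helper might be invoked, assuming a Deserializer has already been obtained and initialized from the table's SerDe; the MetaStoreUtils package in the import is an assumption and varies between Hive versions:

import java.util.List;

// assumed location of MetaStoreUtils; the package differs across Hive versions
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde2.Deserializer;

public class DescribeViaDeserializer {
    // Hypothetical helper: prints "name<TAB>type<TAB>comment" for every column the
    // SerDe exposes for the given table (or dotted table.column) path.
    static void printSchema(String tableOrColPath, Deserializer deserializer) throws Exception {
        List<FieldSchema> fields = MetaStoreUtils.getFieldsFromDeserializer(tableOrColPath, deserializer);
        for (FieldSchema fs : fields) {
            System.out.println(fs.getName() + "\t" + fs.getType() + "\t" + fs.getComment());
        }
    }
}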

Example 2 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

From the class PlanUtils, method getTableDesc.

public static TableDesc getTableDesc(Class<? extends Deserializer> serdeClass, String separatorCode, String columns, String columnTypes, boolean lastColumnTakesRestOfTheLine, boolean useDelimitedJSON, String fileFormat) {
    Properties properties = Utilities.makeProperties(serdeConstants.SERIALIZATION_FORMAT, separatorCode, serdeConstants.LIST_COLUMNS, columns);
    if (!separatorCode.equals(Integer.toString(Utilities.ctrlaCode))) {
        properties.setProperty(serdeConstants.FIELD_DELIM, separatorCode);
    }
    if (columnTypes != null) {
        properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, columnTypes);
    }
    if (lastColumnTakesRestOfTheLine) {
        properties.setProperty(serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST, "true");
    }
    // If delimited JSON output is requested, override the SerDe; right now this is hard-coded.
    if (useDelimitedJSON) {
        serdeClass = DelimitedJSONSerDe.class;
    }
    Class inputFormat, outputFormat;
    // get the input & output file formats
    if ("SequenceFile".equalsIgnoreCase(fileFormat)) {
        inputFormat = SequenceFileInputFormat.class;
        outputFormat = SequenceFileOutputFormat.class;
    } else if ("RCFile".equalsIgnoreCase(fileFormat)) {
        inputFormat = RCFileInputFormat.class;
        outputFormat = RCFileOutputFormat.class;
        assert serdeClass == ColumnarSerDe.class;
    } else if (LLAP_OUTPUT_FORMAT_KEY.equalsIgnoreCase(fileFormat)) {
        inputFormat = TextInputFormat.class;
        outputFormat = LlapOutputFormat.class;
        properties.setProperty(hive_metastoreConstants.META_TABLE_STORAGE, LLAP_OF_SH_CLASS);
    } else {
        // use TextFile by default
        inputFormat = TextInputFormat.class;
        outputFormat = IgnoreKeyTextOutputFormat.class;
    }
    properties.setProperty(serdeConstants.SERIALIZATION_LIB, serdeClass.getName());
    return new TableDesc(inputFormat, outputFormat, properties);
}
Also used : RCFileOutputFormat(org.apache.hadoop.hive.ql.io.RCFileOutputFormat) ColumnarSerDe(org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe) RCFileInputFormat(org.apache.hadoop.hive.ql.io.RCFileInputFormat) Properties(java.util.Properties)
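
For context, a hedged call example that uses this factory to build a tab-delimited text-file TableDesc; the column names and types below are made up for illustration:

import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class TableDescSketch {
    static TableDesc tabDelimitedTextTable() {
        // The separator is passed as a numeric delimiter-code string; 9 is the tab character.
        return PlanUtils.getTableDesc(
                LazySimpleSerDe.class,  // SerDe class
                "9",                    // field separator code (tab)
                "id,name",              // column names (illustrative)
                "int,string",           // column types (illustrative)
                false,                  // last column does not take the rest of the line
                false,                  // do not switch to DelimitedJSONSerDe
                "TextFile");            // default text file format
    }
}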

Example 3 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project presto by prestodb.

From the class HiveUtil, method getDeserializer.

@SuppressWarnings("deprecation")
public static Deserializer getDeserializer(Properties schema) {
    String name = getDeserializerClassName(schema);
    Deserializer deserializer = createDeserializer(getDeserializerClass(name));
    initializeDeserializer(deserializer, schema);
    return deserializer;
}
Also used : Deserializer(org.apache.hadoop.hive.serde2.Deserializer)
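
A hedged sketch of assembling the schema Properties this method expects; the serdeConstants keys come from Hive, while the com.facebook.presto.hive.HiveUtil import and the column layout are assumptions for illustration:

import java.util.Properties;

// assumed Presto location of HiveUtil
import com.facebook.presto.hive.HiveUtil;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class DeserializerFromSchema {
    static Deserializer forSimpleTextTable() {
        Properties schema = new Properties();
        // The deserializer class is resolved from serialization.lib, then instantiated and initialized.
        schema.setProperty(serdeConstants.SERIALIZATION_LIB, LazySimpleSerDe.class.getName());
        schema.setProperty(serdeConstants.LIST_COLUMNS, "id,name");          // illustrative columns
        schema.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");  // illustrative types
        return HiveUtil.getDeserializer(schema);
    }
}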

Example 4 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

From the class DDLTask, method describeTable.

/**
 * Write the description of a table to a file.
 *
 * @param db
 *          The database in question.
 * @param descTbl
 *          This is the table we're interested in.
 * @return Returns 0 when execution succeeds and above 0 if it fails.
 * @throws HiveException
 *           Throws this exception if an unexpected error occurs.
 * @throws MetaException
 */
private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException, MetaException {
    String colPath = descTbl.getColumnPath();
    String tableName = descTbl.getTableName();
    // describe the table - populate the output stream
    Table tbl = db.getTable(tableName, false);
    if (tbl == null) {
        throw new HiveException(ErrorMsg.INVALID_TABLE, tableName);
    }
    Partition part = null;
    if (descTbl.getPartSpec() != null) {
        part = db.getPartition(tbl, descTbl.getPartSpec(), false);
        if (part == null) {
            throw new HiveException(ErrorMsg.INVALID_PARTITION, StringUtils.join(descTbl.getPartSpec().keySet(), ','), tableName);
        }
        tbl = part.getTable();
    }
    DataOutputStream outStream = getOutputStream(descTbl.getResFile());
    try {
        LOG.debug("DDLTask: got data for {}", tableName);
        List<FieldSchema> cols = null;
        List<ColumnStatisticsObj> colStats = null;
        Deserializer deserializer = tbl.getDeserializer(true);
        if (deserializer instanceof AbstractSerDe) {
            String errorMsgs = ((AbstractSerDe) deserializer).getConfigurationErrors();
            if (errorMsgs != null && !errorMsgs.isEmpty()) {
                throw new SQLException(errorMsgs);
            }
        }
        if (colPath.equals(tableName)) {
            cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ? tbl.getCols() : part.getCols();
            if (!descTbl.isFormatted()) {
                cols.addAll(tbl.getPartCols());
            }
            if (tbl.isPartitioned() && part == null) {
                // No partition specified for a partitioned table; fetch stats across all partitions.
                Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
                Map<String, Long> valueMap = new HashMap<>();
                Map<String, Boolean> stateMap = new HashMap<>();
                for (String stat : StatsSetupConst.supportedStats) {
                    valueMap.put(stat, 0L);
                    stateMap.put(stat, true);
                }
                PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                int numParts = 0;
                for (Partition partition : parts) {
                    Map<String, String> props = partition.getParameters();
                    Boolean state = StatsSetupConst.areBasicStatsUptoDate(props);
                    for (String stat : StatsSetupConst.supportedStats) {
                        stateMap.put(stat, stateMap.get(stat) && state);
                        if (props != null && props.get(stat) != null) {
                            valueMap.put(stat, valueMap.get(stat) + Long.parseLong(props.get(stat)));
                        }
                    }
                    numParts++;
                }
                for (String stat : StatsSetupConst.supportedStats) {
                    StatsSetupConst.setBasicStatsState(tblProps, Boolean.toString(stateMap.get(stat)));
                    tblProps.put(stat, valueMap.get(stat).toString());
                }
                tblProps.put(StatsSetupConst.NUM_PARTITIONS, Integer.toString(numParts));
                tbl.setParameters(tblProps);
            }
        } else {
            if (descTbl.isFormatted()) {
                // when a column name is specified in the DESCRIBE TABLE DDL, colPath
                // will be table_name.column_name
                String colName = colPath.split("\\.")[1];
                String[] dbTab = Utilities.getDbTableName(tableName);
                List<String> colNames = new ArrayList<String>();
                colNames.add(colName.toLowerCase());
                if (null == part) {
                    if (tbl.isPartitioned()) {
                        Map<String, String> tblProps = tbl.getParameters() == null ? new HashMap<String, String>() : tbl.getParameters();
                        if (tbl.isPartitionKey(colNames.get(0))) {
                            FieldSchema partCol = tbl.getPartColByName(colNames.get(0));
                            cols = Collections.singletonList(partCol);
                            PartitionIterable parts = new PartitionIterable(db, tbl, null, conf.getIntVar(HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX));
                            ColumnInfo ci = new ColumnInfo(partCol.getName(), TypeInfoUtils.getTypeInfoFromTypeString(partCol.getType()), null, false);
                            ColStatistics cs = StatsUtils.getColStatsForPartCol(ci, parts, conf);
                            ColumnStatisticsData data = new ColumnStatisticsData();
                            ColStatistics.Range r = cs.getRange();
                            StatObjectConverter.fillColumnStatisticsData(partCol.getType(), data, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue, r == null ? null : r.maxValue, r == null ? null : r.minValue.toString(), r == null ? null : r.maxValue.toString(), cs.getNumNulls(), cs.getCountDistint(), null, cs.getAvgColLen(), cs.getAvgColLen(), cs.getNumTrues(), cs.getNumFalses());
                            ColumnStatisticsObj cso = new ColumnStatisticsObj(partCol.getName(), partCol.getType(), data);
                            colStats = Collections.singletonList(cso);
                            StatsSetupConst.setColumnStatsState(tblProps, colNames);
                        } else {
                            cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                            List<String> parts = db.getPartitionNames(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), (short) -1);
                            AggrStats aggrStats = db.getAggrColStatsFor(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames, parts);
                            colStats = aggrStats.getColStats();
                            if (parts.size() == aggrStats.getPartsFound()) {
                                StatsSetupConst.setColumnStatsState(tblProps, colNames);
                            } else {
                                StatsSetupConst.removeColumnStatsState(tblProps, colNames);
                            }
                        }
                        tbl.setParameters(tblProps);
                    } else {
                        cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                        colStats = db.getTableColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), colNames);
                    }
                } else {
                    List<String> partitions = new ArrayList<String>();
                    partitions.add(part.getName());
                    cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
                    colStats = db.getPartitionColumnStatistics(dbTab[0].toLowerCase(), dbTab[1].toLowerCase(), partitions, colNames).get(part.getName());
                }
            } else {
                cols = Hive.getFieldsFromDeserializer(colPath, deserializer);
            }
        }
        PrimaryKeyInfo pkInfo = null;
        ForeignKeyInfo fkInfo = null;
        UniqueConstraint ukInfo = null;
        NotNullConstraint nnInfo = null;
        DefaultConstraint dInfo = null;
        CheckConstraint cInfo = null;
        if (descTbl.isExt() || descTbl.isFormatted()) {
            pkInfo = db.getPrimaryKeys(tbl.getDbName(), tbl.getTableName());
            fkInfo = db.getForeignKeys(tbl.getDbName(), tbl.getTableName());
            ukInfo = db.getUniqueConstraints(tbl.getDbName(), tbl.getTableName());
            nnInfo = db.getNotNullConstraints(tbl.getDbName(), tbl.getTableName());
            dInfo = db.getDefaultConstraints(tbl.getDbName(), tbl.getTableName());
            cInfo = db.getCheckConstraints(tbl.getDbName(), tbl.getTableName());
        }
        fixDecimalColumnTypeName(cols);
        // In case the query is served by HiveServer2, don't pad it with spaces,
        // as HiveServer2 output is consumed by JDBC/ODBC clients.
        boolean isOutputPadded = !SessionState.get().isHiveServerQuery();
        formatter.describeTable(outStream, colPath, tableName, tbl, part, cols, descTbl.isFormatted(), descTbl.isExt(), isOutputPadded, colStats, pkInfo, fkInfo, ukInfo, nnInfo, dInfo, cInfo);
        LOG.debug("DDLTask: written data for {}", tableName);
    } catch (SQLException e) {
        throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName);
    } finally {
        IOUtils.closeStream(outStream);
    }
    return 0;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SQLException(java.sql.SQLException) LinkedHashMap(java.util.LinkedHashMap) HashMap(java.util.HashMap) AggrStats(org.apache.hadoop.hive.metastore.api.AggrStats) DataOutputStream(java.io.DataOutputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) PrimaryKeyInfo(org.apache.hadoop.hive.ql.metadata.PrimaryKeyInfo) ForeignKeyInfo(org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo) ColStatistics(org.apache.hadoop.hive.ql.plan.ColStatistics) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) Partition(org.apache.hadoop.hive.ql.metadata.Partition) AlterTableExchangePartition(org.apache.hadoop.hive.ql.plan.AlterTableExchangePartition) TextMetaDataTable(org.apache.hadoop.hive.ql.metadata.formatting.TextMetaDataTable) Table(org.apache.hadoop.hive.ql.metadata.Table) PartitionIterable(org.apache.hadoop.hive.ql.metadata.PartitionIterable) Deserializer(org.apache.hadoop.hive.serde2.Deserializer) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)
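
The per-partition basic-stats aggregation buried in the middle of describeTable is easier to follow in isolation. Below is a simplified, framework-free sketch of that step, with an illustrative stand-in for StatsSetupConst.supportedStats and plain parameter maps instead of Partition objects (the real code also tracks an up-to-date flag per stat):

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class BasicStatsAggregationSketch {
    // Illustrative stand-in for StatsSetupConst.supportedStats.
    static final List<String> SUPPORTED_STATS =
            Arrays.asList("numFiles", "numRows", "totalSize", "rawDataSize");

    // Sums each supported stat over all partition parameter maps, treating missing values as 0,
    // mirroring the valueMap accumulation in describeTable.
    static Map<String, Long> aggregate(List<Map<String, String>> partitionParams) {
        Map<String, Long> totals = new HashMap<>();
        for (String stat : SUPPORTED_STATS) {
            totals.put(stat, 0L);
        }
        for (Map<String, String> params : partitionParams) {
            for (String stat : SUPPORTED_STATS) {
                String v = (params == null) ? null : params.get(stat);
                if (v != null) {
                    totals.put(stat, totals.get(stat) + Long.parseLong(v));
                }
            }
        }
        return totals;
    }
}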

Example 5 with Deserializer

Use of org.apache.hadoop.hive.serde2.Deserializer in project hive by apache.

From the class DDLTask, method alterTableOrSinglePartition.

private List<Task<?>> alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Partition part) throws HiveException {
    EnvironmentContext environmentContext = alterTbl.getEnvironmentContext();
    if (environmentContext == null) {
        environmentContext = new EnvironmentContext();
        alterTbl.setEnvironmentContext(environmentContext);
    }
    // No need to update stats during alter table/partition operations.
    if (environmentContext.getProperties() == null || environmentContext.getProperties().get(StatsSetupConst.DO_NOT_UPDATE_STATS) == null) {
        environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
    }
    if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) {
        tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName()));
        tbl.setTableName(Utilities.getTableName(alterTbl.getNewName()));
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCOLS) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String serializationLib = sd.getSerdeInfo().getSerializationLib();
        AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
        List<FieldSchema> oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore());
        List<FieldSchema> newCols = alterTbl.getNewCols();
        if (serializationLib.equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) {
            console.printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
            sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
            sd.setCols(newCols);
        } else {
            // make sure the new columns do not already exist
            Iterator<FieldSchema> iterNewCols = newCols.iterator();
            while (iterNewCols.hasNext()) {
                FieldSchema newCol = iterNewCols.next();
                String newColName = newCol.getName();
                Iterator<FieldSchema> iterOldCols = oldCols.iterator();
                while (iterOldCols.hasNext()) {
                    String oldColName = iterOldCols.next().getName();
                    if (oldColName.equalsIgnoreCase(newColName)) {
                        throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newColName);
                    }
                }
                oldCols.add(newCol);
            }
            sd.setCols(oldCols);
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAMECOLUMN) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String serializationLib = sd.getSerdeInfo().getSerializationLib();
        AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
        List<FieldSchema> oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore());
        List<FieldSchema> newCols = new ArrayList<FieldSchema>();
        Iterator<FieldSchema> iterOldCols = oldCols.iterator();
        String oldName = alterTbl.getOldColName();
        String newName = alterTbl.getNewColName();
        String type = alterTbl.getNewColType();
        String comment = alterTbl.getNewColComment();
        boolean first = alterTbl.getFirst();
        String afterCol = alterTbl.getAfterCol();
        // if orc table, restrict reordering columns as it will break schema evolution
        boolean isOrcSchemaEvolution = sd.getInputFormat().equals(OrcInputFormat.class.getName()) && isSchemaEvolutionEnabled(tbl);
        if (isOrcSchemaEvolution && (first || (afterCol != null && !afterCol.trim().isEmpty()))) {
            throw new HiveException(ErrorMsg.CANNOT_REORDER_COLUMNS, alterTbl.getOldName());
        }
        FieldSchema column = null;
        boolean found = false;
        int position = -1;
        if (first) {
            position = 0;
        }
        int i = 1;
        while (iterOldCols.hasNext()) {
            FieldSchema col = iterOldCols.next();
            String oldColName = col.getName();
            if (oldColName.equalsIgnoreCase(newName) && !oldColName.equalsIgnoreCase(oldName)) {
                throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newName);
            } else if (oldColName.equalsIgnoreCase(oldName)) {
                col.setName(newName);
                if (type != null && !type.trim().equals("")) {
                    col.setType(type);
                }
                if (comment != null) {
                    col.setComment(comment);
                }
                found = true;
                if (first || (afterCol != null && !afterCol.trim().equals(""))) {
                    column = col;
                    continue;
                }
            }
            if (afterCol != null && !afterCol.trim().equals("") && oldColName.equalsIgnoreCase(afterCol)) {
                position = i;
            }
            i++;
            newCols.add(col);
        }
        // did not find the column
        if (!found) {
            throw new HiveException(ErrorMsg.INVALID_COLUMN, oldName);
        }
        // An AFTER column was specified, but we did not find it.
        if ((afterCol != null && !afterCol.trim().equals("")) && position < 0) {
            throw new HiveException(ErrorMsg.INVALID_COLUMN, afterCol);
        }
        if (position >= 0) {
            newCols.add(position, column);
        }
        sd.setCols(newCols);
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.REPLACECOLS) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        // change SerDe to LazySimpleSerDe if it is columnsetSerDe
        String serializationLib = sd.getSerdeInfo().getSerializationLib();
        if (serializationLib.equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) {
            console.printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
            sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
        } else if (!serializationLib.equals(MetadataTypedColumnsetSerDe.class.getName()) && !serializationLib.equals(LazySimpleSerDe.class.getName()) && !serializationLib.equals(ColumnarSerDe.class.getName()) && !serializationLib.equals(DynamicSerDe.class.getName()) && !serializationLib.equals(ParquetHiveSerDe.class.getName()) && !serializationLib.equals(OrcSerde.class.getName())) {
            throw new HiveException(ErrorMsg.CANNOT_REPLACE_COLUMNS, alterTbl.getOldName());
        }
        final boolean isOrcSchemaEvolution = serializationLib.equals(OrcSerde.class.getName()) && isSchemaEvolutionEnabled(tbl);
        // adding columns and limited integer type promotion is supported for ORC schema evolution
        if (isOrcSchemaEvolution) {
            final List<FieldSchema> existingCols = sd.getCols();
            final List<FieldSchema> replaceCols = alterTbl.getNewCols();
            if (replaceCols.size() < existingCols.size()) {
                throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, alterTbl.getOldName());
            }
        }
        boolean partitioned = tbl.isPartitioned();
        boolean droppingColumns = alterTbl.getNewCols().size() < sd.getCols().size();
        if (ParquetHiveSerDe.isParquetTable(tbl) && isSchemaEvolutionEnabled(tbl) && !alterTbl.getIsCascade() && droppingColumns && partitioned) {
            LOG.warn("Cannot drop columns from a partitioned parquet table without the CASCADE option");
            throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, alterTbl.getOldName());
        }
        sd.setCols(alterTbl.getNewCols());
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS) {
        return alterTableAddProps(alterTbl, tbl, part, environmentContext);
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.DROPPROPS) {
        return alterTableDropProps(alterTbl, tbl, part, environmentContext);
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDEPROPS) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDE) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String serdeName = alterTbl.getSerdeName();
        String oldSerdeName = sd.getSerdeInfo().getSerializationLib();
        // if orc table, restrict changing the serde as it can break schema evolution
        if (isSchemaEvolutionEnabled(tbl) && oldSerdeName.equalsIgnoreCase(OrcSerde.class.getName()) && !serdeName.equalsIgnoreCase(OrcSerde.class.getName())) {
            throw new HiveException(ErrorMsg.CANNOT_CHANGE_SERDE, OrcSerde.class.getSimpleName(), alterTbl.getOldName());
        }
        sd.getSerdeInfo().setSerializationLib(serdeName);
        if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) {
            sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
        }
        if (part != null) {
            // TODO: wtf? This doesn't do anything.
            part.getTPartition().getSd().setCols(part.getTPartition().getSd().getCols());
        } else {
            if (Table.shouldStoreFieldsInMetastore(conf, serdeName, tbl.getParameters()) && !Table.hasMetastoreBasedSchema(conf, oldSerdeName)) {
                // The new SerDe stores fields in the metastore but the old one did not, so save
                // the fields obtained from the old SerDe. Note that this may fail if some fields
                // are too long to be stored in the metastore, but there's nothing we can do.
                try {
                    Deserializer oldSerde = HiveMetaStoreUtils.getDeserializer(conf, tbl.getTTable(), false, oldSerdeName);
                    tbl.setFields(Hive.getFieldsFromDeserializer(tbl.getTableName(), oldSerde));
                } catch (MetaException ex) {
                    throw new HiveException(ex);
                }
            }
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDFILEFORMAT) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        // if orc table, restrict changing the file format as it can break schema evolution
        if (isSchemaEvolutionEnabled(tbl) && sd.getInputFormat().equals(OrcInputFormat.class.getName()) && !alterTbl.getInputFormat().equals(OrcInputFormat.class.getName())) {
            throw new HiveException(ErrorMsg.CANNOT_CHANGE_FILEFORMAT, "ORC", alterTbl.getOldName());
        }
        sd.setInputFormat(alterTbl.getInputFormat());
        sd.setOutputFormat(alterTbl.getOutputFormat());
        if (alterTbl.getSerdeName() != null) {
            sd.getSerdeInfo().setSerializationLib(alterTbl.getSerdeName());
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCLUSTERSORTCOLUMN) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        // validate sort columns and bucket columns
        List<String> columns = Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
        if (!alterTbl.isTurnOffSorting()) {
            Utilities.validateColumnNames(columns, alterTbl.getBucketColumns());
        }
        if (alterTbl.getSortColumns() != null) {
            Utilities.validateColumnNames(columns, Utilities.getColumnNamesFromSortCols(alterTbl.getSortColumns()));
        }
        if (alterTbl.isTurnOffSorting()) {
            sd.setSortCols(new ArrayList<Order>());
        } else if (alterTbl.getNumberBuckets() == -1) {
            // -1 buckets means to turn off bucketing
            sd.setBucketCols(new ArrayList<String>());
            sd.setNumBuckets(-1);
            sd.setSortCols(new ArrayList<Order>());
        } else {
            sd.setBucketCols(alterTbl.getBucketColumns());
            sd.setNumBuckets(alterTbl.getNumberBuckets());
            sd.setSortCols(alterTbl.getSortColumns());
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String newLocation = alterTbl.getNewLocation();
        try {
            URI locUri = new URI(newLocation);
            if (!new Path(locUri).isAbsolute()) {
                throw new HiveException(ErrorMsg.BAD_LOCATION_VALUE, newLocation);
            }
            sd.setLocation(newLocation);
        } catch (URISyntaxException e) {
            throw new HiveException(e);
        }
        environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSKEWEDBY) {
        // Validation has been done at compile time; no validation is needed here.
        List<String> skewedColNames = null;
        List<List<String>> skewedValues = null;
        if (alterTbl.isTurnOffSkewed()) {
            // Convert skewed table to non-skewed table.
            skewedColNames = new ArrayList<String>();
            skewedValues = new ArrayList<List<String>>();
        } else {
            skewedColNames = alterTbl.getSkewedColNames();
            skewedValues = alterTbl.getSkewedColValues();
        }
        if (null == tbl.getSkewedInfo()) {
            // Convert non-skewed table to skewed table.
            SkewedInfo skewedInfo = new SkewedInfo();
            skewedInfo.setSkewedColNames(skewedColNames);
            skewedInfo.setSkewedColValues(skewedValues);
            tbl.setSkewedInfo(skewedInfo);
        } else {
            tbl.setSkewedColNames(skewedColNames);
            tbl.setSkewedColValues(skewedValues);
        }
        tbl.setStoredAsSubDirectories(alterTbl.isStoredAsSubDirectories());
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERSKEWEDLOCATION) {
        // process location one-by-one
        Map<List<String>, String> locMaps = alterTbl.getSkewedLocations();
        Set<List<String>> keys = locMaps.keySet();
        for (List<String> key : keys) {
            String newLocation = locMaps.get(key);
            try {
                URI locUri = new URI(newLocation);
                if (part != null) {
                    List<String> slk = new ArrayList<String>(key);
                    part.setSkewedValueLocationMap(slk, locUri.toString());
                } else {
                    List<String> slk = new ArrayList<String>(key);
                    tbl.setSkewedValueLocationMap(slk, locUri.toString());
                }
            } catch (URISyntaxException e) {
                throw new HiveException(e);
            }
        }
        environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
    } else if (alterTbl.getOp() == AlterTableTypes.ALTERBUCKETNUM) {
        if (part != null) {
            if (part.getBucketCount() == alterTbl.getNumberBuckets()) {
                return null;
            }
            part.setBucketCount(alterTbl.getNumberBuckets());
        } else {
            if (tbl.getNumBuckets() == alterTbl.getNumberBuckets()) {
                return null;
            }
            tbl.setNumBuckets(alterTbl.getNumberBuckets());
        }
    } else {
        throw new HiveException(ErrorMsg.UNSUPPORTED_ALTER_TBL_OP, alterTbl.getOp().toString());
    }
    return null;
}
Also used : HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) LazySimpleSerDe(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) ArrayList(java.util.ArrayList) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) SkewedInfo(org.apache.hadoop.hive.metastore.api.SkewedInfo) Iterator(java.util.Iterator) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) AbstractList(java.util.AbstractList) List(java.util.List) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) Order(org.apache.hadoop.hive.metastore.api.Order) Path(org.apache.hadoop.fs.Path) DynamicSerDe(org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe) SQLUniqueConstraint(org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint) CheckConstraint(org.apache.hadoop.hive.ql.metadata.CheckConstraint) NotNullConstraint(org.apache.hadoop.hive.ql.metadata.NotNullConstraint) SQLCheckConstraint(org.apache.hadoop.hive.metastore.api.SQLCheckConstraint) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) DefaultConstraint(org.apache.hadoop.hive.ql.metadata.DefaultConstraint) UniqueConstraint(org.apache.hadoop.hive.ql.metadata.UniqueConstraint) SQLNotNullConstraint(org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint) MetadataTypedColumnsetSerDe(org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) Deserializer(org.apache.hadoop.hive.serde2.Deserializer)
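
The RENAMECOLUMN branch above interleaves renaming, repositioning, and validation. A standalone sketch of just that column reorder logic on FieldSchema lists, written as a hypothetical helper that omits the ORC schema-evolution restriction and uses plain exceptions instead of HiveException:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.FieldSchema;

public class RenameColumnSketch {
    // Renames oldName to newName and, if requested, moves the column to the front
    // or directly after afterCol. Returns the new column list; throws if oldName is absent.
    static List<FieldSchema> renameColumn(List<FieldSchema> cols, String oldName, String newName,
                                          boolean first, String afterCol) {
        List<FieldSchema> out = new ArrayList<>();
        FieldSchema moved = null;
        boolean found = false;
        int position = first ? 0 : -1;
        int i = 1;
        for (FieldSchema col : cols) {
            if (col.getName().equalsIgnoreCase(oldName)) {
                found = true;
                col.setName(newName);
                if (first || afterCol != null) {
                    moved = col;   // re-insert later at the requested position
                    continue;
                }
            }
            if (afterCol != null && col.getName().equalsIgnoreCase(afterCol)) {
                position = i;      // insert the moved column right after this one
            }
            i++;
            out.add(col);
        }
        if (!found) {
            throw new IllegalArgumentException("Column not found: " + oldName);
        }
        if (afterCol != null && position < 0) {
            throw new IllegalArgumentException("AFTER column not found: " + afterCol);
        }
        if (moved != null) {
            out.add(position, moved);
        }
        return out;
    }
}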

Aggregations

Deserializer (org.apache.hadoop.hive.serde2.Deserializer): 27 usages
ArrayList (java.util.ArrayList): 25 usages
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 20 usages
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 19 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 18 usages
IOException (java.io.IOException): 16 usages
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 15 usages
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 14 usages
Properties (java.util.Properties): 12 usages
Path (org.apache.hadoop.fs.Path): 11 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 10 usages
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 10 usages
SQLCheckConstraint (org.apache.hadoop.hive.metastore.api.SQLCheckConstraint): 8 usages
SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 8 usages
SQLNotNullConstraint (org.apache.hadoop.hive.metastore.api.SQLNotNullConstraint): 8 usages
SQLUniqueConstraint (org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint): 8 usages
DefaultConstraint (org.apache.hadoop.hive.ql.metadata.DefaultConstraint): 8 usages
ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc): 8 usages
HashMap (java.util.HashMap): 7 usages
List (java.util.List): 7 usages