Example 1 with SkewedInfo

Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

From the class TestHBaseStore, the method hashSd:

@Test
public void hashSd() throws Exception {
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("col1", "int", ""));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", true, 0, serde, null, null, emptyParameters);
    Map<List<String>, String> map = new HashMap<List<String>, String>();
    map.put(Arrays.asList("col3"), "col4");
    SkewedInfo skew = new SkewedInfo(Arrays.asList("col1"), Arrays.asList(Arrays.asList("col2")), map);
    sd.setSkewedInfo(skew);
    MessageDigest md = MessageDigest.getInstance("MD5");
    byte[] baseHash = HBaseUtils.hashStorageDescriptor(sd, md);
    StorageDescriptor changeSchema = new StorageDescriptor(sd);
    changeSchema.getCols().add(new FieldSchema("col2", "varchar(32)", "a comment"));
    byte[] schemaHash = HBaseUtils.hashStorageDescriptor(changeSchema, md);
    Assert.assertFalse(Arrays.equals(baseHash, schemaHash));
    StorageDescriptor changeLocation = new StorageDescriptor(sd);
    changeLocation.setLocation("file:/somewhere/else");
    byte[] locationHash = HBaseUtils.hashStorageDescriptor(changeLocation, md);
    Assert.assertArrayEquals(baseHash, locationHash);
}
Also used: SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo), HashMap (java.util.HashMap), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList (java.util.ArrayList), StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor), List (java.util.List), MessageDigest (java.security.MessageDigest), Test (org.junit.Test)
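
The three constructor arguments used above are easy to mix up. As a minimal sketch (not taken from the Hive sources, reusing the same illustrative column names as the test), each argument of the SkewedInfo constructor maps as follows:

Map<List<String>, String> valueLocationMap = new HashMap<List<String>, String>();
// maps a tuple of skewed column values to the directory that holds those rows
valueLocationMap.put(Arrays.asList("col3"), "col4");
SkewedInfo skew = new SkewedInfo(
    // names of the skewed columns
    Arrays.asList("col1"),
    // the skewed values themselves, one inner list per value tuple
    Arrays.asList(Arrays.asList("col2")),
    // skewed value tuple -> location map
    valueLocationMap);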

Example 2 with SkewedInfo

Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

From the class TestHBaseStore, the method skewInfo:

@Test
public void skewInfo() throws Exception {
    String tableName = "mytable";
    int startTime = (int) (System.currentTimeMillis() / 1000);
    List<FieldSchema> cols = new ArrayList<FieldSchema>();
    cols.add(new FieldSchema("col1", "int", ""));
    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", true, 0, serde, null, null, emptyParameters);
    Map<List<String>, String> map = new HashMap<List<String>, String>();
    map.put(Arrays.asList("col3"), "col4");
    SkewedInfo skew = new SkewedInfo(Arrays.asList("col1"), Arrays.asList(Arrays.asList("col2")), map);
    sd.setSkewedInfo(skew);
    Table table = new Table(tableName, "default", "me", startTime, startTime, 0, sd, null, emptyParameters, null, null, null);
    store.createTable(table);
    Table t = store.getTable("default", tableName);
    Assert.assertEquals(1, t.getSd().getColsSize());
    Assert.assertEquals("col1", t.getSd().getCols().get(0).getName());
    Assert.assertEquals("int", t.getSd().getCols().get(0).getType());
    Assert.assertEquals("", t.getSd().getCols().get(0).getComment());
    Assert.assertEquals("serde", t.getSd().getSerdeInfo().getName());
    Assert.assertEquals("seriallib", t.getSd().getSerdeInfo().getSerializationLib());
    Assert.assertEquals("file:/tmp", t.getSd().getLocation());
    Assert.assertEquals("input", t.getSd().getInputFormat());
    Assert.assertEquals("output", t.getSd().getOutputFormat());
    Assert.assertTrue(t.getSd().isCompressed());
    Assert.assertEquals(0, t.getSd().getNumBuckets());
    Assert.assertEquals(0, t.getSd().getSortColsSize());
    Assert.assertEquals("me", t.getOwner());
    Assert.assertEquals("default", t.getDbName());
    Assert.assertEquals(tableName, t.getTableName());
    Assert.assertEquals(0, t.getParametersSize());
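    // verify that the skewed info survived the round trip through the HBase-backed store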
    skew = t.getSd().getSkewedInfo();
    Assert.assertNotNull(skew);
    Assert.assertEquals(1, skew.getSkewedColNamesSize());
    Assert.assertEquals("col1", skew.getSkewedColNames().get(0));
    Assert.assertEquals(1, skew.getSkewedColValuesSize());
    Assert.assertEquals("col2", skew.getSkewedColValues().get(0).get(0));
    Assert.assertEquals(1, skew.getSkewedColValueLocationMapsSize());
    Assert.assertEquals("col4", skew.getSkewedColValueLocationMaps().get(Arrays.asList("col3")));
}
Also used: Table (org.apache.hadoop.hive.metastore.api.Table), SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo), HashMap (java.util.HashMap), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList (java.util.ArrayList), StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor), List (java.util.List), Test (org.junit.Test)

Example 3 with SkewedInfo

Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

From the class DDLTask, the method alterTableOrSinglePartition:

private int alterTableOrSinglePartition(AlterTableDesc alterTbl, Table tbl, Partition part) throws HiveException {
    EnvironmentContext environmentContext = alterTbl.getEnvironmentContext();
    if (environmentContext == null) {
        environmentContext = new EnvironmentContext();
        alterTbl.setEnvironmentContext(environmentContext);
    }
    // stats do not need to be updated during alter table/partition operations
    if (environmentContext.getProperties() == null || environmentContext.getProperties().get(StatsSetupConst.DO_NOT_UPDATE_STATS) == null) {
        environmentContext.putToProperties(StatsSetupConst.DO_NOT_UPDATE_STATS, StatsSetupConst.TRUE);
    }
    if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAME) {
        tbl.setDbName(Utilities.getDatabaseName(alterTbl.getNewName()));
        tbl.setTableName(Utilities.getTableName(alterTbl.getNewName()));
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCOLS) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String serializationLib = sd.getSerdeInfo().getSerializationLib();
        AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
        List<FieldSchema> oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore());
        List<FieldSchema> newCols = alterTbl.getNewCols();
        if (serializationLib.equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) {
            console.printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
            sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
            sd.setCols(newCols);
        } else {
            // make sure the columns do not already exist
            Iterator<FieldSchema> iterNewCols = newCols.iterator();
            while (iterNewCols.hasNext()) {
                FieldSchema newCol = iterNewCols.next();
                String newColName = newCol.getName();
                Iterator<FieldSchema> iterOldCols = oldCols.iterator();
                while (iterOldCols.hasNext()) {
                    String oldColName = iterOldCols.next().getName();
                    if (oldColName.equalsIgnoreCase(newColName)) {
                        throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newColName);
                    }
                }
                oldCols.add(newCol);
            }
            sd.setCols(oldCols);
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.RENAMECOLUMN) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String serializationLib = sd.getSerdeInfo().getSerializationLib();
        AvroSerdeUtils.handleAlterTableForAvro(conf, serializationLib, tbl.getTTable().getParameters());
        List<FieldSchema> oldCols = (part == null ? tbl.getColsForMetastore() : part.getColsForMetastore());
        List<FieldSchema> newCols = new ArrayList<FieldSchema>();
        Iterator<FieldSchema> iterOldCols = oldCols.iterator();
        String oldName = alterTbl.getOldColName();
        String newName = alterTbl.getNewColName();
        String type = alterTbl.getNewColType();
        String comment = alterTbl.getNewColComment();
        boolean first = alterTbl.getFirst();
        String afterCol = alterTbl.getAfterCol();
        // if orc table, restrict reordering columns as it will break schema evolution
        boolean isOrcSchemaEvolution = sd.getInputFormat().equals(OrcInputFormat.class.getName()) && isSchemaEvolutionEnabled(tbl);
        if (isOrcSchemaEvolution && (first || (afterCol != null && !afterCol.trim().isEmpty()))) {
            throw new HiveException(ErrorMsg.CANNOT_REORDER_COLUMNS, alterTbl.getOldName());
        }
        FieldSchema column = null;
        boolean found = false;
        int position = -1;
        if (first) {
            position = 0;
        }
        int i = 1;
        while (iterOldCols.hasNext()) {
            FieldSchema col = iterOldCols.next();
            String oldColName = col.getName();
            if (oldColName.equalsIgnoreCase(newName) && !oldColName.equalsIgnoreCase(oldName)) {
                throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newName);
            } else if (oldColName.equalsIgnoreCase(oldName)) {
                col.setName(newName);
                if (type != null && !type.trim().equals("")) {
                    col.setType(type);
                }
                if (comment != null) {
                    col.setComment(comment);
                }
                found = true;
                if (first || (afterCol != null && !afterCol.trim().equals(""))) {
                    column = col;
                    continue;
                }
            }
            if (afterCol != null && !afterCol.trim().equals("") && oldColName.equalsIgnoreCase(afterCol)) {
                position = i;
            }
            i++;
            newCols.add(col);
        }
        // did not find the column
        if (!found) {
            throw new HiveException(ErrorMsg.INVALID_COLUMN, oldName);
        }
        // an AFTER column was specified, but it was not found
        if ((afterCol != null && !afterCol.trim().equals("")) && position < 0) {
            throw new HiveException(ErrorMsg.INVALID_COLUMN, afterCol);
        }
        if (position >= 0) {
            newCols.add(position, column);
        }
        sd.setCols(newCols);
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.REPLACECOLS) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        // change SerDe to LazySimpleSerDe if it is columnsetSerDe
        String serializationLib = sd.getSerdeInfo().getSerializationLib();
        if (serializationLib.equals("org.apache.hadoop.hive.serde.thrift.columnsetSerDe")) {
            console.printInfo("Replacing columns for columnsetSerDe and changing to LazySimpleSerDe");
            sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
        } else if (!serializationLib.equals(MetadataTypedColumnsetSerDe.class.getName()) && !serializationLib.equals(LazySimpleSerDe.class.getName()) && !serializationLib.equals(ColumnarSerDe.class.getName()) && !serializationLib.equals(DynamicSerDe.class.getName()) && !serializationLib.equals(ParquetHiveSerDe.class.getName()) && !serializationLib.equals(OrcSerde.class.getName())) {
            throw new HiveException(ErrorMsg.CANNOT_REPLACE_COLUMNS, alterTbl.getOldName());
        }
        final boolean isOrcSchemaEvolution = serializationLib.equals(OrcSerde.class.getName()) && isSchemaEvolutionEnabled(tbl);
        // adding columns and limited integer type promotion is supported for ORC schema evolution
        if (isOrcSchemaEvolution) {
            final List<FieldSchema> existingCols = sd.getCols();
            final List<FieldSchema> replaceCols = alterTbl.getNewCols();
            if (replaceCols.size() < existingCols.size()) {
                throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, alterTbl.getOldName());
            }
        }
        sd.setCols(alterTbl.getNewCols());
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS) {
        if (StatsSetupConst.USER.equals(environmentContext.getProperties().get(StatsSetupConst.STATS_GENERATED))) {
            environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
        }
        if (part != null) {
            part.getTPartition().getParameters().putAll(alterTbl.getProps());
        } else {
            tbl.getTTable().getParameters().putAll(alterTbl.getProps());
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.DROPPROPS) {
        Iterator<String> keyItr = alterTbl.getProps().keySet().iterator();
        if (StatsSetupConst.USER.equals(environmentContext.getProperties().get(StatsSetupConst.STATS_GENERATED))) {
            // drop a stats parameter, which triggers recompute stats update automatically
            environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
        }
        while (keyItr.hasNext()) {
            if (part != null) {
                part.getTPartition().getParameters().remove(keyItr.next());
            } else {
                tbl.getTTable().getParameters().remove(keyItr.next());
            }
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDEPROPS) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSERDE) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String serdeName = alterTbl.getSerdeName();
        String oldSerdeName = sd.getSerdeInfo().getSerializationLib();
        // if orc table, restrict changing the serde as it can break schema evolution
        if (isSchemaEvolutionEnabled(tbl) && oldSerdeName.equalsIgnoreCase(OrcSerde.class.getName()) && !serdeName.equalsIgnoreCase(OrcSerde.class.getName())) {
            throw new HiveException(ErrorMsg.CANNOT_CHANGE_SERDE, OrcSerde.class.getSimpleName(), alterTbl.getOldName());
        }
        sd.getSerdeInfo().setSerializationLib(serdeName);
        if ((alterTbl.getProps() != null) && (alterTbl.getProps().size() > 0)) {
            sd.getSerdeInfo().getParameters().putAll(alterTbl.getProps());
        }
        if (part != null) {
            // TODO: wtf? This doesn't do anything.
            part.getTPartition().getSd().setCols(part.getTPartition().getSd().getCols());
        } else {
            if (Table.shouldStoreFieldsInMetastore(conf, serdeName, tbl.getParameters()) && !Table.hasMetastoreBasedSchema(conf, oldSerdeName)) {
                // The new SerDe stores its schema in the metastore while the old one did not,
                // so materialize the columns from the old deserializer. This may fail if fields
                // from old SerDe are too long to be stored in metastore, but there's nothing we can do.
                try {
                    Deserializer oldSerde = MetaStoreUtils.getDeserializer(conf, tbl.getTTable(), false, oldSerdeName);
                    tbl.setFields(Hive.getFieldsFromDeserializer(tbl.getTableName(), oldSerde));
                } catch (MetaException ex) {
                    throw new HiveException(ex);
                }
            }
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDFILEFORMAT) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        // if orc table, restrict changing the file format as it can break schema evolution
        if (isSchemaEvolutionEnabled(tbl) && sd.getInputFormat().equals(OrcInputFormat.class.getName()) && !alterTbl.getInputFormat().equals(OrcInputFormat.class.getName())) {
            throw new HiveException(ErrorMsg.CANNOT_CHANGE_FILEFORMAT, "ORC", alterTbl.getOldName());
        }
        sd.setInputFormat(alterTbl.getInputFormat());
        sd.setOutputFormat(alterTbl.getOutputFormat());
        if (alterTbl.getSerdeName() != null) {
            sd.getSerdeInfo().setSerializationLib(alterTbl.getSerdeName());
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDCLUSTERSORTCOLUMN) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        // validate sort columns and bucket columns
        List<String> columns = Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
        if (!alterTbl.isTurnOffSorting()) {
            Utilities.validateColumnNames(columns, alterTbl.getBucketColumns());
        }
        if (alterTbl.getSortColumns() != null) {
            Utilities.validateColumnNames(columns, Utilities.getColumnNamesFromSortCols(alterTbl.getSortColumns()));
        }
        if (alterTbl.isTurnOffSorting()) {
            sd.setSortCols(new ArrayList<Order>());
        } else if (alterTbl.getNumberBuckets() == -1) {
            // -1 buckets means to turn off bucketing
            sd.setBucketCols(new ArrayList<String>());
            sd.setNumBuckets(-1);
            sd.setSortCols(new ArrayList<Order>());
        } else {
            sd.setBucketCols(alterTbl.getBucketColumns());
            sd.setNumBuckets(alterTbl.getNumberBuckets());
            sd.setSortCols(alterTbl.getSortColumns());
        }
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERLOCATION) {
        StorageDescriptor sd = retrieveStorageDescriptor(tbl, part);
        String newLocation = alterTbl.getNewLocation();
        try {
            URI locUri = new URI(newLocation);
            if (!new Path(locUri).isAbsolute()) {
                throw new HiveException(ErrorMsg.BAD_LOCATION_VALUE, newLocation);
            }
            sd.setLocation(newLocation);
        } catch (URISyntaxException e) {
            throw new HiveException(e);
        }
        environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDSKEWEDBY) {
        // Validation was done at compile time; no validation is needed here.
        List<String> skewedColNames = null;
        List<List<String>> skewedValues = null;
        if (alterTbl.isTurnOffSkewed()) {
            // Convert skewed table to non-skewed table.
            skewedColNames = new ArrayList<String>();
            skewedValues = new ArrayList<List<String>>();
        } else {
            skewedColNames = alterTbl.getSkewedColNames();
            skewedValues = alterTbl.getSkewedColValues();
        }
        if (null == tbl.getSkewedInfo()) {
            // Convert non-skewed table to skewed table.
            SkewedInfo skewedInfo = new SkewedInfo();
            skewedInfo.setSkewedColNames(skewedColNames);
            skewedInfo.setSkewedColValues(skewedValues);
            tbl.setSkewedInfo(skewedInfo);
        } else {
            tbl.setSkewedColNames(skewedColNames);
            tbl.setSkewedColValues(skewedValues);
        }
        tbl.setStoredAsSubDirectories(alterTbl.isStoredAsSubDirectories());
    } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ALTERSKEWEDLOCATION) {
        // process each skewed value location one by one
        Map<List<String>, String> locMaps = alterTbl.getSkewedLocations();
        Set<List<String>> keys = locMaps.keySet();
        for (List<String> key : keys) {
            String newLocation = locMaps.get(key);
            try {
                URI locUri = new URI(newLocation);
                if (part != null) {
                    List<String> slk = new ArrayList<String>(key);
                    part.setSkewedValueLocationMap(slk, locUri.toString());
                } else {
                    List<String> slk = new ArrayList<String>(key);
                    tbl.setSkewedValueLocationMap(slk, locUri.toString());
                }
            } catch (URISyntaxException e) {
                throw new HiveException(e);
            }
        }
        environmentContext.getProperties().remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
    } else if (alterTbl.getOp() == AlterTableTypes.ALTERBUCKETNUM) {
        if (part != null) {
            if (part.getBucketCount() == alterTbl.getNumberBuckets()) {
                return 0;
            }
            part.setBucketCount(alterTbl.getNumberBuckets());
        } else {
            if (tbl.getNumBuckets() == alterTbl.getNumberBuckets()) {
                return 0;
            }
            tbl.setNumBuckets(alterTbl.getNumberBuckets());
        }
    } else {
        throw new HiveException(ErrorMsg.UNSUPPORTED_ALTER_TBL_OP, alterTbl.getOp().toString());
    }
    return 0;
}
Also used: HiveException (org.apache.hadoop.hive.ql.metadata.HiveException), LazySimpleSerDe (org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor), ArrayList (java.util.ArrayList), URISyntaxException (java.net.URISyntaxException), URI (java.net.URI), EnvironmentContext (org.apache.hadoop.hive.metastore.api.EnvironmentContext), SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo), Iterator (java.util.Iterator), AbstractList (java.util.AbstractList), List (java.util.List), MetaException (org.apache.hadoop.hive.metastore.api.MetaException), Order (org.apache.hadoop.hive.metastore.api.Order), Path (org.apache.hadoop.fs.Path), DynamicSerDe (org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe), MetadataTypedColumnsetSerDe (org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe), OrcSerde (org.apache.hadoop.hive.ql.io.orc.OrcSerde), OrcInputFormat (org.apache.hadoop.hive.ql.io.orc.OrcInputFormat), Deserializer (org.apache.hadoop.hive.serde2.Deserializer)
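
The ADDSKEWEDBY branch above either attaches a freshly populated SkewedInfo or clears the existing one with empty lists. A minimal sketch of the same pattern, assuming a Thrift org.apache.hadoop.hive.metastore.api.Table named table is already in hand and using made-up column names:

// Minimal sketch (not from DDLTask); column names and values are illustrative.
StorageDescriptor sd = table.getSd();
// turn skew on: describe the skewed columns and their frequent values
SkewedInfo skewedInfo = new SkewedInfo();
skewedInfo.setSkewedColNames(Arrays.asList("col1"));
skewedInfo.setSkewedColValues(Arrays.asList(Arrays.asList("col2")));
sd.setSkewedInfo(skewedInfo);
// turn skew off: the convention in the branch above is empty lists, not a null SkewedInfo
sd.getSkewedInfo().setSkewedColNames(new ArrayList<String>());
sd.getSkewedInfo().setSkewedColValues(new ArrayList<List<String>>());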

Example 4 with SkewedInfo

Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

From the class DDLTask, the method showCreateTable:

private int showCreateTable(Hive db, DataOutputStream outStream, String tableName) throws HiveException {
    final String EXTERNAL = "external";
    final String TEMPORARY = "temporary";
    final String LIST_COLUMNS = "columns";
    final String TBL_COMMENT = "tbl_comment";
    final String LIST_PARTITIONS = "partitions";
    final String SORT_BUCKET = "sort_bucket";
    final String SKEWED_INFO = "tbl_skewedinfo";
    final String ROW_FORMAT = "row_format";
    final String TBL_LOCATION = "tbl_location";
    final String TBL_PROPERTIES = "tbl_properties";
    boolean needsLocation = true;
    StringBuilder createTab_str = new StringBuilder();
    Table tbl = db.getTable(tableName, false);
    List<String> duplicateProps = new ArrayList<String>();
    try {
        needsLocation = doesTableNeedLocation(tbl);
        if (tbl.isView()) {
            String createTab_stmt = "CREATE VIEW `" + tableName + "` AS " + tbl.getViewExpandedText();
            outStream.write(createTab_stmt.getBytes(StandardCharsets.UTF_8));
            return 0;
        }
        createTab_str.append("CREATE <" + TEMPORARY + "><" + EXTERNAL + ">TABLE `");
        createTab_str.append(tableName + "`(\n");
        createTab_str.append("<" + LIST_COLUMNS + ">)\n");
        createTab_str.append("<" + TBL_COMMENT + ">\n");
        createTab_str.append("<" + LIST_PARTITIONS + ">\n");
        createTab_str.append("<" + SORT_BUCKET + ">\n");
        createTab_str.append("<" + SKEWED_INFO + ">\n");
        createTab_str.append("<" + ROW_FORMAT + ">\n");
        if (needsLocation) {
            createTab_str.append("LOCATION\n");
            createTab_str.append("<" + TBL_LOCATION + ">\n");
        }
        createTab_str.append("TBLPROPERTIES (\n");
        createTab_str.append("<" + TBL_PROPERTIES + ">)\n");
        ST createTab_stmt = new ST(createTab_str.toString());
        // For cases where the table is temporary
        String tbl_temp = "";
        if (tbl.isTemporary()) {
            duplicateProps.add("TEMPORARY");
            tbl_temp = "TEMPORARY ";
        }
        // For cases where the table is external
        String tbl_external = "";
        if (tbl.getTableType() == TableType.EXTERNAL_TABLE) {
            duplicateProps.add("EXTERNAL");
            tbl_external = "EXTERNAL ";
        }
        // Columns
        String tbl_columns = "";
        List<FieldSchema> cols = tbl.getCols();
        List<String> columns = new ArrayList<String>();
        for (FieldSchema col : cols) {
            String columnDesc = "  `" + col.getName() + "` " + col.getType();
            if (col.getComment() != null) {
                columnDesc = columnDesc + " COMMENT '" + HiveStringUtils.escapeHiveCommand(col.getComment()) + "'";
            }
            columns.add(columnDesc);
        }
        tbl_columns = StringUtils.join(columns, ", \n");
        // Table comment
        String tbl_comment = "";
        String tabComment = tbl.getProperty("comment");
        if (tabComment != null) {
            duplicateProps.add("comment");
            tbl_comment = "COMMENT '" + HiveStringUtils.escapeHiveCommand(tabComment) + "'";
        }
        // Partitions
        String tbl_partitions = "";
        List<FieldSchema> partKeys = tbl.getPartitionKeys();
        if (partKeys.size() > 0) {
            tbl_partitions += "PARTITIONED BY ( \n";
            List<String> partCols = new ArrayList<String>();
            for (FieldSchema partKey : partKeys) {
                String partColDesc = "  `" + partKey.getName() + "` " + partKey.getType();
                if (partKey.getComment() != null) {
                    partColDesc = partColDesc + " COMMENT '" + HiveStringUtils.escapeHiveCommand(partKey.getComment()) + "'";
                }
                partCols.add(partColDesc);
            }
            tbl_partitions += StringUtils.join(partCols, ", \n");
            tbl_partitions += ")";
        }
        // Clusters (Buckets)
        String tbl_sort_bucket = "";
        List<String> buckCols = tbl.getBucketCols();
        if (buckCols.size() > 0) {
            duplicateProps.add("SORTBUCKETCOLSPREFIX");
            tbl_sort_bucket += "CLUSTERED BY ( \n  ";
            tbl_sort_bucket += StringUtils.join(buckCols, ", \n  ");
            tbl_sort_bucket += ") \n";
            List<Order> sortCols = tbl.getSortCols();
            if (sortCols.size() > 0) {
                tbl_sort_bucket += "SORTED BY ( \n";
                // Order
                List<String> sortKeys = new ArrayList<String>();
                for (Order sortCol : sortCols) {
                    String sortKeyDesc = "  " + sortCol.getCol() + " ";
                    if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC) {
                        sortKeyDesc = sortKeyDesc + "ASC";
                    } else if (sortCol.getOrder() == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_DESC) {
                        sortKeyDesc = sortKeyDesc + "DESC";
                    }
                    sortKeys.add(sortKeyDesc);
                }
                tbl_sort_bucket += StringUtils.join(sortKeys, ", \n");
                tbl_sort_bucket += ") \n";
            }
            tbl_sort_bucket += "INTO " + tbl.getNumBuckets() + " BUCKETS";
        }
        // Skewed Info
        StringBuilder tbl_skewedinfo = new StringBuilder();
        SkewedInfo skewedInfo = tbl.getSkewedInfo();
        if (skewedInfo != null && !skewedInfo.getSkewedColNames().isEmpty()) {
            tbl_skewedinfo.append("SKEWED BY (" + StringUtils.join(skewedInfo.getSkewedColNames(), ",") + ")\n");
            tbl_skewedinfo.append("  ON (");
            List<String> colValueList = new ArrayList<String>();
            for (List<String> colValues : skewedInfo.getSkewedColValues()) {
                colValueList.add("('" + StringUtils.join(colValues, "','") + "')");
            }
            tbl_skewedinfo.append(StringUtils.join(colValueList, ",") + ")");
            if (tbl.isStoredAsSubDirectories()) {
                tbl_skewedinfo.append("\n  STORED AS DIRECTORIES");
            }
        }
        // Row format (SerDe)
        StringBuilder tbl_row_format = new StringBuilder();
        StorageDescriptor sd = tbl.getTTable().getSd();
        SerDeInfo serdeInfo = sd.getSerdeInfo();
        Map<String, String> serdeParams = serdeInfo.getParameters();
        tbl_row_format.append("ROW FORMAT SERDE \n");
        tbl_row_format.append("  '" + HiveStringUtils.escapeHiveCommand(serdeInfo.getSerializationLib()) + "' \n");
        if (tbl.getStorageHandler() == null) {
            // SERDE properties
            if (MetaStoreUtils.DEFAULT_SERIALIZATION_FORMAT.equals(serdeParams.get(serdeConstants.SERIALIZATION_FORMAT))) {
                serdeParams.remove(serdeConstants.SERIALIZATION_FORMAT);
            }
            if (!serdeParams.isEmpty()) {
                appendSerdeParams(tbl_row_format, serdeParams).append(" \n");
            }
            tbl_row_format.append("STORED AS INPUTFORMAT \n  '" + HiveStringUtils.escapeHiveCommand(sd.getInputFormat()) + "' \n");
            tbl_row_format.append("OUTPUTFORMAT \n  '" + HiveStringUtils.escapeHiveCommand(sd.getOutputFormat()) + "'");
        } else {
            duplicateProps.add(META_TABLE_STORAGE);
            tbl_row_format.append("STORED BY \n  '" + HiveStringUtils.escapeHiveCommand(tbl.getParameters().get(META_TABLE_STORAGE)) + "' \n");
            // SerDe Properties
            if (!serdeParams.isEmpty()) {
                appendSerdeParams(tbl_row_format, serdeInfo.getParameters());
            }
        }
        String tbl_location = "  '" + HiveStringUtils.escapeHiveCommand(sd.getLocation()) + "'";
        // Table properties
        duplicateProps.addAll(Arrays.asList(StatsSetupConst.TABLE_PARAMS_STATS_KEYS));
        String tbl_properties = propertiesToString(tbl.getParameters(), duplicateProps);
        createTab_stmt.add(TEMPORARY, tbl_temp);
        createTab_stmt.add(EXTERNAL, tbl_external);
        createTab_stmt.add(LIST_COLUMNS, tbl_columns);
        createTab_stmt.add(TBL_COMMENT, tbl_comment);
        createTab_stmt.add(LIST_PARTITIONS, tbl_partitions);
        createTab_stmt.add(SORT_BUCKET, tbl_sort_bucket);
        createTab_stmt.add(SKEWED_INFO, tbl_skewedinfo);
        createTab_stmt.add(ROW_FORMAT, tbl_row_format);
        // Table location should not be printed for HBase-backed tables
        if (needsLocation) {
            createTab_stmt.add(TBL_LOCATION, tbl_location);
        }
        createTab_stmt.add(TBL_PROPERTIES, tbl_properties);
        outStream.write(createTab_stmt.render().getBytes(StandardCharsets.UTF_8));
    } catch (IOException e) {
        LOG.info("show create table: " + stringifyException(e));
        return 1;
    }
    return 0;
}
Also used: Order (org.apache.hadoop.hive.metastore.api.Order), ST (org.stringtemplate.v4.ST), Table (org.apache.hadoop.hive.ql.metadata.Table), FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema), SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo), ArrayList (java.util.ArrayList), StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor), IOException (java.io.IOException), SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo)
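
To make the fragment produced by the skewed-info block above concrete, here is a small sketch (not part of DDLTask) that renders the same SKEWED BY clause for a one-column SkewedInfo; the expected output appears in the trailing comment:

// Minimal sketch (not from the Hive sources); column names and values are illustrative.
SkewedInfo skewedInfo = new SkewedInfo(
    Arrays.asList("col1"), Arrays.asList(Arrays.asList("col2")),
    new HashMap<List<String>, String>());
StringBuilder clause = new StringBuilder();
clause.append("SKEWED BY (" + StringUtils.join(skewedInfo.getSkewedColNames(), ",") + ")\n");
clause.append("  ON (");
List<String> colValueList = new ArrayList<String>();
for (List<String> colValues : skewedInfo.getSkewedColValues()) {
    colValueList.add("('" + StringUtils.join(colValues, "','") + "')");
}
clause.append(StringUtils.join(colValueList, ",") + ")");
// clause.toString() now reads:
// SKEWED BY (col1)
//   ON (('col2'))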

Example 5 with SkewedInfo

Use of org.apache.hadoop.hive.metastore.api.SkewedInfo in project hive by apache.

From the class Hive, the method loadPartition:

/**
   * Load a directory into a Hive table partition. Alters the existing content of
   * the partition with the contents of loadPath. If the partition does not
   * exist, one is created. Files in loadPath are moved into Hive, but the
   * directory itself is not removed.
   *
   * @param loadPath
   *          Directory containing files to load into Table
   * @param tbl
   *          the table to load the partition into
   * @param partSpec
   *          defines which partition needs to be loaded
   * @param replace
   *          if true - replace files in the partition, otherwise add files to
   *          the partition
   * @param inheritTableSpecs if true, on [re]creating the partition, take the
   *          location/inputformat/outputformat/serde details from table spec
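   * @param isSkewedStoreAsSubdir
   *          if true, the partition stores skewed values as sub-directories
   *          (list bucketing), so the skewed-value-to-location map is rebuilt
   *          from the partition path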
   * @param isSrcLocal
   *          If the source directory is LOCAL
   * @param isAcid true if this is an ACID operation
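   * @param hasFollowingStatsTask
   *          if true, a stats task will run after this load, so stats are left
   *          for it to update when the partition is altered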
   */
public Partition loadPartition(Path loadPath, Table tbl, Map<String, String> partSpec, boolean replace, boolean inheritTableSpecs, boolean isSkewedStoreAsSubdir, boolean isSrcLocal, boolean isAcid, boolean hasFollowingStatsTask) throws HiveException {
    Path tblDataLocationPath = tbl.getDataLocation();
    try {
        Partition oldPart = getPartition(tbl, partSpec, false);
        /**
       * Move files before creating the partition since downstream processes
       * check for existence of partition in metadata before accessing the data.
       * If partition is created before data is moved, downstream waiting
       * processes might move forward with partial data
       */
        Path oldPartPath = (oldPart != null) ? oldPart.getDataLocation() : null;
        Path newPartPath = null;
        if (inheritTableSpecs) {
            Path partPath = new Path(tbl.getDataLocation(), Warehouse.makePartPath(partSpec));
            newPartPath = new Path(tblDataLocationPath.toUri().getScheme(), tblDataLocationPath.toUri().getAuthority(), partPath.toUri().getPath());
            if (oldPart != null) {
                /*
           * If we are moving the partition across filesystem boundaries
           * inherit from the table properties. Otherwise (same filesystem) use the
           * original partition location.
           *
           * See: HIVE-1707 and HIVE-2117 for background
           */
                FileSystem oldPartPathFS = oldPartPath.getFileSystem(getConf());
                FileSystem loadPathFS = loadPath.getFileSystem(getConf());
                if (FileUtils.equalsFileSystem(oldPartPathFS, loadPathFS)) {
                    newPartPath = oldPartPath;
                }
            }
        } else {
            newPartPath = oldPartPath;
        }
        List<Path> newFiles = null;
        PerfLogger perfLogger = SessionState.getPerfLogger();
        perfLogger.PerfLogBegin("MoveTask", "FileMoves");
        if (replace || (oldPart == null && !isAcid)) {
            replaceFiles(tbl.getPath(), loadPath, newPartPath, oldPartPath, getConf(), isSrcLocal);
        } else {
            if (conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() && oldPart != null) {
                newFiles = Collections.synchronizedList(new ArrayList<Path>());
            }
            FileSystem fs = tbl.getDataLocation().getFileSystem(conf);
            Hive.copyFiles(conf, loadPath, newPartPath, fs, isSrcLocal, isAcid, newFiles);
        }
        perfLogger.PerfLogEnd("MoveTask", "FileMoves");
        Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, partSpec, newPartPath);
        alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString());
        validatePartition(newTPart);
        if ((null != newFiles) || replace) {
            fireInsertEvent(tbl, partSpec, newFiles);
        } else {
            LOG.debug("No new files were created, and is not a replace. Skipping generating INSERT event.");
        }
        // column stats will be inaccurate after loading new data, so clear their state
        StatsSetupConst.clearColumnStatsState(newTPart.getParameters());
        // recreate the partition if it existed before
        if (isSkewedStoreAsSubdir) {
            org.apache.hadoop.hive.metastore.api.Partition newCreatedTpart = newTPart.getTPartition();
            SkewedInfo skewedInfo = newCreatedTpart.getSd().getSkewedInfo();
            /* Construct list bucketing location mappings from sub-directory name. */
            Map<List<String>, String> skewedColValueLocationMaps = constructListBucketingLocationMap(newPartPath, skewedInfo);
            /* Add list bucketing location mappings. */
            skewedInfo.setSkewedColValueLocationMaps(skewedColValueLocationMaps);
            newCreatedTpart.getSd().setSkewedInfo(skewedInfo);
        }
        if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
            StatsSetupConst.setBasicStatsState(newTPart.getParameters(), StatsSetupConst.FALSE);
        }
        if (oldPart == null) {
            newTPart.getTPartition().setParameters(new HashMap<String, String>());
            if (this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
                StatsSetupConst.setBasicStatsStateForCreateTable(newTPart.getParameters(), StatsSetupConst.TRUE);
            }
            MetaStoreUtils.populateQuickStats(HiveStatsUtils.getFileStatusRecurse(newPartPath, -1, newPartPath.getFileSystem(conf)), newTPart.getParameters());
            try {
                LOG.debug("Adding new partition " + newTPart.getSpec());
                getSychronizedMSC().add_partition(newTPart.getTPartition());
            } catch (AlreadyExistsException aee) {
                // Multiple users concurrently issuing insert statements on the same partition has
                // the side effect that some queries may not see the partition at the time they are issued,
                // but will realize the partition is actually there when they try to add it to the
                // metastore, and thus get an AlreadyExistsException because an earlier query just created it (race condition).
                // For example, imagine such a table is created:
                //  create table T (name char(50)) partitioned by (ds string);
                // and the following two queries are launched at the same time, from different sessions:
                //  insert into table T partition (ds) values ('Bob', 'today'); -- creates the partition 'today'
                //  insert into table T partition (ds) values ('Joe', 'today'); -- will fail with AlreadyExistsException
                // In that case, we want to retry with alterPartition.
                LOG.debug("Caught AlreadyExistsException, trying to alter partition instead");
                setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
            }
        } else {
            setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
        }
        return newTPart;
    } catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    } catch (MetaException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    } catch (InvalidOperationException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    } catch (TException e) {
        LOG.error(StringUtils.stringifyException(e));
        throw new HiveException(e);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), TException (org.apache.thrift.TException), AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException), PerfLogger (org.apache.hadoop.hive.ql.log.PerfLogger), ArrayList (java.util.ArrayList), IOException (java.io.IOException), SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo), FileSystem (org.apache.hadoop.fs.FileSystem), InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException), List (java.util.List), LinkedList (java.util.LinkedList), MetaException (org.apache.hadoop.hive.metastore.api.MetaException), HiveMetaException (org.apache.hadoop.hive.metastore.HiveMetaException)
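
A minimal usage sketch for loadPartition, assuming an already configured Hive session; the database, table, partition spec, and load path are made up for illustration, and the boolean arguments follow the order of the signature shown above:

// Minimal sketch (not from the Hive sources); names and paths are illustrative.
// The calls below throw HiveException, so run them inside a method that declares it.
HiveConf conf = new HiveConf();
Hive db = Hive.get(conf);
Table tbl = db.getTable("default", "mytable");
Map<String, String> partSpec = new LinkedHashMap<String, String>();
partSpec.put("ds", "2017-01-01");
Path loadPath = new Path("hdfs:///tmp/staging/ds=2017-01-01");
// replace=true, inheritTableSpecs=true, isSkewedStoreAsSubdir=false,
// isSrcLocal=false, isAcid=false, hasFollowingStatsTask=false
Partition p = db.loadPartition(loadPath, tbl, partSpec,
    true, true, false, false, false, false);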

Aggregations

SkewedInfo (org.apache.hadoop.hive.metastore.api.SkewedInfo): 16 usages
ArrayList (java.util.ArrayList): 12 usages
StorageDescriptor (org.apache.hadoop.hive.metastore.api.StorageDescriptor): 12 usages
List (java.util.List): 10 usages
SerDeInfo (org.apache.hadoop.hive.metastore.api.SerDeInfo): 9 usages
HashMap (java.util.HashMap): 8 usages
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 7 usages
Order (org.apache.hadoop.hive.metastore.api.Order): 7 usages
Path (org.apache.hadoop.fs.Path): 4 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 4 usages
Test (org.junit.Test): 4 usages
ByteString (com.google.protobuf.ByteString): 3 usages
IOException (java.io.IOException): 3 usages
Map (java.util.Map): 3 usages
TreeMap (java.util.TreeMap): 3 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 3 usages
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 3 usages
LinkedHashMap (java.util.LinkedHashMap): 2 usages
LinkedList (java.util.LinkedList): 2 usages
SortedMap (java.util.SortedMap): 2 usages