Search in sources :

Example 1 with HiveMetaStoreChecker

use of org.apache.hadoop.hive.ql.metadata.HiveMetaStoreChecker in project hive by apache.

From the class DDLTask, method msck.

/**
 * MetastoreCheck, see if the data in the metastore matches what is on the
 * dfs. Current version checks for tables and partitions that are either
 * missing on disk or in the metastore.
 *
 * <p>When {@code msckDesc.isRepairPartitions()} is set and there are partitions
 * that are not in the metastore, those partitions are added to the metastore
 * (in bulk first, falling back to one-by-one on any failure). The check result
 * and the repair log are always written to {@code msckDesc.getResFile()}.
 *
 * @param db
 *          The database in question.
 * @param msckDesc
 *          Information about the tables and partitions we want to check for.
 * @return Returns 0 when execution succeeds and 1 if the check, the repair or
 *         writing the result file fails.
 */
private int msck(Hive db, MsckDesc msckDesc) {
    CheckResult result = new CheckResult();
    List<String> repairOutput = new ArrayList<String>();
    try {
        HiveMetaStoreChecker checker = new HiveMetaStoreChecker(db);
        String[] names = Utilities.getDbTableName(msckDesc.getTableName());
        checker.checkMetastore(names[0], names[1], msckDesc.getPartSpecs(), result);
        Set<CheckResult.PartitionResult> partsNotInMs = result.getPartitionsNotInMs();
        if (msckDesc.isRepairPartitions() && !partsNotInMs.isEmpty()) {
            // Reused across iterations; makeValsFromName receives the previous list back.
            AbstractList<String> vals = null;
            String settingStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION);
            boolean doValidate = !("ignore".equals(settingStr));
            boolean doSkip = doValidate && "skip".equals(settingStr);
            // The default setting is "throw"; assume doValidate && !doSkip means throw.
            if (doValidate) {
                // Validate that we can add partition without escaping. Escaping was originally intended
                // to avoid creating invalid HDFS paths; however, if we escape the HDFS path (that we
                // deem invalid but HDFS actually supports - it is possible to create HDFS paths with
                // unprintable characters like ASCII 7), metastore will create another directory instead
                // of the one we are trying to "repair" here.
                Iterator<CheckResult.PartitionResult> iter = partsNotInMs.iterator();
                while (iter.hasNext()) {
                    CheckResult.PartitionResult part = iter.next();
                    try {
                        vals = Warehouse.makeValsFromName(part.getPartitionName(), vals);
                    } catch (MetaException ex) {
                        throw new HiveException(ex);
                    }
                    for (String val : vals) {
                        String escapedPath = FileUtils.escapePathName(val);
                        assert escapedPath != null;
                        if (escapedPath.equals(val)) {
                            continue;
                        }
                        String errorMsg = "Repair: Cannot add partition " + msckDesc.getTableName() + ':' + part.getPartitionName() + " due to invalid characters in the name";
                        if (!doSkip) {
                            throw new HiveException(errorMsg);
                        }
                        repairOutput.add(errorMsg);
                        iter.remove();
                        // Stop after the first offending value: the partition is already removed,
                        // and a second iter.remove() without an intervening next() would throw
                        // IllegalStateException (and would duplicate the message).
                        break;
                    }
                }
            }
            // Everything in repairOutput up to this index is a "skipped partition" message
            // from validation and must survive a bulk-add failure below.
            int validationMsgCount = repairOutput.size();
            Table table = db.getTable(msckDesc.getTableName());
            AddPartitionDesc apd = new AddPartitionDesc(table.getDbName(), table.getTableName(), false);
            try {
                int batch_size = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE);
                if (batch_size > 0 && partsNotInMs.size() > batch_size) {
                    // Batched mode: flush every batch_size partitions and once more at the end.
                    int counter = 0;
                    for (CheckResult.PartitionResult part : partsNotInMs) {
                        counter++;
                        apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null);
                        repairOutput.add("Repair: Added partition to metastore " + msckDesc.getTableName() + ':' + part.getPartitionName());
                        if (counter % batch_size == 0 || counter == partsNotInMs.size()) {
                            db.createPartitions(apd);
                            apd = new AddPartitionDesc(table.getDbName(), table.getTableName(), false);
                        }
                    }
                } else {
                    // Single request containing every missing partition.
                    for (CheckResult.PartitionResult part : partsNotInMs) {
                        apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null);
                        repairOutput.add("Repair: Added partition to metastore " + msckDesc.getTableName() + ':' + part.getPartitionName());
                    }
                    db.createPartitions(apd);
                }
            } catch (Exception e) {
                LOG.info("Could not bulk-add partitions to metastore; trying one by one", e);
                // Drop only the optimistic "Added partition" lines written by the bulk attempt;
                // keep the validation messages recorded before it.
                repairOutput.subList(validationMsgCount, repairOutput.size()).clear();
                msckAddPartitionsOneByOne(db, table, partsNotInMs, repairOutput);
            }
        }
    } catch (HiveException e) {
        LOG.warn("Failed to run metacheck: ", e);
        return 1;
    } catch (IOException e) {
        LOG.warn("Failed to run metacheck: ", e);
        return 1;
    } finally {
        // The result file is written whether or not the check/repair succeeded.
        // NOTE: the return statements below sit inside finally blocks, so they override
        // the status chosen above and discard any exception still propagating.
        BufferedWriter resultOut = null;
        try {
            Path resFile = new Path(msckDesc.getResFile());
            FileSystem fs = resFile.getFileSystem(conf);
            resultOut = new BufferedWriter(new OutputStreamWriter(fs.create(resFile)));
            // firstWritten tracks whether any section has been emitted yet, so that
            // later sections and repair lines are preceded by a terminator.
            boolean firstWritten = false;
            firstWritten |= writeMsckResult(result.getTablesNotInMs(), "Tables not in metastore:", resultOut, firstWritten);
            firstWritten |= writeMsckResult(result.getTablesNotOnFs(), "Tables missing on filesystem:", resultOut, firstWritten);
            firstWritten |= writeMsckResult(result.getPartitionsNotInMs(), "Partitions not in metastore:", resultOut, firstWritten);
            firstWritten |= writeMsckResult(result.getPartitionsNotOnFs(), "Partitions missing from filesystem:", resultOut, firstWritten);
            for (String rout : repairOutput) {
                if (firstWritten) {
                    resultOut.write(terminator);
                } else {
                    firstWritten = true;
                }
                resultOut.write(rout);
            }
        } catch (IOException e) {
            LOG.warn("Failed to save metacheck output: ", e);
            return 1;
        } finally {
            if (resultOut != null) {
                try {
                    resultOut.close();
                } catch (IOException e) {
                    LOG.warn("Failed to close output file: ", e);
                    return 1;
                }
            }
        }
    }
    return 0;
}
Also used : Path(org.apache.hadoop.fs.Path) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) Table(org.apache.hadoop.hive.ql.metadata.Table) ArrayList(java.util.ArrayList) IOException(java.io.IOException) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) IOException(java.io.IOException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) URISyntaxException(java.net.URISyntaxException) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) StringUtils.stringifyException(org.apache.hadoop.util.StringUtils.stringifyException) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) SQLException(java.sql.SQLException) FileNotFoundException(java.io.FileNotFoundException) HiveAuthzPluginException(org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException) InvalidTableException(org.apache.hadoop.hive.ql.metadata.InvalidTableException) BufferedWriter(java.io.BufferedWriter) CheckResult(org.apache.hadoop.hive.ql.metadata.CheckResult) FileSystem(org.apache.hadoop.fs.FileSystem) AddPartitionDesc(org.apache.hadoop.hive.ql.plan.AddPartitionDesc) OutputStreamWriter(java.io.OutputStreamWriter) HiveMetaStoreChecker(org.apache.hadoop.hive.ql.metadata.HiveMetaStoreChecker) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)

Aggregations

BufferedWriter (java.io.BufferedWriter)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 OutputStreamWriter (java.io.OutputStreamWriter)1 URISyntaxException (java.net.URISyntaxException)1 SQLException (java.sql.SQLException)1 ArrayList (java.util.ArrayList)1 FileSystem (org.apache.hadoop.fs.FileSystem)1 Path (org.apache.hadoop.fs.Path)1 AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException)1 InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException)1 MetaException (org.apache.hadoop.hive.metastore.api.MetaException)1 NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException)1 CheckResult (org.apache.hadoop.hive.ql.metadata.CheckResult)1 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)1 HiveMetaStoreChecker (org.apache.hadoop.hive.ql.metadata.HiveMetaStoreChecker)1 InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException)1 Table (org.apache.hadoop.hive.ql.metadata.Table)1 SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)1 AddPartitionDesc (org.apache.hadoop.hive.ql.plan.AddPartitionDesc)1