Use of org.apache.hadoop.hive.ql.metadata.HiveMetaStoreChecker in project hive by apache.
The class DDLTask, method msck.
/**
 * MetastoreCheck, see if the data in the metastore matches what is on the
 * dfs. Current version checks for tables and partitions that are either
 * missing on disk or in the metastore.
 *
 * <p>When {@code msckDesc.isRepairPartitions()} is set and the check found
 * partitions that are not in the metastore, those partitions are added to the
 * metastore (in bulk first, falling back to one-by-one if the bulk add fails).
 * Depending on the {@code HIVE_MSCK_PATH_VALIDATION} setting, partitions whose
 * values would be changed by path escaping are either ignored by the validation
 * ("ignore"), reported and skipped ("skip"), or cause the whole operation to
 * fail (any other value).
 *
 * <p>The check results and any repair messages are always written to the
 * result file named by {@code msckDesc.getResFile()}, even when the check
 * itself failed.
 *
 * @param db
 *          The database in question.
 * @param msckDesc
 *          Information about the tables and partitions we want to check for.
 * @return Returns 0 when execution succeeds and above 0 if it fails (the check
 *         or repair threw, or the result file could not be written or closed).
 */
private int msck(Hive db, MsckDesc msckDesc) {
  CheckResult result = new CheckResult();
  List<String> repairOutput = new ArrayList<String>();
  try {
    HiveMetaStoreChecker checker = new HiveMetaStoreChecker(db);
    String[] names = Utilities.getDbTableName(msckDesc.getTableName());
    checker.checkMetastore(names[0], names[1], msckDesc.getPartSpecs(), result);
    Set<CheckResult.PartitionResult> partsNotInMs = result.getPartitionsNotInMs();
    if (msckDesc.isRepairPartitions() && !partsNotInMs.isEmpty()) {
      AbstractList<String> vals = null;
      String settingStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION);
      boolean doValidate = !("ignore".equals(settingStr));
      boolean doSkip = doValidate && "skip".equals(settingStr);
      // The default setting is "throw"; assume doValidate && !doSkip means throw.
      if (doValidate) {
        // Validate that we can add partition without escaping. Escaping was originally intended
        // to avoid creating invalid HDFS paths; however, if we escape the HDFS path (that we
        // deem invalid but HDFS actually supports - it is possible to create HDFS paths with
        // unprintable characters like ASCII 7), metastore will create another directory instead
        // of the one we are trying to "repair" here.
        Iterator<CheckResult.PartitionResult> iter = partsNotInMs.iterator();
        while (iter.hasNext()) {
          CheckResult.PartitionResult part = iter.next();
          try {
            // The previous list is handed back in as the second argument on each iteration.
            vals = Warehouse.makeValsFromName(part.getPartitionName(), vals);
          } catch (MetaException ex) {
            throw new HiveException(ex);
          }
          for (String val : vals) {
            String escapedPath = FileUtils.escapePathName(val);
            assert escapedPath != null;
            if (escapedPath.equals(val)) {
              continue;
            }
            String errorMsg = "Repair: Cannot add partition " + msckDesc.getTableName() + ':'
                + part.getPartitionName() + " due to invalid characters in the name";
            if (!doSkip) {
              throw new HiveException(errorMsg);
            }
            repairOutput.add(errorMsg);
            iter.remove();
            // The partition is already dropped from the repair set: stop inspecting its
            // remaining values. A second iter.remove() for the same next() would throw
            // IllegalStateException, and the message would be reported more than once.
            break;
          }
        }
      }
      Table table = db.getTable(msckDesc.getTableName());
      AddPartitionDesc apd = new AddPartitionDesc(table.getDbName(), table.getTableName(), false);
      // Everything in repairOutput before this index is a validation ("Cannot add partition")
      // message, which must survive a failed bulk attempt.
      int bulkMessagesStart = repairOutput.size();
      try {
        int batchSize = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE);
        if (batchSize > 0 && partsNotInMs.size() > batchSize) {
          int counter = 0;
          for (CheckResult.PartitionResult part : partsNotInMs) {
            counter++;
            apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null);
            repairOutput.add("Repair: Added partition to metastore " + msckDesc.getTableName()
                + ':' + part.getPartitionName());
            // Flush on every full batch, and once more for the trailing partial batch.
            if (counter % batchSize == 0 || counter == partsNotInMs.size()) {
              db.createPartitions(apd);
              apd = new AddPartitionDesc(table.getDbName(), table.getTableName(), false);
            }
          }
        } else {
          for (CheckResult.PartitionResult part : partsNotInMs) {
            apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null);
            repairOutput.add("Repair: Added partition to metastore " + msckDesc.getTableName()
                + ':' + part.getPartitionName());
          }
          db.createPartitions(apd);
        }
      } catch (Exception e) {
        LOG.info("Could not bulk-add partitions to metastore; trying one by one", e);
        // Discard only the "Added partition" messages of the failed bulk attempt; the
        // one-by-one pass receives repairOutput and reports its own outcome into it.
        repairOutput.subList(bulkMessagesStart, repairOutput.size()).clear();
        msckAddPartitionsOneByOne(db, table, partsNotInMs, repairOutput);
      }
    }
  } catch (HiveException e) {
    LOG.warn("Failed to run metacheck: ", e);
    return 1;
  } catch (IOException e) {
    LOG.warn("Failed to run metacheck: ", e);
    return 1;
  } finally {
    // Always write whatever was collected, even if the check or repair failed above.
    BufferedWriter resultOut = null;
    try {
      Path resFile = new Path(msckDesc.getResFile());
      FileSystem fs = resFile.getFileSystem(conf);
      resultOut = new BufferedWriter(new OutputStreamWriter(fs.create(resFile)));
      // firstWritten records whether any section has been written yet; below it decides
      // whether a terminator must precede the next repair message.
      boolean firstWritten = false;
      firstWritten |= writeMsckResult(result.getTablesNotInMs(), "Tables not in metastore:", resultOut, firstWritten);
      firstWritten |= writeMsckResult(result.getTablesNotOnFs(), "Tables missing on filesystem:", resultOut, firstWritten);
      firstWritten |= writeMsckResult(result.getPartitionsNotInMs(), "Partitions not in metastore:", resultOut, firstWritten);
      firstWritten |= writeMsckResult(result.getPartitionsNotOnFs(), "Partitions missing from filesystem:", resultOut, firstWritten);
      for (String rout : repairOutput) {
        if (firstWritten) {
          resultOut.write(terminator);
        } else {
          firstWritten = true;
        }
        resultOut.write(rout);
      }
    } catch (IOException e) {
      LOG.warn("Failed to save metacheck output: ", e);
      return 1;
    } finally {
      if (resultOut != null) {
        try {
          resultOut.close();
        } catch (IOException e) {
          LOG.warn("Failed to close output file: ", e);
          return 1;
        }
      }
    }
  }
  return 0;
}
Aggregations