Search in sources :

Example 1 with PartitionResult

use of org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult in project hive by apache.

the class HiveMetaStoreChecker method checkTable.

/**
 * Checks one table, looked up by name, for inconsistencies between the
 * metastore and the dfs.
 *
 * <p>If the table cannot be fetched from the metastore, its name is added to
 * {@code result.getTablesNotInMs()} and nothing further is checked. For a
 * partitioned table with explicitly requested partitions, every requested
 * partition the metastore does not return is added to
 * {@code result.getPartitionsNotInMs()}, and the search for unknown
 * partitions is skipped. In all other cases the search for unknown
 * partitions is requested.
 *
 * @param dbName
 *          Name of the database
 * @param tableName
 *          Name of the table
 * @param partitions
 *          Partition specs to check; if null or empty, all partitions of the
 *          table are checked.
 * @param result
 *          Result object that collects the findings
 * @throws HiveException
 *           Failed to get required information from the metastore.
 * @throws IOException
 *           Most likely filesystem related
 * @throws MetaException
 *           Failed to get required information from the metastore.
 */
void checkTable(String dbName, String tableName, List<? extends Map<String, String>> partitions, CheckResult result) throws MetaException, IOException, HiveException {
    Table table;
    try {
        table = hive.getTable(dbName, tableName);
    } catch (HiveException e) {
        // a failed lookup is reported as "table not in metastore"
        result.getTablesNotInMs().add(tableName);
        return;
    }

    List<Partition> knownParts = new ArrayList<Partition>();

    if (!table.isPartitioned()) {
        // nothing to resolve; hand over an empty partition list
        checkTable(table, knownParts, true, result);
        return;
    }

    if (partitions == null || partitions.isEmpty()) {
        // no partitions specified, so fetch all of them
        PrunedPartitionList everyPartition = PartitionPruner.prune(table, null, conf, toString(), null);
        knownParts.addAll(everyPartition.getPartitions());
        checkTable(table, knownParts, true, result);
        return;
    }

    // only specific partitions were requested, so do not look for any others
    for (Map<String, String> spec : partitions) {
        Partition found = hive.getPartition(table, spec, false);
        if (found != null) {
            knownParts.add(found);
            continue;
        }
        PartitionResult missing = new PartitionResult();
        missing.setTableName(tableName);
        missing.setPartitionName(Warehouse.makePartPath(spec));
        result.getPartitionsNotInMs().add(missing);
    }
    checkTable(table, knownParts, false, result);
}
Also used : PrunedPartitionList(org.apache.hadoop.hive.ql.parse.PrunedPartitionList) ArrayList(java.util.ArrayList) PartitionResult(org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult)

Example 2 with PartitionResult

use of org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult in project hive by apache.

the class HiveMetaStoreChecker method checkTable.

/**
 * Checks a table and the given partitions for inconsistencies between the
 * metastore and the dfs.
 *
 * <p>If the table path does not exist, the table name is added to
 * {@code result.getTablesNotOnFs()} and the check stops. Otherwise every
 * partition whose data location does not exist is added to
 * {@code result.getPartitionsNotOnFs()}. The qualified data location of each
 * partition, together with its ancestor directories (one level per entry in
 * the partition spec), is collected as the set of known partition paths.
 *
 * @param table
 *          Table to check
 * @param parts
 *          Partitions to check; null entries are skipped
 * @param result
 *          Result object that collects the findings
 * @param findUnknownPartitions
 *          Should we try to find unknown partitions?
 * @throws IOException
 *           Could not get information from filesystem
 * @throws HiveException
 *           Could not create Partition object
 */
void checkTable(Table table, List<Partition> parts, boolean findUnknownPartitions, CheckResult result) throws IOException, HiveException {
    Path tablePath = table.getPath();
    FileSystem fs = tablePath.getFileSystem(conf);
    if (!fs.exists(tablePath)) {
        result.getTablesNotOnFs().add(table.getTableName());
        return;
    }
    Set<Path> partPaths = new HashSet<Path>();
    // check that the partition folders exist on disk
    for (Partition partition : parts) {
        if (partition == null) {
            // most likely the user specified an invalid partition
            continue;
        }
        Path partPath = partition.getDataLocation();
        // the partition may live on a different filesystem than the table
        fs = partPath.getFileSystem(conf);
        if (!fs.exists(partPath)) {
            PartitionResult pr = new PartitionResult();
            pr.setPartitionName(partition.getName());
            pr.setTableName(partition.getTable().getTableName());
            result.getPartitionsNotOnFs().add(pr);
        }
        // Record the partition directory and its ancestors, one level per
        // partition column. Path.getParent() returns null at the root, so
        // stop there instead of dereferencing null when the location is
        // shallower than the number of partition columns.
        int levels = partition.getSpec().size();
        for (int i = 0; i < levels && partPath != null; i++) {
            partPaths.add(partPath.makeQualified(fs));
            partPath = partPath.getParent();
        }
    }
    if (findUnknownPartitions) {
        findUnknownPartitions(table, partPaths, result);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) PartitionResult(org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult) HashSet(java.util.HashSet)

Example 3 with PartitionResult

use of org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult in project hive by apache.

the class HiveMetaStoreChecker method findUnknownPartitions.

/**
 * Finds partitions on the fs that are unknown to the metastore and adds them
 * to {@code result.getPartitionsNotInMs()}.
 *
 * <p>Candidate directories are gathered through {@code checkPartitionDirs}
 * (presumably the directories found below the table path; confirm against
 * that helper). The table path itself and all known partition paths are then
 * removed. Each remaining directory for which {@code getPartitionName} yields
 * a non-null name is reported.
 *
 * @param table
 *          Table where the partitions would be located
 * @param partPaths
 *          Paths of the partitions the ms knows about
 * @param result
 *          Result object that collects the findings
 * @throws IOException
 *           Thrown if we fail at fetching listings from the fs.
 * @throws HiveException
 *           Declared by this method; the visible code does not throw it
 *           directly, so it presumably comes from one of the helpers it
 *           calls.
 */
void findUnknownPartitions(Table table, Set<Path> partPaths, CheckResult result) throws IOException, HiveException {
    Path tableDir = table.getPath();

    // gather candidate directories under the table folder
    Set<Path> candidates = new HashSet<Path>();
    checkPartitionDirs(tableDir, candidates, table.getPartCols().size());
    // the table dir itself is not a partition
    candidates.remove(tableDir);
    // drop everything the metastore already knows about
    candidates.removeAll(partPaths);

    Set<String> partColNames = new HashSet<String>();
    for (FieldSchema column : table.getPartCols()) {
        partColNames.add(column.getName());
    }

    // whatever is left is an unexpected folder
    for (Path candidate : candidates) {
        FileSystem candidateFs = candidate.getFileSystem(conf);
        Path qualifiedTableDir = candidateFs.makeQualified(tableDir);
        String partitionName = getPartitionName(qualifiedTableDir, candidate, partColNames);
        LOG.debug("PartitionName: " + partitionName);
        if (partitionName == null) {
            // no partition name was derived for this directory
            continue;
        }
        PartitionResult unknown = new PartitionResult();
        unknown.setPartitionName(partitionName);
        unknown.setTableName(table.getTableName());
        result.getPartitionsNotInMs().add(unknown);
    }
    LOG.debug("Number of partitions not in metastore : " + result.getPartitionsNotInMs().size());
}
Also used : Path(org.apache.hadoop.fs.Path) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) FileSystem(org.apache.hadoop.fs.FileSystem) PartitionResult(org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult) HashSet(java.util.HashSet)

Aggregations

PartitionResult (org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult): 3, HashSet (java.util.HashSet): 2, FileSystem (org.apache.hadoop.fs.FileSystem): 2, Path (org.apache.hadoop.fs.Path): 2, ArrayList (java.util.ArrayList): 1, FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 1, PrunedPartitionList (org.apache.hadoop.hive.ql.parse.PrunedPartitionList): 1