Search in sources :

Example 1 with CheckResult

use of org.apache.hadoop.hive.metastore.CheckResult in project hive by apache.

the class TestHiveMetaStoreChecker method testSingleThreadedDeeplyNestedTables.

/**
 * Exercises the single-threaded checker path on a deeply nested partitioned table.
 *
 * <p>The table gets more partition keys than the nominal pool size, ten partition
 * directories are created on the filesystem only, and the check must report exactly
 * those ten as missing from the metastore with no other discrepancies.
 *
 * @throws Exception ex
 */
@Test
public void testSingleThreadedDeeplyNestedTables() throws Exception {
    // a thread count of 0 routes the check through the single-threaded checkMetastore
    hive.getConf().set(MetastoreConf.ConfVars.FS_HANDLER_THREADS_COUNT.getVarname(), "0");
    final int poolSize = 2;
    // nest deeper than the pool size: two more partition keys than poolSize
    final int partitionKeyCount = poolSize + 2;
    Table nestedTable = createPartitionedTestTable(dbName, tableName, partitionKeyCount, 0);
    // lay down 10 partition directories that exist on the filesystem only
    createPartitionsDirectoriesOnFS(nestedTable, 10);

    CheckResult outcome = checker.checkMetastore(catName, dbName, tableName, null, null);

    // tables are consistent and nothing is missing from the filesystem
    assertEquals(Collections.<String>emptySet(), outcome.getTablesNotInMs());
    assertEquals(Collections.<String>emptySet(), outcome.getTablesNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), outcome.getPartitionsNotOnFs());
    // every directory created above is reported as unknown to the metastore
    assertEquals(10, outcome.getPartitionsNotInMs().size());
}
Also used : CheckResult(org.apache.hadoop.hive.metastore.CheckResult) Test(org.junit.Test)

Example 2 with CheckResult

use of org.apache.hadoop.hive.metastore.CheckResult in project hive by apache.

the class TestHiveMetaStoreChecker method testTableCheck.

/**
 * Walks the checker through the table-level scenarios in sequence: an empty database,
 * a table name that is not in the metastore, a freshly created table, a table whose
 * directory was deleted, a stray directory next to the table, and finally the same
 * table recreated with the EXTERNAL property set.
 */
@Test
public void testTableCheck() throws HiveException, IOException, TException, MetastoreException, MetaException {
    // --- nothing has been created yet: a database-wide check reports no differences
    CheckResult check = checker.checkMetastore(catName, dbName, null, null, null);
    assertEquals(Collections.<String>emptySet(), check.getTablesNotInMs());
    assertEquals(Collections.<String>emptySet(), check.getTablesNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotInMs());

    // --- asking about one specific table: it must be flagged as absent from the metastore
    check = checker.checkMetastore(catName, dbName, tableName, null, null);
    assertEquals(1, check.getTablesNotInMs().size());
    assertEquals(tableName, check.getTablesNotInMs().iterator().next());
    assertEquals(Collections.<String>emptySet(), check.getTablesNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotInMs());

    // --- create the database and the table
    Database database = new Database();
    database.setCatalogName(catName);
    database.setName(dbName);
    msc.createDatabase(database);

    Table tbl = new Table(dbName, tableName);
    tbl.setDbName(dbName);
    tbl.setInputFormatClass(TextInputFormat.class);
    tbl.setOutputFormatClass(HiveIgnoreKeyTextOutputFormat.class);
    hive.createTable(tbl);
    // creation is expected to set an id on the thrift table; clear it again afterwards
    Assert.assertTrue(tbl.getTTable().isSetId());
    tbl.getTTable().unsetId();

    // --- the table now exists: the database-wide check stays clean
    check = checker.checkMetastore(catName, dbName, null, null, null);
    assertEquals(Collections.<String>emptySet(), check.getTablesNotInMs());
    assertEquals(Collections.<String>emptySet(), check.getTablesNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotInMs());

    // --- and so does the check that names the table explicitly
    check = checker.checkMetastore(catName, dbName, tableName, null, null);
    assertEquals(Collections.<String>emptySet(), check.getTablesNotInMs());
    assertEquals(Collections.<String>emptySet(), check.getTablesNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotInMs());

    // --- delete the table directory recursively
    Path tableLocation = tbl.getPath();
    fs = tableLocation.getFileSystem(hive.getConf());
    fs.delete(tbl.getPath(), true);

    // the table is still in the metastore but must be reported as missing on the filesystem
    check = checker.checkMetastore(catName, dbName, tableName, null, null);
    assertEquals(Collections.<String>emptySet(), check.getTablesNotInMs());
    assertEquals(1, check.getTablesNotOnFs().size());
    assertEquals(tableName, check.getTablesNotOnFs().iterator().next());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotInMs());

    // --- restore the table directory and add a sibling directory with no metastore entry
    fs.mkdirs(tbl.getPath());
    Path strayTableDir = tbl.getPath().getParent().suffix(Path.SEPARATOR + "faketable");
    fs.mkdirs(strayTableDir);
    fs.deleteOnExit(strayTableDir);

    // the database-wide check must single out the stray directory
    check = checker.checkMetastore(catName, dbName, null, null, null);
    assertEquals(1, check.getTablesNotInMs().size());
    assertEquals(strayTableDir.getName(), Lists.newArrayList(check.getTablesNotInMs()).get(0));
    assertEquals(Collections.<String>emptySet(), check.getTablesNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotInMs());

    // --- drop the table and recreate it with the EXTERNAL property set
    hive.dropTable(dbName, tableName);
    tbl.setProperty("EXTERNAL", "TRUE");
    hive.createTable(tbl);

    // the database-wide check is expected to be clean again
    check = checker.checkMetastore(catName, dbName, null, null, null);
    assertEquals(Collections.<String>emptySet(), check.getTablesNotInMs());
    assertEquals(Collections.<String>emptySet(), check.getTablesNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), check.getPartitionsNotInMs());
}
Also used : Path(org.apache.hadoop.fs.Path) CheckResult(org.apache.hadoop.hive.metastore.CheckResult) Database(org.apache.hadoop.hive.metastore.api.Database) Test(org.junit.Test)

Example 3 with CheckResult

use of org.apache.hadoop.hive.metastore.CheckResult in project hive by apache.

the class TestHiveMetaStoreChecker method testAddPartitionMMBase.

/**
 * Adds a partition directory containing three delta folders and one base folder directly
 * on the filesystem, then verifies that the checker reports it as the single partition
 * missing from the metastore, with a max write id of 4 and a max txn id of 0.
 *
 * @throws Exception ex
 */
@Test
public void testAddPartitionMMBase() throws Exception {
    Table table = createTestTable(true);
    List<Partition> existing = hive.getPartitions(table);
    assertEquals(2, existing.size());

    // create a new partition directory on the filesystem, bypassing the metastore
    fs = existing.get(0).getDataLocation().getFileSystem(hive.getConf());
    String relativePartition = partDateName + "=2017-01-01/" + partCityName + "=paloalto";
    Path newPart = addFolderToPath(fs, table.getDataLocation().toString(), relativePartition);

    // populate it with three deltas and one base folder
    String partitionDir = newPart.toString();
    addFolderToPath(fs, partitionDir, "delta_0000001_0000001_0000");
    addFolderToPath(fs, partitionDir, "delta_0000002_0000002_0000");
    addFolderToPath(fs, partitionDir, "delta_0000003_0000003_0000");
    addFolderToPath(fs, partitionDir, "base_0000004");

    CheckResult outcome = checker.checkMetastore(catName, dbName, tableName, null, null);
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), outcome.getPartitionsNotOnFs());
    assertEquals(1, outcome.getPartitionsNotInMs().size());

    CheckResult.PartitionResult missing = outcome.getPartitionsNotInMs().iterator().next();
    // the highest write id among the folders above (base_0000004) is reported
    assertEquals(4, missing.getMaxWriteId());
    assertEquals(0, missing.getMaxTxnId());
}
Also used : Path(org.apache.hadoop.fs.Path) CheckResult(org.apache.hadoop.hive.metastore.CheckResult) Test(org.junit.Test)

Example 4 with CheckResult

use of org.apache.hadoop.hive.metastore.CheckResult in project hive by apache.

the class TestHiveMetaStoreChecker method testSkipInvalidOrderForPartitionKeysOnFS.

/**
 * With path validation set to "skip", msck should ignore invalid partition directories
 * rather than throw, and report only the valid ones as missing from the metastore.
 *
 * @throws Exception ex
 */
@Test
public void testSkipInvalidOrderForPartitionKeysOnFS() throws Exception {
    // switch path validation to skip mode and build a fresh checker on that configuration
    hive.getConf().set(MetastoreConf.ConfVars.MSCK_PATH_VALIDATION.getVarname(), "skip");
    checker = new HiveMetaStoreChecker(msc, hive.getConf());

    Table partitionedTable = createPartitionedTestTable(dbName, tableName, 2, 0);
    // create 2 invalid partition directories on the filesystem
    createInvalidPartitionDirsOnFS(partitionedTable, 2);
    // create 2 valid partition directories on the filesystem
    createPartitionsDirectoriesOnFS(partitionedTable, 2);

    CheckResult outcome = checker.checkMetastore(catName, dbName, tableName, null, null);

    assertEquals(Collections.<String>emptySet(), outcome.getTablesNotInMs());
    assertEquals(Collections.<String>emptySet(), outcome.getTablesNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), outcome.getPartitionsNotOnFs());
    // the invalid directories are skipped, so only the 2 valid partitions are reported
    assertEquals(2, outcome.getPartitionsNotInMs().size());
}
Also used : CheckResult(org.apache.hadoop.hive.metastore.CheckResult) HiveMetaStoreChecker(org.apache.hadoop.hive.metastore.HiveMetaStoreChecker) Test(org.junit.Test)

Example 5 with CheckResult

use of org.apache.hadoop.hive.metastore.CheckResult in project hive by apache.

the class TestHiveMetaStoreChecker method testPartitionsNotInMs.

/**
 * Checks that the multi-threaded checker detects partitions which exist on the
 * filesystem but are missing from the metastore.
 *
 * @throws Exception ex
 */
@Test
public void testPartitionsNotInMs() throws Exception {
    Table partitionedTable = createPartitionedTestTable(dbName, tableName, 2, 0);
    // create 10 partition directories that exist on the filesystem only
    createPartitionsDirectoriesOnFS(partitionedTable, 10);

    CheckResult outcome = checker.checkMetastore(catName, dbName, tableName, null, null);

    // no table-level differences and nothing missing from the filesystem
    assertEquals(Collections.<String>emptySet(), outcome.getTablesNotInMs());
    assertEquals(Collections.<String>emptySet(), outcome.getTablesNotOnFs());
    assertEquals(Collections.<CheckResult.PartitionResult>emptySet(), outcome.getPartitionsNotOnFs());
    // all 10 directories are reported as missing from the metastore
    assertEquals(10, outcome.getPartitionsNotInMs().size());
}
Also used : CheckResult(org.apache.hadoop.hive.metastore.CheckResult) Test(org.junit.Test)

Aggregations

CheckResult (org.apache.hadoop.hive.metastore.CheckResult): 14, Test (org.junit.Test): 14, Path (org.apache.hadoop.fs.Path): 7, HiveMetaStoreChecker (org.apache.hadoop.hive.metastore.HiveMetaStoreChecker): 2, Database (org.apache.hadoop.hive.metastore.api.Database): 1