Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
From the class TestMetaStoreServerUtils, the method testUpdateTableStatsSlow_statsUpdated:
/**
 * Verify that updateTableStatsSlow really updates table statistics.
 * The test does the following:
 * <ol>
 * <li>Create a database</li>
 * <li>Create an unpartitioned table</li>
 * <li>Create an unpartitioned table which has params</li>
 * <li>Call updateTableStatsSlow with arguments which should cause stats calculation</li>
 * <li>Verify table statistics using the mocked warehouse</li>
 * <li>Create a table which already has stats</li>
 * <li>Call updateTableStatsSlow forcing a stats recompute</li>
 * <li>Verify table statistics using the mocked warehouse</li>
 * <li>Verify behavior when STATS_GENERATED is set in the environment context</li>
 * </ol>
 */
@Test
public void testUpdateTableStatsSlow_statsUpdated() throws TException {
  long fileLength = 5;
  // Create database and table
  Table tbl = new TableBuilder()
      .setDbName(DB_NAME)
      .setTableName(TABLE_NAME)
      .addCol("id", "int")
      .build(null);
  // Set up mock warehouse
  FileStatus fs1 = getFileStatus(1, true, 2, 3, 4, "/tmp/0", false);
  FileStatus fs2 = getFileStatus(fileLength, false, 3, 4, 5, "/tmp/1", true);
  FileStatus fs3 = getFileStatus(fileLength, false, 3, 4, 5, "/tmp/1", false);
  List<FileStatus> fileStatus = Arrays.asList(fs1, fs2, fs3);
  Warehouse wh = mock(Warehouse.class);
  when(wh.getFileStatusesForUnpartitionedTable(db, tbl)).thenReturn(fileStatus);
  Map<String, String> expected = ImmutableMap.of(
      NUM_FILES, "2",
      TOTAL_SIZE, String.valueOf(2 * fileLength),
      NUM_ERASURE_CODED_FILES, "1");
  MetaStoreServerUtils.updateTableStatsSlow(db, tbl, wh, false, false, null);
  assertThat(tbl.getParameters(), is(expected));
  // Verify that when stats are already present and forceRecompute is specified they are recomputed
  Table tbl1 = new TableBuilder()
      .setDbName(DB_NAME)
      .setTableName(TABLE_NAME)
      .addCol("id", "int")
      .addTableParam(NUM_FILES, "0")
      .addTableParam(TOTAL_SIZE, "0")
      .build(null);
  when(wh.getFileStatusesForUnpartitionedTable(db, tbl1)).thenReturn(fileStatus);
  MetaStoreServerUtils.updateTableStatsSlow(db, tbl1, wh, false, true, null);
  assertThat(tbl1.getParameters(), is(expected));
  // Verify that COLUMN_STATS_ACCURATE is removed from params
  Table tbl2 = new TableBuilder()
      .setDbName(DB_NAME)
      .setTableName(TABLE_NAME)
      .addCol("id", "int")
      .addTableParam(COLUMN_STATS_ACCURATE, "true")
      .build(null);
  when(wh.getFileStatusesForUnpartitionedTable(db, tbl2)).thenReturn(fileStatus);
  MetaStoreServerUtils.updateTableStatsSlow(db, tbl2, wh, false, true, null);
  assertThat(tbl2.getParameters(), is(expected));
  EnvironmentContext context = new EnvironmentContext(ImmutableMap.of(STATS_GENERATED, StatsSetupConst.TASK));
  // Verify that if environment context has STATS_GENERATED set to task,
  // COLUMN_STATS_ACCURATE in params is set to correct value
  Table tbl3 = new TableBuilder()
      .setDbName(DB_NAME)
      .setTableName(TABLE_NAME)
      .addCol("id", "int")
      .addTableParam(COLUMN_STATS_ACCURATE, "foo") // The value doesn't matter
      .build(null);
  when(wh.getFileStatusesForUnpartitionedTable(db, tbl3)).thenReturn(fileStatus);
  MetaStoreServerUtils.updateTableStatsSlow(db, tbl3, wh, false, true, context);
  Map<String, String> expected1 = ImmutableMap.of(
      NUM_FILES, "2",
      TOTAL_SIZE, String.valueOf(2 * fileLength),
      NUM_ERASURE_CODED_FILES, "1",
      COLUMN_STATS_ACCURATE, "{\"BASIC_STATS\":\"true\"}");
  assertThat(tbl3.getParameters(), is(expected1));
}
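The expected values follow directly from the three mocked FileStatus entries: fs1 is a directory and is not counted, fs2 and fs3 are regular files of length fileLength, and only fs2 carries the erasure-coded flag, giving NUM_FILES = 2, TOTAL_SIZE = 2 * fileLength and NUM_ERASURE_CODED_FILES = 1. The getFileStatus helper is not shown in the snippet; a plausible sketch, assuming the Hadoop FileStatus constructor that accepts the hasAcl/isEncrypted/isErasureCoded flags, could look like this:

private FileStatus getFileStatus(long length, boolean isDir, int blockReplication, int blockSize,
    int modificationTime, String pathString, boolean isErasureCoded) {
  // The last boolean marks the entry as erasure coded; permission, owner, group and symlink
  // are irrelevant to the stats computation and are left unset here.
  return new FileStatus(length, isDir, blockReplication, blockSize, modificationTime,
      0, null, null, null, null, new Path(pathString), false, false, isErasureCoded);
}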
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
From the class TestMetaStoreServerUtils, the method testUpdateTableStatsSlow_removesDoNotUpdateStats:
/**
* Verify that the call to updateTableStatsSlow() removes DO_NOT_UPDATE_STATS from table params.
*/
@Test
public void testUpdateTableStatsSlow_removesDoNotUpdateStats() throws TException {
  // Create two tables, one with DO_NOT_UPDATE_STATS=true and one with it set to false
  Table tbl = new TableBuilder()
      .setDbName(DB_NAME)
      .setTableName(TABLE_NAME)
      .addCol("id", "int")
      .addTableParam(StatsSetupConst.DO_NOT_UPDATE_STATS, "true")
      .build(null);
  Table tbl1 = new TableBuilder()
      .setDbName(DB_NAME)
      .setTableName(TABLE_NAME)
      .addCol("id", "int")
      .addTableParam(StatsSetupConst.DO_NOT_UPDATE_STATS, "false")
      .build(null);
  Warehouse wh = mock(Warehouse.class);
  MetaStoreServerUtils.updateTableStatsSlow(db, tbl, wh, false, true, null);
  assertThat(tbl.getParameters(), is(Collections.emptyMap()));
  verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl);
  MetaStoreServerUtils.updateTableStatsSlow(db, tbl1, wh, true, false, null);
  assertThat(tbl1.getParameters(), is(Collections.emptyMap()));
  verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl1);
}
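A minimal sketch of the behaviour being verified, assuming updateTableStatsSlow() begins with a guard of roughly this shape (the exact Hive implementation may differ): DO_NOT_UPDATE_STATS is stripped from the parameters in both cases, and when its value was "true" the stats update is skipped entirely, which is why the mocked Warehouse is never queried.

// Illustrative only; not the verbatim MetaStoreServerUtils code.
private static boolean shouldSkipStatsUpdate(Table tbl) {
  Map<String, String> params = tbl.getParameters();
  if (params == null || !params.containsKey(StatsSetupConst.DO_NOT_UPDATE_STATS)) {
    return false;
  }
  boolean doNotUpdate = Boolean.parseBoolean(params.get(StatsSetupConst.DO_NOT_UPDATE_STATS));
  params.remove(StatsSetupConst.DO_NOT_UPDATE_STATS); // the marker is removed whether true or false
  return doNotUpdate;
}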
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
From the class AbstractMetaStoreService, the method start:
/**
 * Starts the MetaStoreService. Be aware that, since the current MetaStore does not implement a clean
 * shutdown, the MetaStoreService can be started only once per test.
 *
 * @throws Exception if any Exception occurs
 */
public void start() throws Exception {
  warehouse = new Warehouse(configuration);
  warehouseRootFs = warehouse.getFs(warehouse.getWhRoot());
  TrashPolicy trashPolicy = TrashPolicy.getInstance(configuration, warehouseRootFs);
  trashDir = trashPolicy.getCurrentTrashDir();
}
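The fields initialized here are typically used by concrete test services to reason about warehouse files and the trash. A hypothetical helper built on them (the name isInTrash is illustrative and not part of the snippet) could look like this:

// Checks whether a warehouse path has been moved into the current trash directory,
// e.g. after a drop that goes through the configured TrashPolicy. Hypothetical sketch.
public boolean isInTrash(Path path) throws IOException {
  Path trashedPath = Path.mergePaths(trashDir, path);
  return warehouseRootFs.exists(trashedPath);
}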
Use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.
From the class MetaToolTaskListExtTblLocs, the method generateExternalTableInfo:
private void generateExternalTableInfo(String dbPattern, String outputDir) throws TException, IOException, JSONException {
  ObjectStore objectStore = getObjectStore();
  conf = msConf != null ? msConf : objectStore.getConf();
  Warehouse wh = new Warehouse(conf);
  String defaultCatalog = MetaStoreUtils.getDefaultCatalog(conf);
  List<String> databases = objectStore.getDatabases(defaultCatalog, dbPattern);
  System.out.println("Number of databases found for given pattern: " + databases.size());
  // maintain the set of leaves of the tree as a sorted set
  Set<String> leafLocations = new TreeSet<>();
  for (String db : databases) {
    List<String> tables = objectStore.getAllTables(defaultCatalog, db);
    Path defaultDbExtPath = wh.getDefaultExternalDatabasePath(db);
    String defaultDbExtLocation = defaultDbExtPath.toString();
    boolean isDefaultPathEmpty = true;
    for (String tblName : tables) {
      Table t = objectStore.getTable(defaultCatalog, db, tblName);
      if (TableType.EXTERNAL_TABLE.name().equalsIgnoreCase(t.getTableType())) {
        String tblLocation = t.getSd().getLocation();
        Path tblPath = new Path(tblLocation);
        if (isPathWithinSubtree(tblPath, defaultDbExtPath)) {
          if (isDefaultPathEmpty) {
            isDefaultPathEmpty = false;
            // default paths should always be included, so we add them as special leaves to the tree
            addDefaultPath(defaultDbExtLocation, db);
            leafLocations.add(defaultDbExtLocation);
          }
          HashSet<String> coveredByDefault = coverageList.get(defaultDbExtLocation);
          coveredByDefault.add(tblLocation);
        } else if (!isCovered(leafLocations, tblPath)) {
          leafLocations.add(tblLocation);
        }
        DataLocation dataLocation = new DataLocation(db, tblName, 0, 0, null);
        inputLocations.put(tblLocation, dataLocation);
        dataLocation.setSizeExtTblData(getDataSize(tblPath, conf));
        // retrieving partition locations outside table-location
        Map<String, String> partitionLocations = objectStore.getPartitionLocations(defaultCatalog, db, tblName, tblLocation, -1);
        dataLocation.setTotalPartitions(partitionLocations.size());
        for (String partitionName : partitionLocations.keySet()) {
          String partLocation = partitionLocations.get(partitionName);
          // null value means partition is in table location, we do not add it to input in this case.
          if (partLocation == null) {
            dataLocation.incrementNumPartsInTblLoc();
          } else {
            partLocation = partLocation + Path.SEPARATOR + Warehouse.makePartName(Warehouse.makeSpecFromName(partitionName), false);
            Path partPath = new Path(partLocation);
            long partDataSize = getDataSize(partPath, conf);
            if (isPathWithinSubtree(partPath, defaultDbExtPath)) {
              if (isDefaultPathEmpty) {
                isDefaultPathEmpty = false;
                addDefaultPath(defaultDbExtLocation, db);
                leafLocations.add(defaultDbExtLocation);
              }
              if (isPathWithinSubtree(partPath, tblPath)) {
                // even in non-null case, handle the corner case where location is set to table-location
                // In this case, partition would be covered by table location itself, so we need not add to input
                dataLocation.incrementNumPartsInTblLoc();
              } else {
                DataLocation partObj = new DataLocation(db, tblName, 0, 0, partitionName);
                partObj.setSizeExtTblData(partDataSize);
                inputLocations.put(partLocation, partObj);
                coverageList.get(defaultDbExtLocation).add(partLocation);
              }
            } else {
              if (isPathWithinSubtree(partPath, tblPath)) {
                dataLocation.incrementNumPartsInTblLoc();
              } else {
                // only in this case, partition location is neither inside table nor in default location.
                // So we add it to the graph as a separate leaf.
                DataLocation partObj = new DataLocation(db, tblName, 0, 0, partitionName);
                partObj.setSizeExtTblData(partDataSize);
                inputLocations.put(partLocation, partObj);
                if (!isCovered(leafLocations, partPath)) {
                  leafLocations.add(partLocation);
                }
              }
            }
          }
        }
      }
    }
  }
  if (!leafLocations.isEmpty()) {
    removeNestedStructure(leafLocations);
    createOutputList(leafLocations, outputDir, dbPattern);
  } else {
    System.out.println("No external tables found to process.");
  }
}
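The traversal above leans on two helpers that are not shown: isPathWithinSubtree(), which decides whether a table or partition path lives under a given root, and isCovered(), which checks whether a path is already represented by an existing leaf. A plausible sketch of the first one, assuming a simple ancestor walk (the real MetaToolTaskListExtTblLocs code may differ), is:

private boolean isPathWithinSubtree(Path path, Path subtree) {
  int subtreeDepth = subtree.depth();
  while (path != null) {
    if (path.depth() < subtreeDepth) {
      return false;  // already shallower than the candidate root, so it cannot be inside it
    }
    if (path.equals(subtree)) {
      return true;   // reached the candidate root itself
    }
    path = path.getParent();
  }
  return false;
}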
Use of org.apache.hadoop.hive.metastore.Warehouse in project metacat by Netflix.
From the class HiveConnectorClientConfig, the method warehouse:
/**
 * Create the Warehouse used for file system calls.
 *
 * @param connectorContext connector config context
 * @return the Warehouse instance
 */
@Bean
public Warehouse warehouse(final ConnectorContext connectorContext) {
  try {
    final HiveConf conf = this.getDefaultConf(connectorContext);
    connectorContext.getConfiguration().forEach(conf::set);
    return new Warehouse(conf);
  } catch (Exception e) {
    throw new IllegalArgumentException(
        String.format("Failed creating the hive warehouse for catalog: %s", connectorContext.getCatalogName()), e);
  }
}
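A hypothetical consumer of this bean (the class and method names are illustrative, not part of metacat): Spring injects the Warehouse wherever the connector needs to resolve filesystem locations, for example the default path of a Hive database.

@Service
public class HiveLocationResolver {

  private final Warehouse warehouse;

  public HiveLocationResolver(final Warehouse warehouse) {
    this.warehouse = warehouse;
  }

  public Path defaultDatabasePath(final String databaseName) throws MetaException {
    // Resolves <hive.metastore.warehouse.dir>/<databaseName>.db via the injected Warehouse
    return warehouse.getDefaultDatabasePath(databaseName);
  }
}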