Example 26 with Warehouse

use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.

the class TestMetaStoreServerUtils method testUpdateTableStatsSlow_statsUpdated.

/**
 * Verify that updateTableStatsSlow really updates table statistics.
 * The test does the following:
 * <ol>
 *   <li>Create a database</li>
 *   <li>Create an unpartitioned table</li>
 *   <li>Create an unpartitioned table that has params</li>
 *   <li>Call updateTableStatsSlow with arguments that should cause stats calculation</li>
 *   <li>Verify table statistics using the mocked warehouse</li>
 *   <li>Create a table which already has stats</li>
 *   <li>Call updateTableStatsSlow forcing a stats recompute</li>
 *   <li>Verify table statistics using the mocked warehouse</li>
 *   <li>Verify behavior when STATS_GENERATED is set in the environment context</li>
 * </ol>
 */
@Test
public void testUpdateTableStatsSlow_statsUpdated() throws TException {
    long fileLength = 5;
    // Build the test table; the Database object db is provided by the test fixture
    Table tbl = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int").build(null);
    // Set up a mock warehouse: one directory (not counted in the stats), one
    // erasure-coded file and one regular file, each of length fileLength
    FileStatus fs1 = getFileStatus(1, true, 2, 3, 4, "/tmp/0", false);
    FileStatus fs2 = getFileStatus(fileLength, false, 3, 4, 5, "/tmp/1", true);
    FileStatus fs3 = getFileStatus(fileLength, false, 3, 4, 5, "/tmp/1", false);
    List<FileStatus> fileStatus = Arrays.asList(fs1, fs2, fs3);
    Warehouse wh = mock(Warehouse.class);
    when(wh.getFileStatusesForUnpartitionedTable(db, tbl)).thenReturn(fileStatus);
    Map<String, String> expected = ImmutableMap.of(NUM_FILES, "2",
            TOTAL_SIZE, String.valueOf(2 * fileLength),
            NUM_ERASURE_CODED_FILES, "1");
    MetaStoreServerUtils.updateTableStatsSlow(db, tbl, wh, false, false, null);
    assertThat(tbl.getParameters(), is(expected));
    // Verify that when stats are already present and forceRecompute is specified they are recomputed
    Table tbl1 = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int")
            .addTableParam(NUM_FILES, "0").addTableParam(TOTAL_SIZE, "0").build(null);
    when(wh.getFileStatusesForUnpartitionedTable(db, tbl1)).thenReturn(fileStatus);
    MetaStoreServerUtils.updateTableStatsSlow(db, tbl1, wh, false, true, null);
    assertThat(tbl1.getParameters(), is(expected));
    // Verify that COLUMN_STATS_ACCURATE is removed from params
    Table tbl2 = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int")
            .addTableParam(COLUMN_STATS_ACCURATE, "true").build(null);
    when(wh.getFileStatusesForUnpartitionedTable(db, tbl2)).thenReturn(fileStatus);
    MetaStoreServerUtils.updateTableStatsSlow(db, tbl2, wh, false, true, null);
    assertThat(tbl2.getParameters(), is(expected));
    EnvironmentContext context = new EnvironmentContext(ImmutableMap.of(STATS_GENERATED, StatsSetupConst.TASK));
    // Verify that if environment context has STATS_GENERATED set to task,
    // COLUMN_STATS_ACCURATE in params is set to correct value
    Table tbl3 = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int")
            .addTableParam(COLUMN_STATS_ACCURATE, "foo") // the existing value doesn't matter
            .build(null);
    when(wh.getFileStatusesForUnpartitionedTable(db, tbl3)).thenReturn(fileStatus);
    MetaStoreServerUtils.updateTableStatsSlow(db, tbl3, wh, false, true, context);
    Map<String, String> expected1 = ImmutableMap.of(NUM_FILES, "2",
            TOTAL_SIZE, String.valueOf(2 * fileLength),
            NUM_ERASURE_CODED_FILES, "1",
            COLUMN_STATS_ACCURATE, "{\"BASIC_STATS\":\"true\"}");
    assertThat(tbl3.getParameters(), is(expected1));
}
Also used : EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) Table(org.apache.hadoop.hive.metastore.api.Table) FileStatus(org.apache.hadoop.fs.FileStatus) TableBuilder(org.apache.hadoop.hive.metastore.client.builder.TableBuilder) MetastoreUnitTest(org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest) Test(org.junit.Test)
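The getFileStatus helper called above is not part of this excerpt. A plausible reconstruction, assuming Hadoop 3's attribute-flag FileStatus constructor (the actual helper in TestMetaStoreServerUtils may differ; it also needs org.apache.hadoop.fs.Path, which is not in this example's import list):

// Sketch of the assumed helper; the parameter order mirrors the calls above:
// getFileStatus(length, isDir, blockReplication, blockSize, modTime, path, isErasureCoded)
private FileStatus getFileStatus(long fileLength, boolean isDir, int blockReplication,
        int blockSize, int modificationTime, String pathString, boolean isErasureCoded) {
    return new FileStatus(fileLength, isDir, blockReplication, blockSize, modificationTime,
            0, null, null, null, null, new Path(pathString), false, false, isErasureCoded);
}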

Example 27 with Warehouse

use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.

the class TestMetaStoreServerUtils method testUpdateTableStatsSlow_removesDoNotUpdateStats.

/**
 * Verify that the call to updateTableStatsSlow() removes DO_NOT_UPDATE_STATS from table params.
 */
@Test
public void testUpdateTableStatsSlow_removesDoNotUpdateStats() throws TException {
    // Build two tables: one with DO_NOT_UPDATE_STATS=true, one with DO_NOT_UPDATE_STATS=false
    Table tbl = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int")
            .addTableParam(StatsSetupConst.DO_NOT_UPDATE_STATS, "true").build(null);
    Table tbl1 = new TableBuilder().setDbName(DB_NAME).setTableName(TABLE_NAME).addCol("id", "int")
            .addTableParam(StatsSetupConst.DO_NOT_UPDATE_STATS, "false").build(null);
    Warehouse wh = mock(Warehouse.class);
    MetaStoreServerUtils.updateTableStatsSlow(db, tbl, wh, false, true, null);
    assertThat(tbl.getParameters(), is(Collections.emptyMap()));
    verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl);
    MetaStoreServerUtils.updateTableStatsSlow(db, tbl1, wh, true, false, null);
    assertThat(tbl1.getParameters(), is(Collections.emptyMap()));
    verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl1);
}
Also used : Warehouse(org.apache.hadoop.hive.metastore.Warehouse) Table(org.apache.hadoop.hive.metastore.api.Table) TableBuilder(org.apache.hadoop.hive.metastore.client.builder.TableBuilder) MetastoreUnitTest(org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest) Test(org.junit.Test)
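For context, the behavior asserted here implies a guard at the top of updateTableStatsSlow along these lines; this is a paraphrase inferred from the assertions, not verbatim Hive source. Note that the second call above is skipped for a different reason: its fourth argument (the newly-created-directory flag) is true, which also suppresses file listing.

// Paraphrased guard: DO_NOT_UPDATE_STATS is always removed from the params,
// and a "true" value short-circuits stats gathering entirely.
Map<String, String> params = tbl.getParameters();
if (params != null && params.containsKey(StatsSetupConst.DO_NOT_UPDATE_STATS)) {
    boolean doNotUpdate = Boolean.parseBoolean(params.get(StatsSetupConst.DO_NOT_UPDATE_STATS));
    params.remove(StatsSetupConst.DO_NOT_UPDATE_STATS);
    tbl.setParameters(params);
    if (doNotUpdate) {
        return; // no file listing, hence the verify(wh, never()) assertions above
    }
}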

Example 28 with Warehouse

use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.

the class AbstractMetaStoreService method start.

/**
 * Starts the MetaStoreService. Be aware that, because the current MetaStore does not
 * implement clean shutdown, the MetaStoreService can be started only once per test.
 *
 * @throws Exception if any Exception occurs
 */
public void start() throws Exception {
    warehouse = new Warehouse(configuration);
    warehouseRootFs = warehouse.getFs(warehouse.getWhRoot());
    TrashPolicy trashPolicy = TrashPolicy.getInstance(configuration, warehouseRootFs);
    trashDir = trashPolicy.getCurrentTrashDir();
}
Also used : Warehouse(org.apache.hadoop.hive.metastore.Warehouse) TrashPolicy(org.apache.hadoop.fs.TrashPolicy)
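A hypothetical JUnit 4 lifecycle around that restriction (field initialization is assumed to come from a concrete subclass or test factory; all names here are illustrative only):

// Hypothetical usage: start the service once for the whole test class,
// since repeated starts are not supported (no clean shutdown).
private static AbstractMetaStoreService metaStore; // assumed to be provided elsewhere

@BeforeClass
public static void startMetaStore() throws Exception {
    metaStore.start(); // must run exactly once per test run
}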

Example 29 with Warehouse

use of org.apache.hadoop.hive.metastore.Warehouse in project hive by apache.

the class MetaToolTaskListExtTblLocs method generateExternalTableInfo.

private void generateExternalTableInfo(String dbPattern, String outputDir) throws TException, IOException, JSONException {
    ObjectStore objectStore = getObjectStore();
    conf = msConf != null ? msConf : objectStore.getConf();
    Warehouse wh = new Warehouse(conf);
    String defaultCatalog = MetaStoreUtils.getDefaultCatalog(conf);
    List<String> databases = objectStore.getDatabases(defaultCatalog, dbPattern);
    System.out.println("Number of databases found for given pattern: " + databases.size());
    // maintain the set of leaves of the tree as a sorted set
    Set<String> leafLocations = new TreeSet<>();
    for (String db : databases) {
        List<String> tables = objectStore.getAllTables(defaultCatalog, db);
        Path defaultDbExtPath = wh.getDefaultExternalDatabasePath(db);
        String defaultDbExtLocation = defaultDbExtPath.toString();
        boolean isDefaultPathEmpty = true;
        for (String tblName : tables) {
            Table t = objectStore.getTable(defaultCatalog, db, tblName);
            if (TableType.EXTERNAL_TABLE.name().equalsIgnoreCase(t.getTableType())) {
                String tblLocation = t.getSd().getLocation();
                Path tblPath = new Path(tblLocation);
                if (isPathWithinSubtree(tblPath, defaultDbExtPath)) {
                    if (isDefaultPathEmpty) {
                        isDefaultPathEmpty = false;
                        // default paths should always be included, so we add them as special leaves to the tree
                        addDefaultPath(defaultDbExtLocation, db);
                        leafLocations.add(defaultDbExtLocation);
                    }
                    HashSet<String> coveredByDefault = coverageList.get(defaultDbExtLocation);
                    coveredByDefault.add(tblLocation);
                } else if (!isCovered(leafLocations, tblPath)) {
                    leafLocations.add(tblLocation);
                }
                DataLocation dataLocation = new DataLocation(db, tblName, 0, 0, null);
                inputLocations.put(tblLocation, dataLocation);
                dataLocation.setSizeExtTblData(getDataSize(tblPath, conf));
                // retrieving partition locations outside table-location
                Map<String, String> partitionLocations = objectStore.getPartitionLocations(defaultCatalog, db, tblName, tblLocation, -1);
                dataLocation.setTotalPartitions(partitionLocations.size());
                for (String partitionName : partitionLocations.keySet()) {
                    String partLocation = partitionLocations.get(partitionName);
                    // A null value means the partition lives inside the table location;
                    // in that case we do not add it to the input.
                    if (partLocation == null) {
                        dataLocation.incrementNumPartsInTblLoc();
                    } else {
                        partLocation = partLocation + Path.SEPARATOR + Warehouse.makePartName(Warehouse.makeSpecFromName(partitionName), false);
                        Path partPath = new Path(partLocation);
                        long partDataSize = getDataSize(partPath, conf);
                        if (isPathWithinSubtree(partPath, defaultDbExtPath)) {
                            if (isDefaultPathEmpty) {
                                isDefaultPathEmpty = false;
                                addDefaultPath(defaultDbExtLocation, db);
                                leafLocations.add(defaultDbExtLocation);
                            }
                            if (isPathWithinSubtree(partPath, tblPath)) {
                                // Even in the non-null case, the location may be set to the
                                // table location itself; the table location already covers the
                                // partition, so we need not add it to the input.
                                dataLocation.incrementNumPartsInTblLoc();
                            } else {
                                DataLocation partObj = new DataLocation(db, tblName, 0, 0, partitionName);
                                partObj.setSizeExtTblData(partDataSize);
                                inputLocations.put(partLocation, partObj);
                                coverageList.get(defaultDbExtLocation).add(partLocation);
                            }
                        } else {
                            if (isPathWithinSubtree(partPath, tblPath)) {
                                dataLocation.incrementNumPartsInTblLoc();
                            } else {
                                // Only in this case is the partition location neither inside the
                                // table location nor under the default location, so we add it to
                                // the graph as a separate leaf.
                                DataLocation partObj = new DataLocation(db, tblName, 0, 0, partitionName);
                                partObj.setSizeExtTblData(partDataSize);
                                inputLocations.put(partLocation, partObj);
                                if (!isCovered(leafLocations, partPath)) {
                                    leafLocations.add(partLocation);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    if (!leafLocations.isEmpty()) {
        removeNestedStructure(leafLocations);
        createOutputList(leafLocations, outputDir, dbPattern);
    } else {
        System.out.println("No external tables found to process.");
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ObjectStore(org.apache.hadoop.hive.metastore.ObjectStore) Warehouse(org.apache.hadoop.hive.metastore.Warehouse) Table(org.apache.hadoop.hive.metastore.api.Table) TreeSet(java.util.TreeSet)
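The Warehouse.makeSpecFromName / Warehouse.makePartName pair used when rebuilding partLocation round-trips a partition name through its key/value spec, which normalizes name escaping. A small illustration with assumed values (both calls throw MetaException):

// Illustrative round-trip; the partition name is an assumed example.
Map<String, String> spec = Warehouse.makeSpecFromName("ds=2021-01-01/hr=00"); // {ds=2021-01-01, hr=00}
String partName = Warehouse.makePartName(spec, false); // "ds=2021-01-01/hr=00"; false appears to suppress a trailing separator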

Example 30 with Warehouse

use of org.apache.hadoop.hive.metastore.Warehouse in project metacat by Netflix.

the class HiveConnectorClientConfig method warehouse.

/**
 * Creates a Warehouse for file system calls.
 *
 * @param connectorContext connector config context
 * @return the Warehouse
 */
@Bean
public Warehouse warehouse(final ConnectorContext connectorContext) {
    try {
        final HiveConf conf = this.getDefaultConf(connectorContext);
        connectorContext.getConfiguration().forEach(conf::set);
        return new Warehouse(conf);
    } catch (Exception e) {
        throw new IllegalArgumentException(String.format("Failed creating the hive warehouse for catalog: %s", connectorContext.getCatalogName()), e);
    }
}
Also used : Warehouse(org.apache.hadoop.hive.metastore.Warehouse) HiveConf(org.apache.hadoop.hive.conf.HiveConf) Bean(org.springframework.context.annotation.Bean)
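Once the bean is registered, Spring can inject it into any component in the connector. A minimal consumer sketch (the class name is assumed for illustration; getWhRoot() is the same call used in Example 28):

// Hypothetical consumer of the Warehouse bean defined above:
@Service
public class WarehousePathService {
    private final Warehouse warehouse;

    public WarehousePathService(final Warehouse warehouse) {
        this.warehouse = warehouse; // constructor injection of the bean
    }

    public String warehouseRoot() throws MetaException {
        return warehouse.getWhRoot().toString();
    }
}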

Aggregations

Warehouse (org.apache.hadoop.hive.metastore.Warehouse) 31
Path (org.apache.hadoop.fs.Path) 15
MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 14
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException) 14
Table (org.apache.hadoop.hive.ql.metadata.Table) 10
ArrayList (java.util.ArrayList) 8
IOException (java.io.IOException) 7
FileSystem (org.apache.hadoop.fs.FileSystem) 6
Table (org.apache.hadoop.hive.metastore.api.Table) 6
HashMap (java.util.HashMap) 4
InvalidTableException (org.apache.hadoop.hive.ql.metadata.InvalidTableException) 4
Test (org.junit.Test) 4
URI (java.net.URI) 3
URISyntaxException (java.net.URISyntaxException) 3
LinkedHashMap (java.util.LinkedHashMap) 3
Configuration (org.apache.hadoop.conf.Configuration) 3
ObjectStore (org.apache.hadoop.hive.metastore.ObjectStore) 3
MetastoreUnitTest (org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest) 3
EnvironmentContext (org.apache.hadoop.hive.metastore.api.EnvironmentContext) 3
TableBuilder (org.apache.hadoop.hive.metastore.client.builder.TableBuilder) 3