Search in sources :

Example 1 with Materialization

use of org.apache.hadoop.hive.metastore.api.Materialization in project hive by apache.

the class TestMetaStoreMaterializationsCacheCleaner method testCleanerScenario1.

/**
 * Runs the cleaner scenario against an invalidation cache initialized with
 * {@code metastore.materializations.invalidation.impl} set to {@code DISABLE}.
 *
 * <p>The test replays a sequence of table modifications, materialized view
 * creations and cleanups, and asserts that every cleanup reports zero removed
 * entries and that every invalidation-info lookup returns an empty map.
 */
@Test
public void testCleanerScenario1() throws Exception {
    // configuration that disables the invalidation cache
    Configuration conf = new Configuration();
    conf.set("metastore.materializations.invalidation.impl", "DISABLE");
    // create mock handler
    final IHMSHandler handler = mock(IHMSHandler.class);
    // initialize invalidation cache (set conf to disable)
    final MaterializationsInvalidationCache cache = MaterializationsInvalidationCache.get();
    cache.init(conf, handler);
    // This is a dummy test, invalidation cache is not supposed to
    // record any information.
    // Create tbl1 (nothing to do)
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 1, 1);
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 2, 2);
    // Create tbl2 (nothing to do)
    cache.notifyTableModification(DB_NAME, TBL_NAME_2, 3, 3);
    // Cleanup (current = 4, duration = 4) -> Does nothing
    long removed = cache.cleanup(0L);
    Assert.assertEquals(0L, removed);
    // Create mv1 over tbl1 and tbl2
    CreationMetadata mv1Metadata = new CreationMetadata(DB_NAME, MV_NAME_1, ImmutableSet.of(DB_NAME + "." + TBL_NAME_1, DB_NAME + "." + TBL_NAME_2));
    // Create txn list (highWatermark=3;minOpenTxn=Long.MAX_VALUE)
    mv1Metadata.setValidTxnList("3:" + Long.MAX_VALUE + "::");
    cache.createMaterializedView(mv1Metadata.getDbName(), mv1Metadata.getTblName(), mv1Metadata.getTablesUsed(), mv1Metadata.getValidTxnList());
    Map<String, Materialization> invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1));
    Assert.assertTrue(invalidationInfos.isEmpty());
    // Modifications are deliberately reported out of order (10 before 9)
    cache.notifyTableModification(DB_NAME, TBL_NAME_2, 10, 10);
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 9, 9);
    // Cleanup (current = 12, duration = 4) -> Removes nothing, cache is disabled
    removed = cache.cleanup(8L);
    Assert.assertEquals(0L, removed);
    invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1));
    Assert.assertTrue(invalidationInfos.isEmpty());
    // Create mv2 over tbl1 and tbl2
    CreationMetadata mv2Metadata = new CreationMetadata(DB_NAME, MV_NAME_2, ImmutableSet.of(DB_NAME + "." + TBL_NAME_1, DB_NAME + "." + TBL_NAME_2));
    // Create txn list (highWatermark=10;minOpenTxn=Long.MAX_VALUE)
    mv2Metadata.setValidTxnList("10:" + Long.MAX_VALUE + "::");
    cache.createMaterializedView(mv2Metadata.getDbName(), mv2Metadata.getTblName(), mv2Metadata.getTablesUsed(), mv2Metadata.getValidTxnList());
    invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertTrue(invalidationInfos.isEmpty());
    // Create tbl3 (nothing to do)
    cache.notifyTableModification(DB_NAME, TBL_NAME_3, 11, 11);
    cache.notifyTableModification(DB_NAME, TBL_NAME_3, 18, 18);
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 14, 14);
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 17, 17);
    cache.notifyTableModification(DB_NAME, TBL_NAME_2, 16, 16);
    // Cleanup (current = 20, duration = 4) -> Removes nothing, cache is disabled
    removed = cache.cleanup(16L);
    Assert.assertEquals(0L, removed);
    invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertTrue(invalidationInfos.isEmpty());
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 12, 12);
    cache.notifyTableModification(DB_NAME, TBL_NAME_2, 15, 15);
    cache.notifyTableModification(DB_NAME, TBL_NAME_2, 7, 7);
    invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertTrue(invalidationInfos.isEmpty());
    // Cleanup (current = 24, duration = 4) -> Removes nothing, cache is disabled
    removed = cache.cleanup(20L);
    Assert.assertEquals(0L, removed);
    invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertTrue(invalidationInfos.isEmpty());
    // Cleanup (current = 28, duration = 4) -> Removes nothing, cache is disabled
    removed = cache.cleanup(24L);
    Assert.assertEquals(0L, removed);
}
Also used : Materialization(org.apache.hadoop.hive.metastore.api.Materialization) CreationMetadata(org.apache.hadoop.hive.metastore.api.CreationMetadata) BasicTxnInfo(org.apache.hadoop.hive.metastore.api.BasicTxnInfo) Table(org.apache.hadoop.hive.metastore.api.Table) Configuration(org.apache.hadoop.conf.Configuration) Test(org.junit.Test)

Example 2 with Materialization

use of org.apache.hadoop.hive.metastore.api.Materialization in project hive by apache.

the class TestMetaStoreMaterializationsCacheCleaner method testCleanerScenario2.

/**
 * Runs the cleaner scenario against an invalidation cache initialized with
 * {@code metastore.materializations.invalidation.impl} set to {@code DEFAULT}.
 *
 * <p>The test replays a sequence of table modifications (some reported out of
 * order), materialized view creations and cleanups. After each step it checks
 * the number of entries each cleanup reports as removed and the invalidation
 * time reported for each materialized view.
 */
@Test
public void testCleanerScenario2() throws Exception {
    // configuration that selects the default invalidation cache implementation
    Configuration conf = new Configuration();
    conf.set("metastore.materializations.invalidation.impl", "DEFAULT");
    // create mock handler
    final IHMSHandler handler = mock(IHMSHandler.class);
    // initialize invalidation cache (set conf to default)
    final MaterializationsInvalidationCache cache = MaterializationsInvalidationCache.get();
    cache.init(conf, handler);
    // Scenario consists of the following steps:
    // Create tbl1
    // (t = 1) Insert row in tbl1
    // (t = 2) Insert row in tbl1
    // Create tbl2
    // (t = 3) Insert row in tbl2
    // Cleanup (current = 4, duration = 4) -> Does nothing
    // Create mv1
    // (t = 10) Insert row in tbl2
    // (t = 9) Insert row in tbl1 (out of order)
    // Cleanup (current = 12, duration = 4) -> Removes txn1, txn2, txn3
    // Create mv2
    // Create tbl3
    // (t = 11) Insert row in tbl3
    // (t = 18) Insert row in tbl3
    // (t = 14) Insert row in tbl1
    // (t = 17) Insert row in tbl1
    // (t = 16) Insert row in tbl2
    // Cleanup (current = 20, duration = 4) -> Removes txn10, txn11
    // (t = 12) Insert row in tbl1
    // (t = 15) Insert row in tbl2
    // (t = 7) Insert row in tbl2
    // Cleanup (current = 24, duration = 4) -> Removes txn9, txn14, txn15, txn16, txn17, txn18
    // Cleanup (current = 28, duration = 4) -> Removes nothing
    // Create tbl1 (nothing to do)
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 1, 1);
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 2, 2);
    // Create tbl2 (nothing to do)
    cache.notifyTableModification(DB_NAME, TBL_NAME_2, 3, 3);
    // Cleanup (current = 4, duration = 4) -> Does nothing
    long removed = cache.cleanup(0L);
    Assert.assertEquals(0L, removed);
    // Create mv1 over tbl1 and tbl2
    CreationMetadata mv1Metadata = new CreationMetadata(DB_NAME, MV_NAME_1, ImmutableSet.of(DB_NAME + "." + TBL_NAME_1, DB_NAME + "." + TBL_NAME_2));
    // Create txn list (highWatermark=3;minOpenTxn=Long.MAX_VALUE)
    mv1Metadata.setValidTxnList("3:" + Long.MAX_VALUE + "::");
    cache.createMaterializedView(mv1Metadata.getDbName(), mv1Metadata.getTblName(), mv1Metadata.getTablesUsed(), mv1Metadata.getValidTxnList());
    Map<String, Materialization> invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1));
    Assert.assertEquals(0L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    // Modifications are deliberately reported out of order (10 before 9)
    cache.notifyTableModification(DB_NAME, TBL_NAME_2, 10, 10);
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 9, 9);
    // Cleanup (current = 12, duration = 4) -> Removes txn1, txn2, txn3
    removed = cache.cleanup(8L);
    Assert.assertEquals(3L, removed);
    invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1));
    Assert.assertEquals(9L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    // Create mv2 over tbl1 and tbl2
    CreationMetadata mv2Metadata = new CreationMetadata(DB_NAME, MV_NAME_2, ImmutableSet.of(DB_NAME + "." + TBL_NAME_1, DB_NAME + "." + TBL_NAME_2));
    // Create txn list (highWatermark=10;minOpenTxn=Long.MAX_VALUE)
    mv2Metadata.setValidTxnList("10:" + Long.MAX_VALUE + "::");
    cache.createMaterializedView(mv2Metadata.getDbName(), mv2Metadata.getTblName(), mv2Metadata.getTablesUsed(), mv2Metadata.getValidTxnList());
    invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertEquals(9L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    Assert.assertEquals(0L, invalidationInfos.get(MV_NAME_2).getInvalidationTime());
    // Create tbl3 (nothing to do)
    cache.notifyTableModification(DB_NAME, TBL_NAME_3, 11, 11);
    cache.notifyTableModification(DB_NAME, TBL_NAME_3, 18, 18);
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 14, 14);
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 17, 17);
    cache.notifyTableModification(DB_NAME, TBL_NAME_2, 16, 16);
    // Cleanup (current = 20, duration = 4) -> Removes txn10, txn11
    removed = cache.cleanup(16L);
    Assert.assertEquals(2L, removed);
    invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertEquals(9L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    Assert.assertEquals(14L, invalidationInfos.get(MV_NAME_2).getInvalidationTime());
    // Late-arriving modifications move the reported invalidation times earlier
    cache.notifyTableModification(DB_NAME, TBL_NAME_1, 12, 12);
    cache.notifyTableModification(DB_NAME, TBL_NAME_2, 15, 15);
    cache.notifyTableModification(DB_NAME, TBL_NAME_2, 7, 7);
    invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertEquals(7L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    Assert.assertEquals(12L, invalidationInfos.get(MV_NAME_2).getInvalidationTime());
    // Cleanup (current = 24, duration = 4) -> Removes txn9, txn14, txn15, txn16, txn17, txn18
    removed = cache.cleanup(20L);
    Assert.assertEquals(6L, removed);
    invalidationInfos = cache.getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertEquals(7L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    Assert.assertEquals(12L, invalidationInfos.get(MV_NAME_2).getInvalidationTime());
    // Cleanup (current = 28, duration = 4) -> Removes nothing
    removed = cache.cleanup(24L);
    Assert.assertEquals(0L, removed);
}
Also used : Materialization(org.apache.hadoop.hive.metastore.api.Materialization) CreationMetadata(org.apache.hadoop.hive.metastore.api.CreationMetadata) BasicTxnInfo(org.apache.hadoop.hive.metastore.api.BasicTxnInfo) Table(org.apache.hadoop.hive.metastore.api.Table) Configuration(org.apache.hadoop.conf.Configuration) Test(org.junit.Test)

Example 3 with Materialization

use of org.apache.hadoop.hive.metastore.api.Materialization in project hive by apache.

the class Hive method getValidMaterializedViews.

/**
 * Get the materialized views that have been enabled for rewriting from the
 * metastore. If the materialized view is in the cache, we do not need to
 * parse it to generate a logical plan for the rewriting. Instead, we
 * return the version present in the cache. Further, information provided
 * by the invalidation cache is useful to know whether a materialized view
 * can be used for rewriting or not.
 *
 * <p>A materialized view is skipped when the metastore returns no invalidation
 * information for it, when it is considered outdated, or when it is not in
 * the registry and the registry has not been initialized yet.
 *
 * @param materializedViewRebuild whether the views are requested on behalf of
 *          a rebuild; when {@code true}, any view with a non-zero invalidation
 *          time is ignored regardless of its rewriting time window
 * @return the list of materialized views available for rewriting
 * @throws HiveException wrapping any exception raised while retrieving or
 *           loading the materialized views
 */
public List<RelOptMaterialization> getValidMaterializedViews(boolean materializedViewRebuild) throws HiveException {
    final long defaultDiff = HiveConf.getTimeVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW, TimeUnit.MILLISECONDS);
    final long currentTime = System.currentTimeMillis();
    try {
        // Final result
        List<RelOptMaterialization> result = new ArrayList<>();
        for (String dbName : getMSC().getAllDatabases()) {
            // From metastore (for security)
            List<String> materializedViewNames = getMaterializedViewsForRewriting(dbName);
            if (materializedViewNames.isEmpty()) {
                // Bail out: empty list
                continue;
            }
            List<Table> materializedViewTables = getTableObjects(dbName, materializedViewNames);
            Map<String, Materialization> databaseInvalidationInfo = getMSC().getMaterializationsInvalidationInfo(dbName, materializedViewNames);
            for (Table materializedViewTable : materializedViewTables) {
                // Check whether the materialized view is invalidated
                Materialization materializationInvalidationInfo = databaseInvalidationInfo.get(materializedViewTable.getTableName());
                if (materializationInvalidationInfo == null) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() + " ignored for rewriting as there was no information loaded in the invalidation cache");
                    }
                    continue;
                }
                // Check if materialization defined its own invalidation time window
                String timeWindowString = materializedViewTable.getProperty(MATERIALIZED_VIEW_REWRITING_TIME_WINDOW);
                long diff = org.apache.commons.lang.StringUtils.isEmpty(timeWindowString) ? defaultDiff : HiveConf.toTime(timeWindowString, HiveConf.getDefaultTimeUnit(HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW), TimeUnit.MILLISECONDS);
                long invalidationTime = materializationInvalidationInfo.getInvalidationTime();
                // If the time window is zero, or if we are doing a rebuild, the
                // materialized view cannot be outdated at all: any non-zero
                // invalidation time disqualifies it. Otherwise it is only
                // disqualified when the invalidation time falls after
                // (currentTime - diff). An invalidation time of zero is never
                // treated as outdated.
                boolean noStalenessAllowed = diff == 0L || materializedViewRebuild;
                boolean outdated = invalidationTime != 0L && (noStalenessAllowed || invalidationTime > currentTime - diff);
                if (outdated) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() + " ignored for rewriting as its contents are outdated");
                    }
                    continue;
                }
                // It passed the test, load
                RelOptMaterialization materialization = HiveMaterializedViewsRegistry.get().getRewritingMaterializedView(dbName, materializedViewTable.getTableName());
                if (materialization != null) {
                    RelOptHiveTable cachedMaterializedViewTable = (RelOptHiveTable) materialization.tableRel.getTable();
                    if (cachedMaterializedViewTable.getHiveTableMD().getCreateTime() == materializedViewTable.getCreateTime()) {
                        // It is in the cache and up to date
                        result.add(materialization);
                        continue;
                    }
                }
                // Either it was not present in the cache, or the cached version
                // has a different creation time, i.e. it is not up to date.
                if (HiveMaterializedViewsRegistry.get().isInitialized()) {
                    // But the registry was fully initialized, thus we need to add it
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() + " was not in the cache");
                    }
                    materialization = HiveMaterializedViewsRegistry.get().createMaterializedView(conf, materializedViewTable);
                    if (materialization != null) {
                        result.add(materialization);
                    }
                } else {
                    // Otherwise the registry has not been initialized, skip for the time being.
                    // The guard must test the same level the message is logged at.
                    if (LOG.isInfoEnabled()) {
                        LOG.info("Materialized view " + materializedViewTable.getFullyQualifiedName() + " was skipped " + "because cache has not been loaded yet");
                    }
                }
            }
        }
        return result;
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Also used : RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) ArrayList(java.util.ArrayList) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) InvalidOperationException(org.apache.hadoop.hive.metastore.api.InvalidOperationException) TException(org.apache.thrift.TException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) HiveMetaException(org.apache.hadoop.hive.metastore.HiveMetaException) FileNotFoundException(java.io.FileNotFoundException) JDODataStoreException(javax.jdo.JDODataStoreException) Materialization(org.apache.hadoop.hive.metastore.api.Materialization) RelOptMaterialization(org.apache.calcite.plan.RelOptMaterialization) RelOptHiveTable(org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) RelOptMaterialization(org.apache.calcite.plan.RelOptMaterialization)

Aggregations

Materialization (org.apache.hadoop.hive.metastore.api.Materialization) 3, Configuration (org.apache.hadoop.conf.Configuration) 2, BasicTxnInfo (org.apache.hadoop.hive.metastore.api.BasicTxnInfo) 2, CreationMetadata (org.apache.hadoop.hive.metastore.api.CreationMetadata) 2, Table (org.apache.hadoop.hive.metastore.api.Table) 2, Test (org.junit.Test) 2, FileNotFoundException (java.io.FileNotFoundException) 1, IOException (java.io.IOException) 1, ArrayList (java.util.ArrayList) 1, ExecutionException (java.util.concurrent.ExecutionException) 1, JDODataStoreException (javax.jdo.JDODataStoreException) 1, RelOptMaterialization (org.apache.calcite.plan.RelOptMaterialization) 1, HiveMetaException (org.apache.hadoop.hive.metastore.HiveMetaException) 1, AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException) 1, InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException) 1, MetaException (org.apache.hadoop.hive.metastore.api.MetaException) 1, NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException) 1, RelOptHiveTable (org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable) 1, SerDeException (org.apache.hadoop.hive.serde2.SerDeException) 1, TException (org.apache.thrift.TException) 1