Use of org.apache.hadoop.hive.metastore.api.Materialization in the Apache Hive project:
class TestMetaStoreMaterializationsCacheCleaner, method testCleanerScenario1.
@Test
public void testCleanerScenario1() throws Exception {
    // Initialize the invalidation cache with the DISABLE implementation.
    // In this mode the cache is expected to record nothing at all, so every
    // cleanup() call must return 0 and every invalidation-info lookup must
    // come back empty, no matter how many modifications we notify.
    Configuration conf = new Configuration();
    conf.set("metastore.materializations.invalidation.impl", "DISABLE");
    // create mock handler
    final IHMSHandler handler = mock(IHMSHandler.class);
    MaterializationsInvalidationCache.get().init(conf, handler);
    // Notify a series of table modifications; none should be recorded.
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, 1, 1);
    int id = 2;
    BasicTxnInfo txn2 = createTxnInfo(DB_NAME, TBL_NAME_1, id);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, id, id);
    // Modification on tbl2
    id = 3;
    BasicTxnInfo txn3 = createTxnInfo(DB_NAME, TBL_NAME_1, id);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_2, id, id);
    // Cleanup: cache is disabled, so nothing was recorded and nothing is removed
    long removed = MaterializationsInvalidationCache.get().cleanup(0L);
    Assert.assertEquals(0L, removed);
    // Create mv1 over tbl1 and tbl2
    Table mv1 = mock(Table.class);
    when(mv1.getDbName()).thenReturn(DB_NAME);
    when(mv1.getTableName()).thenReturn(MV_NAME_1);
    CreationMetadata mockCM1 = new CreationMetadata(DB_NAME, MV_NAME_1,
        ImmutableSet.of(DB_NAME + "." + TBL_NAME_1, DB_NAME + "." + TBL_NAME_2));
    // Txn list at creation time (highWatermark=3; minOpenTxn=Long.MAX_VALUE)
    mockCM1.setValidTxnList("3:" + Long.MAX_VALUE + "::");
    when(mv1.getCreationMetadata()).thenReturn(mockCM1);
    MaterializationsInvalidationCache.get().createMaterializedView(
        mockCM1.getDbName(), mockCM1.getTblName(), mockCM1.getTablesUsed(), mockCM1.getValidTxnList());
    // Disabled cache: no invalidation info is tracked even for the new MV
    Map<String, Materialization> invalidationInfos =
        MaterializationsInvalidationCache.get().getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1));
    Assert.assertTrue(invalidationInfos.isEmpty());
    id = 10;
    BasicTxnInfo txn10 = createTxnInfo(DB_NAME, TBL_NAME_2, id);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_2, id, id);
    id = 9;
    BasicTxnInfo txn9 = createTxnInfo(DB_NAME, TBL_NAME_1, id);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, id, id);
    // Cleanup again: still nothing recorded, still nothing removed
    removed = MaterializationsInvalidationCache.get().cleanup(8L);
    Assert.assertEquals(0L, removed);
    invalidationInfos =
        MaterializationsInvalidationCache.get().getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1));
    Assert.assertTrue(invalidationInfos.isEmpty());
    // Create mv2 over tbl1 and tbl2
    Table mv2 = mock(Table.class);
    when(mv2.getDbName()).thenReturn(DB_NAME);
    when(mv2.getTableName()).thenReturn(MV_NAME_2);
    CreationMetadata mockCM2 = new CreationMetadata(DB_NAME, MV_NAME_2,
        ImmutableSet.of(DB_NAME + "." + TBL_NAME_1, DB_NAME + "." + TBL_NAME_2));
    // Txn list at creation time (highWatermark=10; minOpenTxn=Long.MAX_VALUE)
    mockCM2.setValidTxnList("10:" + Long.MAX_VALUE + "::");
    when(mv2.getCreationMetadata()).thenReturn(mockCM2);
    MaterializationsInvalidationCache.get().createMaterializedView(
        mockCM2.getDbName(), mockCM2.getTblName(), mockCM2.getTablesUsed(), mockCM2.getValidTxnList());
    invalidationInfos =
        MaterializationsInvalidationCache.get().getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertTrue(invalidationInfos.isEmpty());
    // More modifications on tbl1, tbl2 and tbl3 (all ignored by the disabled cache)
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_3, 11, 11);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_3, 18, 18);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, 14, 14);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, 17, 17);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_2, 16, 16);
    // Cleanup: nothing recorded, nothing removed
    removed = MaterializationsInvalidationCache.get().cleanup(16L);
    Assert.assertEquals(0L, removed);
    invalidationInfos =
        MaterializationsInvalidationCache.get().getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertTrue(invalidationInfos.isEmpty());
    // Out-of-order modifications (also ignored)
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, 12, 12);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_2, 15, 15);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_2, 7, 7);
    invalidationInfos =
        MaterializationsInvalidationCache.get().getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertTrue(invalidationInfos.isEmpty());
    // Final cleanups: always 0 with the DISABLE implementation
    removed = MaterializationsInvalidationCache.get().cleanup(20L);
    Assert.assertEquals(0L, removed);
    removed = MaterializationsInvalidationCache.get().cleanup(24L);
    Assert.assertEquals(0L, removed);
}
Use of org.apache.hadoop.hive.metastore.api.Materialization in the Apache Hive project:
class TestMetaStoreMaterializationsCacheCleaner, method testCleanerScenario2.
@Test
public void testCleanerScenario2() throws Exception {
    // Initialize the invalidation cache with the DEFAULT implementation:
    // here the cache records table modifications, tracks materialized-view
    // invalidation times, and cleanup() actually prunes obsolete entries.
    Configuration conf = new Configuration();
    conf.set("metastore.materializations.invalidation.impl", "DEFAULT");
    // create mock handler
    final IHMSHandler handler = mock(IHMSHandler.class);
    MaterializationsInvalidationCache.get().init(conf, handler);
    // Scenario consists of the following steps:
    // Create tbl1
    // (t = 1) Insert row in tbl1
    // (t = 2) Insert row in tbl1
    // Create tbl2
    // (t = 3) Insert row in tbl2
    // Cleanup (current = 4, duration = 4) -> Does nothing
    // Create mv1
    // (t = 10) Insert row in tbl2
    // (t = 9) Insert row in tbl1 (out of order)
    // Cleanup (current = 12, duration = 4) -> Removes txn1, txn2, txn3
    // Create mv2
    // Create tbl3
    // (t = 11) Insert row in tbl3
    // (t = 18) Insert row in tbl3
    // (t = 14) Insert row in tbl1
    // (t = 17) Insert row in tbl1
    // (t = 16) Insert row in tbl2
    // Cleanup (current = 20, duration = 4) -> Removes txn10, txn11
    // (t = 12) Insert row in tbl1
    // (t = 15) Insert row in tbl2
    // (t = 7) Insert row in tbl2
    // Cleanup (current = 24, duration = 4) -> Removes txn9, txn14, txn15, txn16, txn17, txn18
    // Create tbl1 (nothing to do)
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, 1, 1);
    int id = 2;
    BasicTxnInfo txn2 = createTxnInfo(DB_NAME, TBL_NAME_1, id);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, id, id);
    // Create tbl2 (nothing to do)
    id = 3;
    BasicTxnInfo txn3 = createTxnInfo(DB_NAME, TBL_NAME_1, id);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_2, id, id);
    // Cleanup (current = 4, duration = 4) -> Does nothing
    long removed = MaterializationsInvalidationCache.get().cleanup(0L);
    Assert.assertEquals(0L, removed);
    // Create mv1 over tbl1 and tbl2
    Table mv1 = mock(Table.class);
    when(mv1.getDbName()).thenReturn(DB_NAME);
    when(mv1.getTableName()).thenReturn(MV_NAME_1);
    CreationMetadata mockCM1 = new CreationMetadata(DB_NAME, MV_NAME_1,
        ImmutableSet.of(DB_NAME + "." + TBL_NAME_1, DB_NAME + "." + TBL_NAME_2));
    // Txn list at creation time (highWatermark=3; minOpenTxn=Long.MAX_VALUE)
    mockCM1.setValidTxnList("3:" + Long.MAX_VALUE + "::");
    when(mv1.getCreationMetadata()).thenReturn(mockCM1);
    MaterializationsInvalidationCache.get().createMaterializedView(
        mockCM1.getDbName(), mockCM1.getTblName(), mockCM1.getTablesUsed(), mockCM1.getValidTxnList());
    // mv1 saw every txn up to its creation, so it is not invalidated yet
    Map<String, Materialization> invalidationInfos =
        MaterializationsInvalidationCache.get().getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1));
    Assert.assertEquals(0L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    id = 10;
    BasicTxnInfo txn10 = createTxnInfo(DB_NAME, TBL_NAME_2, id);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_2, id, id);
    id = 9;
    BasicTxnInfo txn9 = createTxnInfo(DB_NAME, TBL_NAME_1, id);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, id, id);
    // Cleanup (current = 12, duration = 4) -> Removes txn1, txn2, txn3
    removed = MaterializationsInvalidationCache.get().cleanup(8L);
    Assert.assertEquals(3L, removed);
    // mv1 is now invalidated by the earliest unseen txn (t = 9)
    invalidationInfos =
        MaterializationsInvalidationCache.get().getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1));
    Assert.assertEquals(9L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    // Create mv2 over tbl1 and tbl2
    Table mv2 = mock(Table.class);
    when(mv2.getDbName()).thenReturn(DB_NAME);
    when(mv2.getTableName()).thenReturn(MV_NAME_2);
    CreationMetadata mockCM2 = new CreationMetadata(DB_NAME, MV_NAME_2,
        ImmutableSet.of(DB_NAME + "." + TBL_NAME_1, DB_NAME + "." + TBL_NAME_2));
    // Txn list at creation time (highWatermark=10; minOpenTxn=Long.MAX_VALUE)
    mockCM2.setValidTxnList("10:" + Long.MAX_VALUE + "::");
    when(mv2.getCreationMetadata()).thenReturn(mockCM2);
    MaterializationsInvalidationCache.get().createMaterializedView(
        mockCM2.getDbName(), mockCM2.getTblName(), mockCM2.getTablesUsed(), mockCM2.getValidTxnList());
    // mv2 saw txns up to 10, so it is fresh; mv1 is still invalidated at t = 9
    invalidationInfos =
        MaterializationsInvalidationCache.get().getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertEquals(9L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    Assert.assertEquals(0L, invalidationInfos.get(MV_NAME_2).getInvalidationTime());
    // Create tbl3 (nothing to do)
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_3, 11, 11);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_3, 18, 18);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, 14, 14);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, 17, 17);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_2, 16, 16);
    // Cleanup (current = 20, duration = 4) -> Removes txn10, txn11
    removed = MaterializationsInvalidationCache.get().cleanup(16L);
    Assert.assertEquals(2L, removed);
    // mv2 is now invalidated by its earliest unseen txn (t = 14)
    invalidationInfos =
        MaterializationsInvalidationCache.get().getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertEquals(9L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    Assert.assertEquals(14L, invalidationInfos.get(MV_NAME_2).getInvalidationTime());
    // Out-of-order notifications move the invalidation times backwards
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_1, 12, 12);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_2, 15, 15);
    MaterializationsInvalidationCache.get().notifyTableModification(DB_NAME, TBL_NAME_2, 7, 7);
    invalidationInfos =
        MaterializationsInvalidationCache.get().getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertEquals(7L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    Assert.assertEquals(12L, invalidationInfos.get(MV_NAME_2).getInvalidationTime());
    // Cleanup (current = 24, duration = 4) -> Removes txn9, txn14, txn15, txn16, txn17, txn18
    removed = MaterializationsInvalidationCache.get().cleanup(20L);
    Assert.assertEquals(6L, removed);
    // Invalidation times are retained even after the underlying txns are pruned
    invalidationInfos =
        MaterializationsInvalidationCache.get().getMaterializationInvalidationInfo(DB_NAME, ImmutableList.of(MV_NAME_1, MV_NAME_2));
    Assert.assertEquals(7L, invalidationInfos.get(MV_NAME_1).getInvalidationTime());
    Assert.assertEquals(12L, invalidationInfos.get(MV_NAME_2).getInvalidationTime());
    // Cleanup (current = 28, duration = 4) -> Nothing left to remove
    removed = MaterializationsInvalidationCache.get().cleanup(24L);
    Assert.assertEquals(0L, removed);
}
Use of org.apache.hadoop.hive.metastore.api.Materialization in the Apache Hive project:
class Hive, method getValidMaterializedViews.
/**
 * Get the materialized views that have been enabled for rewriting from the
 * metastore. If the materialized view is in the cache, we do not need to
 * parse it to generate a logical plan for the rewriting. Instead, we
 * return the version present in the cache. Further, information provided
 * by the invalidation cache is useful to know whether a materialized view
 * can be used for rewriting or not.
 *
 * @param materializedViewRebuild whether this lookup is for a materialized
 *        view rebuild; if true, outdated materialized views are never used,
 *        regardless of the configured time window
 * @return the list of materialized views available for rewriting
 * @throws HiveException
 */
public List<RelOptMaterialization> getValidMaterializedViews(boolean materializedViewRebuild) throws HiveException {
    // Default staleness window; an individual MV may override it via table property.
    final long defaultDiff = HiveConf.getTimeVar(conf,
        HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW, TimeUnit.MILLISECONDS);
    final long currentTime = System.currentTimeMillis();
    try {
        // Final result
        List<RelOptMaterialization> result = new ArrayList<>();
        for (String dbName : getMSC().getAllDatabases()) {
            // From metastore (for security)
            List<String> materializedViewNames = getMaterializedViewsForRewriting(dbName);
            if (materializedViewNames.isEmpty()) {
                // Bail out: empty list
                continue;
            }
            List<Table> materializedViewTables = getTableObjects(dbName, materializedViewNames);
            Map<String, Materialization> databaseInvalidationInfo =
                getMSC().getMaterializationsInvalidationInfo(dbName, materializedViewNames);
            for (Table materializedViewTable : materializedViewTables) {
                // Check whether the materialized view is invalidated
                Materialization materializationInvalidationInfo =
                    databaseInvalidationInfo.get(materializedViewTable.getTableName());
                if (materializationInvalidationInfo == null) {
                    LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
                        " ignored for rewriting as there was no information loaded in the invalidation cache");
                    continue;
                }
                // Check if materialization defined its own invalidation time window
                String timeWindowString = materializedViewTable.getProperty(MATERIALIZED_VIEW_REWRITING_TIME_WINDOW);
                long diff = org.apache.commons.lang.StringUtils.isEmpty(timeWindowString) ? defaultDiff :
                    HiveConf.toTime(timeWindowString,
                        HiveConf.getDefaultTimeUnit(HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_REWRITING_TIME_WINDOW),
                        TimeUnit.MILLISECONDS);
                long invalidationTime = materializationInvalidationInfo.getInvalidationTime();
                // If we are doing a rebuild, we do not consider outdated materialized views either.
                if (diff == 0L || materializedViewRebuild) {
                    if (invalidationTime != 0L) {
                        // If parameter is zero, materialized view cannot be outdated at all
                        LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
                            " ignored for rewriting as its contents are outdated");
                        continue;
                    }
                } else {
                    // Non-zero window: accept the MV only if it became stale within the window
                    if (invalidationTime != 0 && invalidationTime > currentTime - diff) {
                        LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
                            " ignored for rewriting as its contents are outdated");
                        continue;
                    }
                }
                // It passed the test, load
                RelOptMaterialization materialization =
                    HiveMaterializedViewsRegistry.get().getRewritingMaterializedView(dbName, materializedViewTable.getTableName());
                if (materialization != null) {
                    RelOptHiveTable cachedMaterializedViewTable = (RelOptHiveTable) materialization.tableRel.getTable();
                    if (cachedMaterializedViewTable.getHiveTableMD().getCreateTime() == materializedViewTable.getCreateTime()) {
                        // It is in the cache and up to date
                        result.add(materialization);
                        continue;
                    }
                }
                // It was not present in the cache (maybe because it was added by another HS2),
                // or it is not up to date.
                if (HiveMaterializedViewsRegistry.get().isInitialized()) {
                    // But the registry was fully initialized, thus we need to add it
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Materialized view " + materializedViewTable.getFullyQualifiedName() +
                            " was not in the cache");
                    }
                    materialization = HiveMaterializedViewsRegistry.get().createMaterializedView(conf, materializedViewTable);
                    if (materialization != null) {
                        result.add(materialization);
                    }
                } else {
                    // Otherwise the registry has not been initialized, skip for the time being.
                    // FIX: the guard checked isWarnEnabled() but logged at info level; log at warn.
                    if (LOG.isWarnEnabled()) {
                        LOG.warn("Materialized view " + materializedViewTable.getFullyQualifiedName() +
                            " was skipped because cache has not been loaded yet");
                    }
                }
            }
        }
        return result;
    } catch (Exception e) {
        throw new HiveException(e);
    }
}
Aggregations