use of co.cask.cdap.data2.dataset2.DatasetFramework in project cdap by caskdata.
the class FileMetadataCleanerTest method testScanAndDeleteNewMetadata.
/**
 * Writes new-format file metadata for several program contexts and checks that
 * {@code FileMetadataCleaner.scanAndGetFilesToDelete} returns, for successively larger till times,
 * exactly the entries that have not been returned before, in the expected order.
 */
@Test
public void testScanAndDeleteNewMetadata() throws Exception {
  DatasetFramework dsFramework = injector.getInstance(DatasetFramework.class);
  DatasetManager datasetManager = new DefaultDatasetManager(dsFramework, NamespaceId.SYSTEM,
                                                            co.cask.cdap.common.service.RetryStrategies.noRetry());
  MultiThreadDatasetCache datasetCache =
    new MultiThreadDatasetCache(new SystemDatasetInstantiator(dsFramework),
                                injector.getInstance(TransactionSystemClient.class), NamespaceId.SYSTEM,
                                ImmutableMap.<String, String>of(), null, null);
  Transactional transactional =
    Transactions.createTransactionalWithRetry(Transactions.createTransactional(datasetCache),
                                              RetryStrategies.retryOnConflict(20, 100));
  FileMetaDataWriter metaWriter = new FileMetaDataWriter(datasetManager, transactional);
  FileMetadataCleaner cleaner = new FileMetadataCleaner(datasetManager, transactional);
  try {
    long currentTime = System.currentTimeMillis();
    // every entry written below uses an event time 100ms behind its creation time
    long eventTimestamp = currentTime - 100;
    LogPathIdentifier flowId = new LogPathIdentifier("testNs2", "testApp", "testFlow");
    LocationFactory locationFactory = injector.getInstance(LocationFactory.class);

    // round 1: 100 flow entries at offsets 0..99
    List<String> flowPaths = new ArrayList<>();
    for (int i = 0; i < 100; i++) {
      Location flowFile = locationFactory.create("testFlowFile" + i);
      metaWriter.writeMetaData(flowId, eventTimestamp + i, currentTime + i, flowFile);
      flowPaths.add(flowFile.toURI().getPath());
    }
    long tillTime = currentTime + 50;
    List<FileMetadataCleaner.DeletedEntry> deleted = cleaner.scanAndGetFilesToDelete(tillTime, TRANSACTION_TIMEOUT);
    // the till time is inclusive, so offsets 0..50 (51 rows) are expected
    Assert.assertEquals(51, deleted.size());
    for (int idx = 0; idx < deleted.size(); idx++) {
      Assert.assertEquals(flowPaths.get(idx), deleted.get(idx).getPath());
    }

    // round 2: 10 spark entries at offsets 0..9
    LogPathIdentifier sparkId = new LogPathIdentifier("testNs2", "testApp", "testSpark");
    List<String> sparkPaths = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
      Location sparkFile = locationFactory.create("testSparkFile" + i);
      metaWriter.writeMetaData(sparkId, eventTimestamp + i, currentTime + i, sparkFile);
      sparkPaths.add(sparkFile.toURI().getPath());
    }
    // same till time as round 1: only the 10 freshly written spark entries should come back
    deleted = cleaner.scanAndGetFilesToDelete(tillTime, TRANSACTION_TIMEOUT);
    Assert.assertEquals(10, deleted.size());
    for (int idx = 0; idx < deleted.size(); idx++) {
      Assert.assertEquals(sparkPaths.get(idx), deleted.get(idx).getPath());
    }

    // round 3: 10 mr entries at offsets 0..9 and 10 custom action entries at offsets 90..99
    LogPathIdentifier mrId = new LogPathIdentifier("testNs2", "testApp", "testMr");
    List<String> flowThenMrPaths = new ArrayList<>();
    // the remaining flow files 51..70 are expected ahead of the mr files
    for (int i = 51; i <= 70; i++) {
      flowThenMrPaths.add(locationFactory.create("testFlowFile" + i).toURI().getPath());
    }
    for (int i = 0; i < 10; i++) {
      Location mrFile = locationFactory.create("testMrFile" + i);
      metaWriter.writeMetaData(mrId, eventTimestamp + i, currentTime + i, mrFile);
      flowThenMrPaths.add(mrFile.toURI().getPath());
    }
    List<String> actionThenFlowPaths = new ArrayList<>();
    LogPathIdentifier actionId = new LogPathIdentifier("testNs2", "testApp", "testCustomAction");
    for (int i = 90; i < 100; i++) {
      Location actionFile = locationFactory.create("testActionFile" + i);
      metaWriter.writeMetaData(actionId, eventTimestamp + i, currentTime + i, actionFile);
      actionThenFlowPaths.add(actionFile.toURI().getPath());
    }
    // till offset 70: flow files 51..70 (20) followed by mr files 0..9 (10); no custom action files yet
    tillTime = currentTime + 70;
    deleted = cleaner.scanAndGetFilesToDelete(tillTime, TRANSACTION_TIMEOUT);
    Assert.assertEquals(30, deleted.size());
    for (int idx = 0; idx < deleted.size(); idx++) {
      Assert.assertEquals(flowThenMrPaths.get(idx), deleted.get(idx).getPath());
    }

    // round 4: till offset 100 returns everything that is left,
    // custom action entries first and then the flow entries
    tillTime = currentTime + 100;
    deleted = cleaner.scanAndGetFilesToDelete(tillTime, TRANSACTION_TIMEOUT);
    // custom action files 90..99 (10) followed by flow files 71..99 (29)
    for (int i = 71; i < 100; i++) {
      actionThenFlowPaths.add(locationFactory.create("testFlowFile" + i).toURI().getPath());
    }
    Assert.assertEquals(39, deleted.size());
    for (int idx = 0; idx < deleted.size(); idx++) {
      Assert.assertEquals(actionThenFlowPaths.get(idx), deleted.get(idx).getPath());
    }

    // round 5: nothing is left, so even a much later till time yields an empty result
    tillTime = currentTime + 1000;
    deleted = cleaner.scanAndGetFilesToDelete(tillTime, TRANSACTION_TIMEOUT);
    Assert.assertEquals(0, deleted.size());
  } finally {
    // cleanup meta
    cleanupMetadata(transactional, datasetManager);
  }
}
use of co.cask.cdap.data2.dataset2.DatasetFramework in project cdap by caskdata.
the class FileMetadataCleanerTest method testScanAndDeleteOldMetadata.
/**
 * Writes metadata in the old format (through {@code FileMetaDataManager}) and in the new format
 * (through {@code FileMetaDataWriter}), runs {@code scanAndDeleteOldMetaData}, and checks that the
 * old-format entries are no longer listed afterwards.
 */
@Test
public void testScanAndDeleteOldMetadata() throws Exception {
  DatasetFramework dsFramework = injector.getInstance(DatasetFramework.class);
  DatasetManager datasetManager = new DefaultDatasetManager(dsFramework, NamespaceId.SYSTEM,
                                                            co.cask.cdap.common.service.RetryStrategies.noRetry());
  MultiThreadDatasetCache datasetCache =
    new MultiThreadDatasetCache(new SystemDatasetInstantiator(dsFramework),
                                injector.getInstance(TransactionSystemClient.class), NamespaceId.SYSTEM,
                                ImmutableMap.<String, String>of(), null, null);
  Transactional transactional =
    Transactions.createTransactionalWithRetry(Transactions.createTransactional(datasetCache),
                                              RetryStrategies.retryOnConflict(20, 100));
  FileMetaDataWriter newFormatWriter = new FileMetaDataWriter(datasetManager, transactional);
  FileMetaDataManager oldFormatWriter = injector.getInstance(FileMetaDataManager.class);
  LoggingContext flowContext =
    LoggingContextHelper.getLoggingContext("testNs", "testApp", "testFlow", ProgramType.FLOW);
  long eventTimestamp = System.currentTimeMillis();
  LocationFactory locationFactory = injector.getInstance(LocationFactory.class);
  Location testLocation = locationFactory.create("testFile");
  // time window used by every listFiles call below; it covers all old-format event times
  long listFrom = eventTimestamp - 1;
  long listTo = eventTimestamp + 100;
  try {
    // old format: 50 flow entries, plus one entry each for a workflow, a mapreduce and a spark program
    for (int i = 0; i < 50; i++) {
      oldFormatWriter.writeMetaData(flowContext, eventTimestamp + i, testLocation);
    }
    LoggingContext wflowContext =
      LoggingContextHelper.getLoggingContext("testNs", "testApp", "testWflow", ProgramType.WORKFLOW);
    oldFormatWriter.writeMetaData(wflowContext, eventTimestamp, testLocation);
    LoggingContext mrContext =
      LoggingContextHelper.getLoggingContext("testNs", "testApp", "testMR", ProgramType.MAPREDUCE);
    oldFormatWriter.writeMetaData(mrContext, eventTimestamp, testLocation);
    LoggingContext sparkContext =
      LoggingContextHelper.getLoggingContext("testNs", "testApp", "testSpark", ProgramType.SPARK);
    oldFormatWriter.writeMetaData(sparkContext, eventTimestamp, testLocation);

    // new format: 50 flow entries whose event times lie beyond the listing window used below
    long newEventTime = eventTimestamp + 1000;
    long currentTime = newEventTime + 1000;
    LogPathIdentifier logPathIdentifier = new LogPathIdentifier("testNs", "testApp", "testFlow");
    for (int i = 50; i < 100; i++) {
      newFormatWriter.writeMetaData(logPathIdentifier, newEventTime + i, currentTime + i, testLocation);
    }

    // before the cleanup every old-format entry is visible through the reader
    FileMetaDataReader reader = injector.getInstance(FileMetaDataReader.class);
    Assert.assertEquals(
      50, reader.listFiles(LoggingContextHelper.getLogPathIdentifier(flowContext), listFrom, listTo).size());
    Assert.assertEquals(
      1, reader.listFiles(LoggingContextHelper.getLogPathIdentifier(wflowContext), listFrom, listTo).size());
    Assert.assertEquals(
      1, reader.listFiles(LoggingContextHelper.getLogPathIdentifier(mrContext), listFrom, listTo).size());
    Assert.assertEquals(
      1, reader.listFiles(LoggingContextHelper.getLogPathIdentifier(sparkContext), listFrom, listTo).size());

    FileMetadataCleaner cleaner = new FileMetadataCleaner(datasetManager, transactional);
    cleaner.scanAndDeleteOldMetaData(TRANSACTION_TIMEOUT, CUTOFF_TIME_TRANSACTION);

    // after the cleanup none of the old-format entries may remain in the window
    Assert.assertEquals(0, reader.listFiles(logPathIdentifier, listFrom, listTo).size());
    Assert.assertEquals(
      0, reader.listFiles(LoggingContextHelper.getLogPathIdentifier(wflowContext), listFrom, listTo).size());
    Assert.assertEquals(
      0, reader.listFiles(LoggingContextHelper.getLogPathIdentifier(mrContext), listFrom, listTo).size());
    Assert.assertEquals(
      0, reader.listFiles(LoggingContextHelper.getLogPathIdentifier(sparkContext), listFrom, listTo).size());
  } finally {
    // cleanup meta
    cleanupMetadata(transactional, datasetManager);
  }
}
use of co.cask.cdap.data2.dataset2.DatasetFramework in project cdap by caskdata.
the class FileMetadataCleanerTest method testFileMetadataWithCommonContextPrefix.
/**
 * Checks that the cleaner handles program names sharing a common prefix (testFlow1, testFlow10, ...):
 * for each of 20 such contexts, 10 entries are written, the first 5 are cleaned up, and the last 5
 * must still be listed afterwards.
 */
@Test
public void testFileMetadataWithCommonContextPrefix() throws Exception {
  DatasetFramework dsFramework = injector.getInstance(DatasetFramework.class);
  DatasetManager datasetManager = new DefaultDatasetManager(dsFramework, NamespaceId.SYSTEM,
                                                            co.cask.cdap.common.service.RetryStrategies.noRetry());
  MultiThreadDatasetCache datasetCache =
    new MultiThreadDatasetCache(new SystemDatasetInstantiator(dsFramework),
                                injector.getInstance(TransactionSystemClient.class), NamespaceId.SYSTEM,
                                ImmutableMap.<String, String>of(), null, null);
  Transactional transactional =
    Transactions.createTransactionalWithRetry(Transactions.createTransactional(datasetCache),
                                              RetryStrategies.retryOnConflict(20, 100));
  FileMetaDataWriter metaWriter = new FileMetaDataWriter(datasetManager, transactional);
  FileMetaDataReader metaReader = injector.getInstance(FileMetaDataReader.class);
  FileMetadataCleaner cleaner = new FileMetadataCleaner(datasetManager, transactional);
  try {
    // testFlow1 .. testFlow20: the scan and delete during cleanup must cope with names
    // that are prefixes of one another, such as testFlow1 and testFlow10
    List<LogPathIdentifier> identifiers = new ArrayList<>();
    for (int i = 1; i <= 20; i++) {
      identifiers.add(new LogPathIdentifier(NamespaceId.DEFAULT.getNamespace(), "testApp", "testFlow" + i));
    }
    LocationFactory locationFactory = injector.getInstance(LocationFactory.class);
    Location logsDir = locationFactory.create(TMP_FOLDER.newFolder().getPath()).append("/logs");
    long currentTime = System.currentTimeMillis();
    long baseTime = currentTime + 100;

    // 10 entries per context, at offsets 0..9 from the base time
    for (LogPathIdentifier identifier : identifiers) {
      for (int j = 0; j < 10; j++) {
        metaWriter.writeMetaData(identifier, baseTime + j, baseTime + j, logsDir.append("testFileNew" + j));
      }
    }
    // every context should list all of its 10 files (offsets 0..9)
    for (LogPathIdentifier identifier : identifiers) {
      List<LogLocation> listed = metaReader.listFiles(identifier, baseTime, baseTime + 10);
      Assert.assertEquals(10, listed.size());
    }

    long tillTime = baseTime + 4;
    List<FileMetadataCleaner.DeletedEntry> deleted = cleaner.scanAndGetFilesToDelete(tillTime, TRANSACTION_TIMEOUT);
    // 20 contexts, 5 entries each (offsets 0..4)
    Assert.assertEquals(100, deleted.size());

    // every context should be left with the files at offsets 5..9, in that order
    for (LogPathIdentifier identifier : identifiers) {
      List<LogLocation> remaining = metaReader.listFiles(identifier, baseTime, baseTime + 10);
      Assert.assertEquals(5, remaining.size());
      for (int k = 0; k < remaining.size(); k++) {
        Assert.assertEquals("testFileNew" + (5 + k), remaining.get(k).getLocation().getName());
      }
    }
  } finally {
    // cleanup meta
    cleanupMetadata(transactional, datasetManager);
  }
}
use of co.cask.cdap.data2.dataset2.DatasetFramework in project cdap by caskdata.
the class LogCleanerTest method testLogCleanup.
/**
 * Creates 20 log files with new-format metadata dated about five seconds in the past, runs
 * {@code LogCleaner} once, and checks that no metadata for the test entity is listed afterwards.
 */
@Test
public void testLogCleanup() throws Exception {
  DatasetFramework dsFramework = injector.getInstance(DatasetFramework.class);
  DatasetManager datasetManager = new DefaultDatasetManager(dsFramework, NamespaceId.SYSTEM,
                                                            co.cask.cdap.common.service.RetryStrategies.noRetry());
  MultiThreadDatasetCache datasetCache =
    new MultiThreadDatasetCache(new SystemDatasetInstantiator(dsFramework),
                                injector.getInstance(TransactionSystemClient.class), NamespaceId.SYSTEM,
                                ImmutableMap.<String, String>of(), null, null);
  Transactional transactional =
    Transactions.createTransactionalWithRetry(Transactions.createTransactional(datasetCache),
                                              RetryStrategies.retryOnConflict(20, 100));
  FileMetadataCleaner metadataCleaner = new FileMetadataCleaner(datasetManager, transactional);
  LocationFactory locationFactory = injector.getInstance(LocationFactory.class);
  // all entries are dated shortly after a point five seconds in the past
  long startTime = System.currentTimeMillis() - 5000;
  LogPathIdentifier entityId = new LogPathIdentifier("testNs", "testApp", "testEntity");
  FileMetaDataWriter metaWriter = new FileMetaDataWriter(datasetManager, transactional);
  Location logsDir = locationFactory.create("logs");
  logsDir.mkdirs();

  // create 20 files and register metadata for each of them
  for (int i = 0; i < 20; i++) {
    Location logFile = logsDir.append("test" + i);
    logFile.createNew();
    metaWriter.writeMetaData(entityId, startTime + i, startTime + i, logFile);
  }
  Assert.assertEquals(20, logsDir.list().size());

  LogCleaner logCleaner = new LogCleaner(metadataCleaner, locationFactory, 100, 60);
  logCleaner.run();

  // all meta data should be deleted
  FileMetaDataReader metaReader = injector.getInstance(FileMetaDataReader.class);
  Assert.assertEquals(0, metaReader.listFiles(entityId, 0, System.currentTimeMillis()).size());
  // file existence is deliberately not asserted: the delete could fail and file deletion is not guaranteed
}
use of co.cask.cdap.data2.dataset2.DatasetFramework in project cdap by caskdata.
the class UpgradeTool method createInjector.
/**
 * Builds the Guice {@link Injector} used by the upgrade tool.
 *
 * <p>Most modules are installed in their distributed (or master) variant. The deviations visible in the
 * module list are: the {@code DatasetFramework} binding of the distributed data set modules is overridden
 * with {@code InMemoryDatasetFramework}, {@code LineageWriter} is bound to a no-op implementation, the
 * notification service uses its in-memory modules, and {@code MetricsCollectionService} is bound to a
 * no-op implementation.
 *
 * @return the injector assembled from the modules listed below
 * @throws Exception declared by the signature; the visible code does not show which call raises it
 */
@VisibleForTesting
Injector createInjector() throws Exception {
return Guice.createInjector(new ConfigModule(cConf, hConf), new LocationRuntimeModule().getDistributedModules(), new ZKClientModule(), new DiscoveryRuntimeModule().getDistributedModules(), new MessagingClientModule(), Modules.override(new DataSetsModules().getDistributedModules()).with(new AbstractModule() {
@Override
protected void configure() {
// replace the default DatasetFramework binding with a singleton InMemoryDatasetFramework
bind(DatasetFramework.class).to(InMemoryDatasetFramework.class).in(Scopes.SINGLETON);
// the DataSetsModules().getDistributedModules() binds to RemoteDatasetFramework so override that to
// the same InMemoryDatasetFramework
bind(DatasetFramework.class).annotatedWith(Names.named(DataSetsModules.BASE_DATASET_FRAMEWORK)).to(DatasetFramework.class);
install(new FactoryModuleBuilder().implement(DatasetDefinitionRegistry.class, DefaultDatasetDefinitionRegistry.class).build(DatasetDefinitionRegistryFactory.class));
// CDAP-5954 Upgrade tool does not need to record lineage and metadata changes for now.
bind(LineageWriter.class).to(NoOpLineageWriter.class);
}
}), new ViewAdminModules().getDistributedModules(), new StreamAdminModules().getDistributedModules(), new NotificationFeedClientModule(), new TwillModule(), new ExploreClientModule(), new ProgramRunnerRuntimeModule().getDistributedModules(), new ServiceStoreModules().getDistributedModules(), new SystemDatasetRuntimeModule().getDistributedModules(), // don't need real notifications for upgrade, so use the in-memory implementations
new NotificationServiceRuntimeModule().getInMemoryModules(), new KafkaClientModule(), new NamespaceStoreModule().getDistributedModules(), new AuthenticationContextModules().getMasterModule(), new AuthorizationModule(), new AuthorizationEnforcementModule().getMasterModule(), new SecureStoreModules().getDistributedModules(), new DataFabricModules(UpgradeTool.class.getName()).getDistributedModules(), new AppFabricServiceRuntimeModule().getDistributedModules(), new AbstractModule() {
@Override
protected void configure() {
// the DataFabricDistributedModule needs MetricsCollectionService binding and since Upgrade tool does not do
// anything with Metrics we just bind it to NoOpMetricsCollectionService
bind(MetricsCollectionService.class).to(NoOpMetricsCollectionService.class).in(Scopes.SINGLETON);
bind(MetricDatasetFactory.class).to(DefaultMetricDatasetFactory.class).in(Scopes.SINGLETON);
bind(MetricStore.class).to(DefaultMetricStore.class);
}
// Provides the singleton DatasetInstanceManager named "datasetInstanceManager", built on the
// DatasetFramework that is bound under the name "datasetMDS" (see the provider below)
@Provides
@Singleton
@Named("datasetInstanceManager")
@SuppressWarnings("unused")
public DatasetInstanceManager getDatasetInstanceManager(TransactionSystemClientService txClient, TransactionExecutorFactory txExecutorFactory, @Named("datasetMDS") DatasetFramework framework) {
return new DatasetInstanceManager(txClient, txExecutorFactory, framework);
}
// This is needed because the LocalApplicationManager
// expects a dsframework injection named datasetMDS
// The provider simply re-exposes the unannotated DatasetFramework binding under that name.
@Provides
@Singleton
@Named("datasetMDS")
@SuppressWarnings("unused")
public DatasetFramework getInDsFramework(DatasetFramework dsFramework) {
return dsFramework;
}
});
}
Aggregations