use of org.apache.gobblin.data.management.retention.dataset.CleanableDataset in project incubator-gobblin by apache.
the class ComplianceRetentionJob method run.
public void run() throws IOException {
    // Dropping empty tables
    for (HiveDataset dataset : this.tablesToDrop) {
        log.info("Dropping table: " + dataset.getTable().getCompleteName());
        executeDropTableQuery(dataset, this.properties);
    }
    Preconditions.checkNotNull(this.finder, "Dataset finder class is not set");
    List<Dataset> datasets = this.finder.findDatasets();
    this.finishCleanSignal = Optional.of(new CountDownLatch(datasets.size()));
    // Clean each dataset asynchronously; the latch is counted down once per dataset.
    for (final Dataset dataset : datasets) {
        ListenableFuture<Void> future = this.service.submit(new Callable<Void>() {

            @Override
            public Void call() throws Exception {
                if (dataset instanceof CleanableDataset) {
                    ((CleanableDataset) dataset).clean();
                } else {
                    log.warn("Not an instance of " + CleanableDataset.class + " Dataset won't be cleaned " + dataset.datasetURN());
                }
                return null;
            }
        });
        Futures.addCallback(future, new FutureCallback<Void>() {

            @Override
            public void onSuccess(@Nullable Void result) {
                ComplianceRetentionJob.this.finishCleanSignal.get().countDown();
                log.info("Successfully cleaned: " + dataset.datasetURN());
            }

            @Override
            public void onFailure(Throwable t) {
                ComplianceRetentionJob.this.finishCleanSignal.get().countDown();
                log.warn("Exception caught when cleaning " + dataset.datasetURN() + ".", t);
                ComplianceRetentionJob.this.throwables.add(t);
                ComplianceRetentionJob.this.eventSubmitter.submit(ComplianceEvents.Retention.FAILED_EVENT_NAME,
                    ImmutableMap.of(ComplianceEvents.FAILURE_CONTEXT_METADATA_KEY, ExceptionUtils.getFullStackTrace(t),
                        ComplianceEvents.DATASET_URN_METADATA_KEY, dataset.datasetURN()));
            }
        });
    }
}
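The run() method above only counts the latch down; it never waits on it. A minimal sketch, assuming the field names from the snippet (finishCleanSignal, throwables) and a hypothetical helper added to the same class, of how completion might be awaited before the job exits:

// Hypothetical helper, not part of the actual ComplianceRetentionJob shown above.
private void waitForCleanCompletion() throws IOException {
    try {
        if (this.finishCleanSignal.isPresent()) {
            // Block until every per-dataset clean task has counted down.
            this.finishCleanSignal.get().await();
        }
        if (!this.throwables.isEmpty()) {
            throw new RuntimeException("Not all datasets were cleaned successfully");
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IOException("Interrupted while waiting for dataset cleaning to finish", e);
    }
}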
use of org.apache.gobblin.data.management.retention.dataset.CleanableDataset in project incubator-gobblin by apache.
the class CleanableDatasetStoreDatasetTest method testCleanStateStore.
@Test
public void testCleanStateStore() throws IOException {
    File tmpDir = Files.createTempDir();
    tmpDir.deleteOnExit();
    FileSystem fs = FileSystem.getLocal(new Configuration());
    FsDatasetStateStore store = new FsDatasetStateStore(fs, tmpDir.getAbsolutePath());
    store.persistDatasetState("dataset1", new JobState.DatasetState("job1", "job1_id1"));
    store.persistDatasetState("dataset1", new JobState.DatasetState("job1", "job1_id2"));
    store.persistDatasetState("dataset1", new JobState.DatasetState("job2", "job2_id1"));
    store.persistDatasetState("", new JobState.DatasetState("job3", "job3_id1"));
    Properties props = new Properties();
    props.setProperty(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY, tmpDir.getAbsolutePath());
    props.setProperty("selection.timeBased.lookbackTime", "0m");
    TimeBasedDatasetStoreDatasetFinder datasetFinder = new TimeBasedDatasetStoreDatasetFinder(fs, props);
    List<DatasetStoreDataset> datasets = datasetFinder.findDatasets();
    for (DatasetStoreDataset dataset : datasets) {
        ((CleanableDataset) dataset).clean();
        File jobDir = new File(tmpDir.getAbsolutePath(), dataset.getKey().getStoreName());
        // After cleaning with a zero-minute lookback, each store directory should retain a single state file.
        Assert.assertEquals(jobDir.list().length, 1);
    }
}
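The test uses a zero-minute lookback so the finder considers every persisted state file. A hedged configuration sketch, reusing only the keys that appear in the test (the exact retention semantics of the lookback are assumed from the test's behavior, not verified against the finder's implementation):

// Sketch: same keys as the test above, with a one-day lookback expressed in minutes.
Properties props = new Properties();
props.setProperty(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY, "/path/to/state/store");
// Assumption: a larger lookback limits cleaning to older state, as suggested by the test.
props.setProperty("selection.timeBased.lookbackTime", "1440m");
TimeBasedDatasetStoreDatasetFinder finder = new TimeBasedDatasetStoreDatasetFinder(fs, props);
for (DatasetStoreDataset dataset : finder.findDatasets()) {
    ((CleanableDataset) dataset).clean();
}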
use of org.apache.gobblin.data.management.retention.dataset.CleanableDataset in project incubator-gobblin by apache.
the class RetentionTestHelper method clean.
/**
 * Runs Gobblin retention on test data. {@link DatasetCleaner}, which does retention in production, cannot be called
 * directly because some runtime properties such as ${testNameTempPath} need to be resolved. This directory contains all
 * the setup data created for a test by {@link RetentionTestDataGenerator#setup()} and is unique for each test.
 * The default {@link ConfigClient} used by {@link DatasetCleaner} connects to a config store. A mock {@link ConfigClient}
 * is provided here because the configs live on the classpath, not in a config store.
 *
 * @param retentionConfigClasspathResource the same jobProps/config file used when running a real retention job
 * @param testNameTempPath temp path for this test where test data is generated
 */
public static void clean(FileSystem fs, Path retentionConfigClasspathResource,
    Optional<Path> additionalJobPropsClasspathResource, Path testNameTempPath) throws Exception {
    Properties additionalJobProps = new Properties();
    if (additionalJobPropsClasspathResource.isPresent()) {
        try (final InputStream stream = RetentionTestHelper.class.getClassLoader()
            .getResourceAsStream(additionalJobPropsClasspathResource.get().toString())) {
            additionalJobProps.load(stream);
        }
    }
    if (retentionConfigClasspathResource.getName().endsWith(".job")) {
        // Job-file based retention: load the .job properties and resolve ${testNameTempPath}.
        Properties jobProps = new Properties();
        try (final InputStream stream = RetentionTestHelper.class.getClassLoader()
            .getResourceAsStream(retentionConfigClasspathResource.toString())) {
            jobProps.load(stream);
            for (Entry<Object, Object> entry : jobProps.entrySet()) {
                jobProps.put(entry.getKey(),
                    StringUtils.replace((String) entry.getValue(), "${testNameTempPath}", testNameTempPath.toString()));
            }
        }
        MultiCleanableDatasetFinder finder = new MultiCleanableDatasetFinder(fs, jobProps);
        for (Dataset dataset : finder.findDatasets()) {
            ((CleanableDataset) dataset).clean();
        }
    } else {
        // Config-store based retention: parse the config from the classpath and serve it through a mocked ConfigClient.
        Config testConfig = ConfigFactory.parseResources(retentionConfigClasspathResource.toString())
            .withFallback(ConfigFactory.parseMap(ImmutableMap.of("testNameTempPath",
                PathUtils.getPathWithoutSchemeAndAuthority(testNameTempPath).toString())))
            .resolve();
        ConfigClient client = mock(ConfigClient.class);
        when(client.getConfig(any(String.class))).thenReturn(testConfig);
        Properties jobProps = new Properties();
        jobProps.setProperty(CleanableDatasetBase.SKIP_TRASH_KEY, Boolean.toString(true));
        jobProps.setProperty(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_URI, "dummy");
        jobProps.setProperty(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_ENABLED, "true");
        jobProps.putAll(additionalJobProps);
        @SuppressWarnings("unchecked")
        DatasetsFinder<CleanableDataset> finder = (DatasetsFinder<CleanableDataset>) GobblinConstructorUtils.invokeFirstConstructor(
            Class.forName(testConfig.getString(MultiCleanableDatasetFinder.DATASET_FINDER_CLASS_KEY)),
            ImmutableList.of(fs, jobProps, testConfig, client), ImmutableList.of(fs, jobProps, client));
        for (CleanableDataset dataset : finder.findDatasets()) {
            dataset.clean();
        }
    }
}
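A hedged usage sketch for this helper; the resource names and temp path below are placeholders, not files that exist in the repository (Guava's Optional is assumed, matching the Optional.of usage elsewhere on this page):

// Hypothetical test invocation of RetentionTestHelper.clean.
FileSystem fs = FileSystem.getLocal(new Configuration());
Path testNameTempPath = new Path("/tmp/myRetentionTest");
RetentionTestHelper.clean(fs,
    new Path("retentionConfigs/my-test-retention.job"), // placeholder .job resource on the classpath
    Optional.<Path>absent(),                            // no additional job properties
    testNameTempPath);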
use of org.apache.gobblin.data.management.retention.dataset.CleanableDataset in project incubator-gobblin by apache.
the class DatasetCleaner method clean.
/**
* Perform the cleanup of old / deprecated dataset versions.
* @throws IOException
*/
public void clean() throws IOException {
    List<Dataset> dataSets = this.datasetFinder.findDatasets();
    this.finishCleanSignal = Optional.of(new CountDownLatch(dataSets.size()));
    for (final Dataset dataset : dataSets) {
        ListenableFuture<Void> future = this.service.submit(new Callable<Void>() {

            @Override
            public Void call() throws Exception {
                if (dataset instanceof CleanableDataset) {
                    ((CleanableDataset) dataset).clean();
                }
                return null;
            }
        });
        Futures.addCallback(future, new FutureCallback<Void>() {

            @Override
            public void onFailure(Throwable throwable) {
                DatasetCleaner.this.finishCleanSignal.get().countDown();
                LOG.warn("Exception caught when cleaning " + dataset.datasetURN() + ".", throwable);
                DatasetCleaner.this.throwables.add(throwable);
                Instrumented.markMeter(DatasetCleaner.this.datasetsCleanFailureMeter);
                DatasetCleaner.this.eventSubmitter.submit(RetentionEvents.CleanFailed.EVENT_NAME,
                    ImmutableMap.of(RetentionEvents.CleanFailed.FAILURE_CONTEXT_METADATA_KEY,
                        ExceptionUtils.getFullStackTrace(throwable),
                        RetentionEvents.DATASET_URN_METADATA_KEY, dataset.datasetURN()));
            }

            @Override
            public void onSuccess(Void arg0) {
                DatasetCleaner.this.finishCleanSignal.get().countDown();
                LOG.info("Successfully cleaned: " + dataset.datasetURN());
                Instrumented.markMeter(DatasetCleaner.this.datasetsCleanSuccessMeter);
            }
        });
    }
}
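A driver sketch for DatasetCleaner; the (FileSystem, Properties) constructor and a close() that waits on the outstanding clean tasks are assumptions about the surrounding class, so verify them against the Gobblin version in use:

// Hypothetical driver: construct the cleaner from job properties and run retention once.
Properties props = new Properties();
// ... dataset-finder and retention properties for the deployment go here ...
FileSystem fs = FileSystem.get(new Configuration());
DatasetCleaner cleaner = new DatasetCleaner(fs, props); // assumed constructor signature
try {
    cleaner.clean();
} finally {
    cleaner.close(); // assumed to await pending clean tasks and release resources
}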