Use of org.apache.gobblin.dataset.Dataset in the Apache project incubator-gobblin.
The class CompactionSource, method prioritize().
/**
 * Narrows the candidate dataset list down to at most the configured maximum count,
 * letting the request allocator decide which datasets make the cut.
 *
 * @param datasets all candidate datasets discovered for this run
 * @param state    job state carrying the max-count configuration
 * @return the allocator-selected subset of {@code datasets}
 */
private List<Dataset> prioritize(List<Dataset> datasets, State state) {
  // Upper bound on how many datasets a single run may process.
  // (The misspelled constant name DEFUALT_... is defined that way in MRCompactor.)
  double maxPool = state.getPropAsDouble(MRCompactor.COMPACTION_DATASETS_MAX_COUNT,
      MRCompactor.DEFUALT_COMPACTION_DATASETS_MAX_COUNT);
  ResourcePool pool = ResourcePool.builder()
      .maxResource(SimpleDatasetRequest.SIMPLE_DATASET_COUNT_DIMENSION, maxPool)
      .build();

  // Wrap each dataset in a requestor, let the allocator pick winners, then unwrap.
  Iterator<SimpleDatasetRequestor> requestors =
      datasets.stream().map(SimpleDatasetRequestor::new).iterator();
  Iterator<Dataset> selected =
      Iterators.transform(this.allocator.allocateRequests(requestors, pool), input -> input.getDataset());
  return Lists.newArrayList(selected);
}
Use of org.apache.gobblin.dataset.Dataset in the Apache project incubator-gobblin.
The class DatasetCleaner, method clean().
/**
 * Perform the cleanup of old / deprecated dataset versions.
 *
 * Every found dataset is submitted to the executor service; the countdown latch
 * {@code finishCleanSignal} is sized to the number of datasets and decremented once
 * per dataset, on success or failure alike, so callers can await completion.
 *
 * @throws IOException if dataset discovery fails
 */
public void clean() throws IOException {
  List<Dataset> foundDatasets = this.datasetFinder.findDatasets();
  this.finishCleanSignal = Optional.of(new CountDownLatch(foundDatasets.size()));
  for (final Dataset dataset : foundDatasets) {
    // Only CleanableDatasets carry cleanup logic; anything else is a silent no-op.
    ListenableFuture<Void> cleanFuture = this.service.submit((Callable<Void>) () -> {
      if (dataset instanceof CleanableDataset) {
        ((CleanableDataset) dataset).clean();
      }
      return null;
    });
    Futures.addCallback(cleanFuture, new FutureCallback<Void>() {
      @Override
      public void onSuccess(Void result) {
        DatasetCleaner.this.finishCleanSignal.get().countDown();
        LOG.info("Successfully cleaned: " + dataset.datasetURN());
        Instrumented.markMeter(DatasetCleaner.this.datasetsCleanSuccessMeter);
      }

      @Override
      public void onFailure(Throwable throwable) {
        // Count down even on failure so awaiting callers are never blocked forever.
        DatasetCleaner.this.finishCleanSignal.get().countDown();
        LOG.warn("Exception caught when cleaning " + dataset.datasetURN() + ".", throwable);
        DatasetCleaner.this.throwables.add(throwable);
        Instrumented.markMeter(DatasetCleaner.this.datasetsCleanFailureMeter);
        DatasetCleaner.this.eventSubmitter.submit(RetentionEvents.CleanFailed.EVENT_NAME,
            ImmutableMap.of(RetentionEvents.CleanFailed.FAILURE_CONTEXT_METADATA_KEY,
                ExceptionUtils.getFullStackTrace(throwable),
                RetentionEvents.DATASET_URN_METADATA_KEY, dataset.datasetURN()));
      }
    });
  }
}
Use of org.apache.gobblin.dataset.Dataset in the Apache project incubator-gobblin.
The class LoopingDatasetFinderSource, method getWorkunitStream().
@Override
public WorkUnitStream getWorkunitStream(SourceState state) {
  try {
    int maxWorkUnits = state.getPropAsInt(MAX_WORK_UNITS_PER_RUN_KEY, MAX_WORK_UNITS_PER_RUN);

    // Find the previous work unit with the highest ordinal; it carries the dataset/partition
    // URNs from which this run should resume.
    List<WorkUnitState> previousWorkUnitStates = state.getPreviousWorkUnitStates();
    Optional<WorkUnitState> maxWorkUnit;
    try {
      maxWorkUnit = previousWorkUnitStates.stream().reduce((wu1, wu2) -> {
        int wu1Ordinal = wu1.getPropAsInt(WORK_UNIT_ORDINAL);
        int wu2Ordinal = wu2.getPropAsInt(WORK_UNIT_ORDINAL);
        return wu1Ordinal > wu2Ordinal ? wu1 : wu2;
      });
    } catch (NumberFormatException nfe) {
      // BUGFIX: chain the NumberFormatException as the cause instead of dropping it, so the
      // offending (missing/malformed) property value is visible in the stack trace.
      throw new RuntimeException(
          "Work units in state store are corrupted! Missing or malformed " + WORK_UNIT_ORDINAL, nfe);
    }

    // Resume from the recorded watermark only if the previous run had not already
    // reached the end of all datasets; otherwise start over from the beginning.
    String previousDatasetUrnWatermark = null;
    String previousPartitionUrnWatermark = null;
    if (maxWorkUnit.isPresent() && !maxWorkUnit.get().getPropAsBoolean(END_OF_DATASETS_KEY, false)) {
      previousDatasetUrnWatermark = maxWorkUnit.get().getProp(DATASET_URN);
      previousPartitionUrnWatermark = maxWorkUnit.get().getProp(PARTITION_URN);
    }

    IterableDatasetFinder datasetsFinder = createDatasetsFinder(state);
    // A lexicographically sorted stream is required so the URN watermarks define a stable
    // resume point across runs.
    Stream<Dataset> datasetStream =
        datasetsFinder.getDatasetsStream(Spliterator.SORTED, this.lexicographicalComparator);
    datasetStream = sortStreamLexicographically(datasetStream);

    return new BasicWorkUnitStream.Builder(new DeepIterator(datasetStream.iterator(),
        previousDatasetUrnWatermark, previousPartitionUrnWatermark, maxWorkUnits))
        .setFiniteStream(true).build();
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
Use of org.apache.gobblin.dataset.Dataset in the Apache project incubator-gobblin.
The class ComplianceRestoreJob, method run().
/**
 * Finds all datasets and restores each {@link RestorableDataset} asynchronously.
 *
 * The countdown latch {@code finishCleanSignal} is sized to the number of datasets and
 * decremented exactly once per dataset (on success or failure), so callers can await
 * completion of all restore attempts.
 *
 * @throws IOException if dataset discovery fails
 */
public void run() throws IOException {
  Preconditions.checkNotNull(this.finder, "Dataset finder class is not set");
  List<Dataset> foundDatasets = this.finder.findDatasets();
  this.finishCleanSignal = Optional.of(new CountDownLatch(foundDatasets.size()));
  for (final Dataset dataset : foundDatasets) {
    ListenableFuture<Void> restoreFuture = this.service.submit((Callable<Void>) () -> {
      // Datasets that are not restorable are skipped with a warning rather than failed.
      if (!(dataset instanceof RestorableDataset)) {
        log.warn("Not an instance of " + RestorableDataset.class + " Dataset won't be restored " + dataset.datasetURN());
        return null;
      }
      log.info("Trying to restore");
      ((RestorableDataset) dataset).restore();
      return null;
    });
    Futures.addCallback(restoreFuture, new FutureCallback<Void>() {
      @Override
      public void onFailure(Throwable t) {
        // Count down even on failure so awaiting callers are never blocked forever.
        ComplianceRestoreJob.this.finishCleanSignal.get().countDown();
        log.warn("Exception caught when restoring " + dataset.datasetURN() + ".", t);
        ComplianceRestoreJob.this.throwables.add(t);
        ComplianceRestoreJob.this.eventSubmitter.submit(ComplianceEvents.Restore.FAILED_EVENT_NAME,
            ImmutableMap.of(ComplianceEvents.FAILURE_CONTEXT_METADATA_KEY,
                ExceptionUtils.getFullStackTrace(t),
                ComplianceEvents.DATASET_URN_METADATA_KEY, dataset.datasetURN()));
      }

      @Override
      public void onSuccess(@Nullable Void result) {
        ComplianceRestoreJob.this.finishCleanSignal.get().countDown();
        log.info("Successfully restored: " + dataset.datasetURN());
      }
    });
  }
}
Use of org.apache.gobblin.dataset.Dataset in the Apache project incubator-gobblin.
The class ComplianceValidationJob, method run().
/**
 * Finds all datasets and validates each {@link ValidatableDataset} asynchronously.
 *
 * The countdown latch {@code finishCleanSignal} is sized to the number of datasets and
 * decremented exactly once per dataset (on success or failure), so callers can await
 * completion of all validation attempts.
 *
 * @throws IOException if dataset discovery fails
 */
public void run() throws IOException {
  Preconditions.checkNotNull(this.finder, "Dataset finder class is not set");
  List<Dataset> foundDatasets = this.finder.findDatasets();
  this.finishCleanSignal = Optional.of(new CountDownLatch(foundDatasets.size()));
  for (final Dataset dataset : foundDatasets) {
    ListenableFuture<Void> validationFuture = this.service.submit((Callable<Void>) () -> {
      // Datasets that are not validatable are skipped with a warning rather than failed.
      if (!(dataset instanceof ValidatableDataset)) {
        log.warn("Not an instance of " + ValidatableDataset.class + " Dataset won't be validated " + dataset.datasetURN());
        return null;
      }
      ((ValidatableDataset) dataset).validate();
      return null;
    });
    Futures.addCallback(validationFuture, new FutureCallback<Void>() {
      @Override
      public void onFailure(Throwable t) {
        // Count down even on failure so awaiting callers are never blocked forever.
        ComplianceValidationJob.this.finishCleanSignal.get().countDown();
        log.warn("Exception caught when validating " + dataset.datasetURN() + ".", t);
        ComplianceValidationJob.this.throwables.add(t);
        ComplianceValidationJob.this.eventSubmitter.submit(ComplianceEvents.Validation.FAILED_EVENT_NAME,
            ImmutableMap.of(ComplianceEvents.FAILURE_CONTEXT_METADATA_KEY,
                ExceptionUtils.getFullStackTrace(t),
                ComplianceEvents.DATASET_URN_METADATA_KEY, dataset.datasetURN()));
      }

      @Override
      public void onSuccess(@Nullable Void result) {
        ComplianceValidationJob.this.finishCleanSignal.get().countDown();
        log.info("Successfully validated: " + dataset.datasetURN());
      }
    });
  }
}
Aggregations