Use of org.apache.gobblin.util.Either in the project incubator-gobblin by apache.
Example from the class ConfigBasedDatasetsFinder, method executeItertorExecutor.
/**
 * Runs every {@link Callable} produced by the given iterator on this finder's thread pool and
 * logs (up to a bounded number of) task failures without aborting the whole run.
 *
 * <p>NOTE(review): the method name contains a typo ("Itertor"); it is kept unchanged for
 * caller compatibility.
 *
 * @param callableIterator iterator over the units of work to execute
 * @throws IOException if the calling thread is interrupted while waiting for results
 */
protected void executeItertorExecutor(Iterator<Callable<Void>> callableIterator) throws IOException {
try {
IteratorExecutor<Void> executor = new IteratorExecutor<>(callableIterator, this.threadPoolSize, ExecutorsUtils.newDaemonThreadFactory(Optional.of(log), Optional.of(this.getClass().getSimpleName())));
List<Either<Void, ExecutionException>> results = executor.executeAndGetResults();
// Surface individual task failures in the log; failures do not fail the finder itself.
int maxNumberOfErrorLogs = 10;
IteratorExecutor.logFailures(results, log, maxNumberOfErrorLogs);
} catch (InterruptedException ie) {
// Restore the interrupt flag so callers further up the stack can still observe it.
Thread.currentThread().interrupt();
throw new IOException("Dataset finder is interrupted.", ie);
}
}
Use of org.apache.gobblin.util.Either in the project incubator-gobblin by apache.
Example from the class FsDatasetStateStore, method getLatestDatasetStatesByUrns.
/**
 * Get a {@link Map} from dataset URNs to the latest {@link JobState.DatasetState}s.
 *
 * <p>Dataset-state files for the job are read concurrently on this store's thread pool;
 * results are accumulated in a {@link ConcurrentHashMap} keyed by dataset URN.
 *
 * @param jobName the job name
 * @return a {@link Map} from dataset URNs to the latest {@link JobState.DatasetState}s
 * @throws IOException if there's something wrong reading the {@link JobState.DatasetState}s,
 *         or if the calling thread is interrupted while waiting for the reads to complete
 */
public Map<String, JobState.DatasetState> getLatestDatasetStatesByUrns(final String jobName) throws IOException {
Path stateStorePath = new Path(this.storeRootDir, jobName);
if (!this.fs.exists(stateStorePath)) {
return ImmutableMap.of();
}
// Only the "current" dataset-state tables are of interest; historical tables are skipped.
FileStatus[] stateStoreFileStatuses = this.fs.listStatus(stateStorePath, new PathFilter() {
@Override
public boolean accept(Path path) {
return path.getName().endsWith(CURRENT_DATASET_STATE_FILE_SUFFIX + DATASET_STATE_STORE_TABLE_SUFFIX);
}
});
if (stateStoreFileStatuses == null || stateStoreFileStatuses.length == 0) {
return ImmutableMap.of();
}
// Concurrent map: populated from multiple reader tasks below.
final Map<String, JobState.DatasetState> datasetStatesByUrns = new ConcurrentHashMap<>();
Iterator<Callable<Void>> callableIterator = Iterators.transform(Arrays.asList(stateStoreFileStatuses).iterator(), new Function<FileStatus, Callable<Void>>() {
@Override
public Callable<Void> apply(final FileStatus stateStoreFileStatus) {
return new Callable<Void>() {
@Override
public Void call() throws Exception {
Path stateStoreFilePath = stateStoreFileStatus.getPath();
LOGGER.info("Getting dataset states from: {}", stateStoreFilePath);
List<JobState.DatasetState> previousDatasetStates = getAll(jobName, stateStoreFilePath.getName());
if (!previousDatasetStates.isEmpty()) {
// There should be a single dataset state on the list if the list is not empty
JobState.DatasetState previousDatasetState = previousDatasetStates.get(0);
datasetStatesByUrns.put(previousDatasetState.getDatasetUrn(), previousDatasetState);
}
return null;
}
};
}
});
try {
List<Either<Void, ExecutionException>> results = new IteratorExecutor<>(callableIterator, this.threadPoolOfGettingDatasetState, ExecutorsUtils.newDaemonThreadFactory(Optional.of(LOGGER), Optional.of("GetFsDatasetStateStore-"))).executeAndGetResults();
int maxNumberOfErrorLogs = 10;
// Unlike best-effort logging, a failed read here is fatal: log then rethrow.
IteratorExecutor.logAndThrowFailures(results, LOGGER, maxNumberOfErrorLogs);
} catch (InterruptedException e) {
// Restore the interrupt flag so callers further up the stack can still observe it.
Thread.currentThread().interrupt();
throw new IOException("Failed to get latest dataset states.", e);
}
// the job has transitioned to the new dataset-based mechanism
if (datasetStatesByUrns.size() > 1) {
datasetStatesByUrns.remove(ConfigurationKeys.DEFAULT_DATASET_URN);
}
return datasetStatesByUrns;
}
Use of org.apache.gobblin.util.Either in the project incubator-gobblin by apache.
Example from the class KafkaAvroJobMonitorTest, method testWrongSchema.
/**
 * A record serialized with a schema the monitor does not expect ({@code MetricReport} instead
 * of {@code GobblinTrackingEvent}) must be rejected: no job spec produced, no event recorded,
 * and the parse-failure counter incremented.
 */
@Test
public void testWrongSchema() throws Exception {
TestKafkaAvroJobMonitor monitor = new TestKafkaAvroJobMonitor(GobblinTrackingEvent.SCHEMA$, new NoopSchemaVersionWriter());
monitor.buildMetricsContextAndMetrics();
// try/finally ensures the metrics context is torn down even if an assertion fails,
// so state does not leak into other tests.
try {
AvroSerializer<MetricReport> serializer = new AvroBinarySerializer<>(MetricReport.SCHEMA$, new NoopSchemaVersionWriter());
MetricReport event = new MetricReport(Maps.<String, String>newHashMap(), 0L, Lists.<Metric>newArrayList());
Collection<Either<JobSpec, URI>> results = monitor.parseJobSpec(serializer.serializeRecord(event));
Assert.assertEquals(results.size(), 0);
Assert.assertEquals(monitor.events.size(), 0);
Assert.assertEquals(monitor.getMessageParseFailures().getCount(), 1);
} finally {
monitor.shutdownMetrics();
}
}
Use of org.apache.gobblin.util.Either in the project incubator-gobblin by apache.
Example from the class KafkaAvroJobMonitorTest, method testWrongSchemaVersionWriter.
/**
 * A record written with a different {@code SchemaVersionWriter} ({@code FixedSchemaVersionWriter})
 * than the monitor was configured with ({@code NoopSchemaVersionWriter}) must fail to parse:
 * no job spec produced, no event recorded, and the parse-failure counter incremented.
 */
@Test
public void testWrongSchemaVersionWriter() throws Exception {
TestKafkaAvroJobMonitor monitor = new TestKafkaAvroJobMonitor(GobblinTrackingEvent.SCHEMA$, new NoopSchemaVersionWriter());
monitor.buildMetricsContextAndMetrics();
// try/finally ensures the metrics context is torn down even if an assertion fails,
// so state does not leak into other tests.
try {
AvroSerializer<GobblinTrackingEvent> serializer = new AvroBinarySerializer<>(GobblinTrackingEvent.SCHEMA$, new FixedSchemaVersionWriter());
GobblinTrackingEvent event = new GobblinTrackingEvent(0L, "namespace", "event", Maps.<String, String>newHashMap());
Collection<Either<JobSpec, URI>> results = monitor.parseJobSpec(serializer.serializeRecord(event));
Assert.assertEquals(results.size(), 0);
Assert.assertEquals(monitor.events.size(), 0);
Assert.assertEquals(monitor.getMessageParseFailures().getCount(), 1);
} finally {
monitor.shutdownMetrics();
}
}
Use of org.apache.gobblin.util.Either in the project incubator-gobblin by apache.
Example from the class KafkaAvroJobMonitorTest, method testUsingSchemaVersion.
/**
 * When serializer and monitor agree on the {@code SchemaVersionWriter}
 * ({@code FixedSchemaVersionWriter} on both sides), the event must parse successfully:
 * exactly one result and the original event recorded by the monitor.
 */
@Test
public void testUsingSchemaVersion() throws Exception {
TestKafkaAvroJobMonitor monitor = new TestKafkaAvroJobMonitor(GobblinTrackingEvent.SCHEMA$, new FixedSchemaVersionWriter());
monitor.buildMetricsContextAndMetrics();
// try/finally ensures the metrics context is torn down even if an assertion fails,
// so state does not leak into other tests.
try {
AvroSerializer<GobblinTrackingEvent> serializer = new AvroBinarySerializer<>(GobblinTrackingEvent.SCHEMA$, new FixedSchemaVersionWriter());
GobblinTrackingEvent event = new GobblinTrackingEvent(0L, "namespace", "event", Maps.<String, String>newHashMap());
Collection<Either<JobSpec, URI>> results = monitor.parseJobSpec(serializer.serializeRecord(event));
Assert.assertEquals(results.size(), 1);
Assert.assertEquals(monitor.events.size(), 1);
Assert.assertEquals(monitor.events.get(0), event);
} finally {
monitor.shutdownMetrics();
}
}
Aggregations