Use of org.apache.gobblin.configuration.WorkUnitState in the incubator-gobblin project by Apache.
Class GoogleWebmasterExtractorTest, method testConstructor.
/**
 * Test that positionMaps and iterators are constructed correctly in the constructor.
 *
 * <p>The work unit is configured with two request filters ({@code Country.USA} and {@code Country.ALL})
 * and the extractor is given two mocked data fetchers. The test expects four iterators, alternating
 * USA / ALL, and four matching position maps.
 */
@Test
public void testConstructor() throws IOException, DataRecordException {
  WorkUnitState wuState = getWorkUnitState1();
  wuState.setProp(GoogleWebMasterSource.KEY_REQUEST_FILTERS, "Country.USA,Country.ALL");

  List<GoogleWebmasterFilter.Dimension> dimensions =
      Arrays.asList(GoogleWebmasterFilter.Dimension.PAGE, GoogleWebmasterFilter.Dimension.COUNTRY);
  List<GoogleWebmasterDataFetcher.Metric> metrics = Arrays.asList(GoogleWebmasterDataFetcher.Metric.CLICKS);

  // Output schema positions: CLICKS -> 0, COUNTRY -> 1, PAGE -> 2.
  Map<String, Integer> positionMap = new HashMap<>();
  positionMap.put(GoogleWebmasterDataFetcher.Metric.CLICKS.toString(), 0);
  positionMap.put(GoogleWebmasterFilter.Dimension.COUNTRY.toString(), 1);
  positionMap.put(GoogleWebmasterFilter.Dimension.PAGE.toString(), 2);

  GoogleWebmasterDataFetcher dataFetcher1 = Mockito.mock(GoogleWebmasterDataFetcher.class);
  GoogleWebmasterDataFetcher dataFetcher2 = Mockito.mock(GoogleWebmasterDataFetcher.class);

  GoogleWebmasterExtractor extractor = new GoogleWebmasterExtractor(
      wuState,
      wuState.getWorkunit().getLowWatermark(LongWatermark.class).getValue(),
      wuState.getWorkunit().getExpectedHighWatermark(LongWatermark.class).getValue(),
      positionMap, dimensions, metrics, null,
      Arrays.asList(dataFetcher1, dataFetcher2));

  List<GoogleWebmasterExtractorIterator> iterators = extractor.getIterators();
  Assert.assertEquals(iterators.size(), 4);
  Assert.assertEquals(iterators.get(0).getCountry(), "USA");
  Assert.assertEquals(iterators.get(1).getCountry(), "ALL");
  Assert.assertEquals(iterators.get(2).getCountry(), "USA");
  Assert.assertEquals(iterators.get(3).getCountry(), "ALL");

  // Arguments are passed as (actual, expected), matching the assertions above, so that a failure
  // message reports the expected and found values the right way round.
  List<int[]> responseToOutputSchema = extractor.getPositionMaps();
  Assert.assertEquals(responseToOutputSchema.size(), 4);
  // country is Country.USA
  Assert.assertEquals(responseToOutputSchema.get(0), new int[] { 2, 1, 0 });
  // country is Country.ALL, so the country request will be removed.
  Assert.assertEquals(responseToOutputSchema.get(1), new int[] { 2, 0 });
  Assert.assertEquals(responseToOutputSchema.get(2), new int[] { 2, 1, 0 });
  Assert.assertEquals(responseToOutputSchema.get(3), new int[] { 2, 0 });
}
Use of org.apache.gobblin.configuration.WorkUnitState in the incubator-gobblin project by Apache.
Class GoogleAnalyticsUnsampledExtractorTest, method testPollForCompletionWithException.
/**
 * Runs {@code pollForCompletion} on an extractor built by
 * {@code setup(ReportCreationStatus.COMPLETED, wuState, true)} and checks that the returned report
 * carries {@code EXPECTED_FILE_ID} and that the get request was executed at least five times.
 *
 * NOTE(review): the method name suggests the third {@code setup} argument makes polling hit
 * exceptions before it succeeds; confirm against {@code setup}.
 */
public void testPollForCompletionWithException() throws IOException {
  long retryTimeoutMs = TimeUnit.SECONDS.toMillis(30L);
  long retryIntervalMs = 1L;

  // Fresh state for this test, with the polling retry timeout and interval set.
  wuState = new WorkUnitState();
  wuState.setProp(POLL_RETRY_PREFIX + RETRY_TIME_OUT_MS, retryTimeoutMs);
  wuState.setProp(POLL_RETRY_PREFIX + RETRY_INTERVAL_MS, retryIntervalMs);

  GoogleAnalyticsUnsampledExtractor unsampledExtractor = setup(ReportCreationStatus.COMPLETED, wuState, true);

  UnsampledReport reportRequest = new UnsampledReport()
      .setAccountId("testAccountId")
      .setWebPropertyId("testWebPropertyId")
      .setProfileId("testProfileId")
      .setId("testId");

  String documentId = unsampledExtractor
      .pollForCompletion(wuState, gaService, reportRequest)
      .getDriveDownloadDetails()
      .getDocumentId();

  Assert.assertEquals(documentId, EXPECTED_FILE_ID);
  verify(getReq, atLeast(5)).execute();
}
Use of org.apache.gobblin.configuration.WorkUnitState in the incubator-gobblin project by Apache.
Class GoogleAnalyticsUnsampledExtractorTest, method testPollForCompletionFailure.
/**
 * Runs {@code pollForCompletion} on an extractor built by
 * {@code setup(ReportCreationStatus.FAILED, wuState, false)} and expects it to throw an exception
 * whose cause-of-cause is a {@link NonTransientException}. The get request must still have been
 * executed at least five times.
 */
public void testPollForCompletionFailure() throws IOException {
  long retryTimeoutMs = TimeUnit.SECONDS.toMillis(30L);
  long retryIntervalMs = 1L;

  // Fresh state for this test, with the polling retry timeout and interval set.
  wuState = new WorkUnitState();
  wuState.setProp(POLL_RETRY_PREFIX + RETRY_TIME_OUT_MS, retryTimeoutMs);
  wuState.setProp(POLL_RETRY_PREFIX + RETRY_INTERVAL_MS, retryIntervalMs);

  GoogleAnalyticsUnsampledExtractor unsampledExtractor = setup(ReportCreationStatus.FAILED, wuState, false);

  UnsampledReport reportRequest = new UnsampledReport()
      .setAccountId("testAccountId")
      .setWebPropertyId("testWebPropertyId")
      .setProfileId("testProfileId")
      .setId("testId");

  try {
    unsampledExtractor.pollForCompletion(wuState, gaService, reportRequest);
    // Assert.fail raises an AssertionError, which the catch (Exception) below does not intercept.
    Assert.fail("Should have failed with failed status");
  } catch (Exception e) {
    Throwable rootCause = e.getCause().getCause();
    Assert.assertTrue(rootCause instanceof NonTransientException);
  }

  verify(getReq, atLeast(5)).execute();
}
Use of org.apache.gobblin.configuration.WorkUnitState in the incubator-gobblin project by Apache.
Class GoogleDriveSourceTest, method testGetExtractor.
/**
 * Checks that a {@code GoogleDriveSource} whose {@code fsHelper} is a mock returns a
 * {@code GoogleDriveExtractor} from {@code getExtractor} when given an empty {@code WorkUnitState}.
 */
public void testGetExtractor() throws IOException {
  @SuppressWarnings("rawtypes") GoogleDriveSource driveSource = new GoogleDriveSource<>();
  // Replace the file-system helper with a mock before asking for an extractor.
  driveSource.fsHelper = mock(GoogleDriveFsHelper.class);

  WorkUnitState emptyState = new WorkUnitState();
  Extractor created = driveSource.getExtractor(emptyState);

  Assert.assertTrue(created instanceof GoogleDriveExtractor);
}
Use of org.apache.gobblin.configuration.WorkUnitState in the incubator-gobblin project by Apache.
Class AbstractJobLauncher, method cleanLeftoverStagingData.
/**
 * Removes staging data that a previous, possibly failed, run of the job may have left behind.
 *
 * <p>Property {@link ConfigurationKeys#CLEANUP_STAGING_DATA_PER_TASK} controls whether staging data
 * is cleaned task by task or for the whole job at once.
 *
 * <p>Nothing is cleaned when {@link ConfigurationKeys#CLEANUP_STAGING_DATA_BY_INITIALIZER} is set
 * (the initializer does it instead) or when the job still has unfinished {@link CommitSequence}s.
 *
 * <p>Failures during the clean-up itself are logged and not propagated.
 *
 * @param workUnits the work units of the current run; only used for per-task clean-up
 * @param jobState the state of the job being launched
 * @throws JobException if checking for unfinished commit sequences fails
 */
private void cleanLeftoverStagingData(WorkUnitStream workUnits, JobState jobState) throws JobException {
  // Clean up will be done by initializer.
  if (jobState.getPropAsBoolean(ConfigurationKeys.CLEANUP_STAGING_DATA_BY_INITIALIZER, false)) {
    return;
  }

  boolean cleanable;
  try {
    cleanable = canCleanStagingData(jobState);
  } catch (IOException e) {
    throw new JobException("Failed to check unfinished commit sequences", e);
  }
  if (!cleanable) {
    LOG.error("Job " + jobState.getJobName() + " has unfinished commit sequences. Will not clean up staging data.");
    return;
  }

  try {
    if (!this.jobContext.shouldCleanupStagingDataPerTask()) {
      // Job-level clean-up: optionally purge old job data first, then the job's staging data.
      boolean purgeOldJobs = jobState.getPropAsBoolean(
          ConfigurationKeys.CLEANUP_OLD_JOBS_DATA, ConfigurationKeys.DEFAULT_CLEANUP_OLD_JOBS_DATA);
      if (purgeOldJobs) {
        JobLauncherUtils.cleanUpOldJobData(
            jobState, LOG, jobContext.getStagingDirProvided(), jobContext.getOutputDirProvided());
      }
      JobLauncherUtils.cleanJobStagingData(jobState, LOG);
      return;
    }

    // Per-task clean-up needs the full work unit collection.
    if (!workUnits.isSafeToMaterialize()) {
      throw new RuntimeException("Work unit streams do not support cleaning staging data per task.");
    }

    Closer resources = Closer.create();
    Map<String, ParallelRunner> runnersByKey = Maps.newHashMap();
    try {
      for (WorkUnit unit : JobLauncherUtils.flattenWorkUnits(workUnits.getMaterializedWorkUnitCollection())) {
        WorkUnitState unitState = new WorkUnitState(unit, jobState);
        JobLauncherUtils.cleanTaskStagingData(unitState, LOG, resources, runnersByKey);
      }
    } catch (Throwable failure) {
      throw resources.rethrow(failure);
    } finally {
      resources.close();
    }
  } catch (Throwable t) {
    // Catch Throwable instead of just IOException to make sure failure of this won't affect the current run
    LOG.error("Failed to clean leftover staging data", t);
  }
}
Aggregations