Search in sources :

Example 31 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class GoogleWebmasterExtractorTest method testConstructor.

/**
 * Test that positionMaps and iterators are constructed correctly in the constructor.
 *
 * <p>The work unit state requests two country filters ("Country.USA" and "Country.ALL") and the
 * extractor is given two data fetchers; the test expects four iterators and four position maps,
 * alternating USA / ALL.
 */
@Test
public void testConstructor() throws IOException, DataRecordException {
    WorkUnitState wuState = getWorkUnitState1();
    wuState.setProp(GoogleWebMasterSource.KEY_REQUEST_FILTERS, "Country.USA,Country.ALL");
    List<GoogleWebmasterFilter.Dimension> dimensions = Arrays.asList(GoogleWebmasterFilter.Dimension.PAGE, GoogleWebmasterFilter.Dimension.COUNTRY);
    List<GoogleWebmasterDataFetcher.Metric> metrics = Arrays.asList(GoogleWebmasterDataFetcher.Metric.CLICKS);
    // Output schema column positions: CLICKS -> 0, COUNTRY -> 1, PAGE -> 2.
    Map<String, Integer> positionMap = new HashMap<>();
    positionMap.put(GoogleWebmasterDataFetcher.Metric.CLICKS.toString(), 0);
    positionMap.put(GoogleWebmasterFilter.Dimension.COUNTRY.toString(), 1);
    positionMap.put(GoogleWebmasterFilter.Dimension.PAGE.toString(), 2);
    GoogleWebmasterDataFetcher dataFetcher1 = Mockito.mock(GoogleWebmasterDataFetcher.class);
    GoogleWebmasterDataFetcher dataFetcher2 = Mockito.mock(GoogleWebmasterDataFetcher.class);
    GoogleWebmasterExtractor extractor = new GoogleWebmasterExtractor(wuState, wuState.getWorkunit().getLowWatermark(LongWatermark.class).getValue(), wuState.getWorkunit().getExpectedHighWatermark(LongWatermark.class).getValue(), positionMap, dimensions, metrics, null, Arrays.asList(dataFetcher1, dataFetcher2));
    List<GoogleWebmasterExtractorIterator> iterators = extractor.getIterators();
    Assert.assertEquals(iterators.size(), 4);
    Assert.assertEquals(iterators.get(0).getCountry(), "USA");
    Assert.assertEquals(iterators.get(1).getCountry(), "ALL");
    Assert.assertEquals(iterators.get(2).getCountry(), "USA");
    Assert.assertEquals(iterators.get(3).getCountry(), "ALL");
    List<int[]> responseToOutputSchema = extractor.getPositionMaps();
    Assert.assertEquals(responseToOutputSchema.size(), 4);
    // TestNG's assertEquals takes (actual, expected); keep that order so failure messages
    // report the two values the right way round, consistent with the assertions above.
    // country is Country.USA
    Assert.assertEquals(responseToOutputSchema.get(0), new int[] { 2, 1, 0 });
    // country is Country.ALL, so the country request will be removed.
    Assert.assertEquals(responseToOutputSchema.get(1), new int[] { 2, 0 });
    Assert.assertEquals(responseToOutputSchema.get(2), new int[] { 2, 1, 0 });
    Assert.assertEquals(responseToOutputSchema.get(3), new int[] { 2, 0 });
}
Also used : HashMap(java.util.HashMap) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) Test(org.testng.annotations.Test)

Example 32 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class GoogleAnalyticsUnsampledExtractorTest method testPollForCompletionWithException.

/**
 * Polls for a report whose creation status is set up as COMPLETED and checks that the drive
 * document id of the polled report equals {@code EXPECTED_FILE_ID}, and that
 * {@code getReq.execute()} was invoked at least five times.
 *
 * NOTE(review): the {@code true} passed to {@code setup} presumably makes the mocked request
 * throw before succeeding -- confirm against {@code setup}.
 */
public void testPollForCompletionWithException() throws IOException {
    long pollTimeoutMs = TimeUnit.SECONDS.toMillis(30L);

    // Retry configuration used while polling: 30 second time-out, 1 ms between attempts.
    wuState = new WorkUnitState();
    wuState.setProp(POLL_RETRY_PREFIX + RETRY_TIME_OUT_MS, pollTimeoutMs);
    wuState.setProp(POLL_RETRY_PREFIX + RETRY_INTERVAL_MS, 1L);

    GoogleAnalyticsUnsampledExtractor extractor = setup(ReportCreationStatus.COMPLETED, wuState, true);

    UnsampledReport requestedReport = new UnsampledReport()
        .setAccountId("testAccountId")
        .setWebPropertyId("testWebPropertyId")
        .setProfileId("testProfileId")
        .setId("testId");

    String actualFileId = extractor
        .pollForCompletion(wuState, gaService, requestedReport)
        .getDriveDownloadDetails()
        .getDocumentId();

    Assert.assertEquals(actualFileId, EXPECTED_FILE_ID);
    verify(getReq, atLeast(5)).execute();
}
Also used : WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) UnsampledReport(com.google.api.services.analytics.model.UnsampledReport)

Example 33 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class GoogleAnalyticsUnsampledExtractorTest method testPollForCompletionFailure.

/**
 * Polls for a report whose creation status is set up as FAILED and expects the poll to throw an
 * exception whose cause's cause is a {@link NonTransientException}. Also verifies that
 * {@code getReq.execute()} was invoked at least five times.
 */
public void testPollForCompletionFailure() throws IOException {
    long pollTimeoutMs = TimeUnit.SECONDS.toMillis(30L);

    // Retry configuration used while polling: 30 second time-out, 1 ms between attempts.
    wuState = new WorkUnitState();
    wuState.setProp(POLL_RETRY_PREFIX + RETRY_TIME_OUT_MS, pollTimeoutMs);
    wuState.setProp(POLL_RETRY_PREFIX + RETRY_INTERVAL_MS, 1L);

    GoogleAnalyticsUnsampledExtractor extractor = setup(ReportCreationStatus.FAILED, wuState, false);

    UnsampledReport requestedReport = new UnsampledReport()
        .setAccountId("testAccountId")
        .setWebPropertyId("testWebPropertyId")
        .setProfileId("testProfileId")
        .setId("testId");

    // Capture whatever the poll throws so the assertions can live outside the try block.
    Exception pollFailure = null;
    try {
        extractor.pollForCompletion(wuState, gaService, requestedReport);
    } catch (Exception e) {
        pollFailure = e;
    }

    if (pollFailure == null) {
        Assert.fail("Should have failed with failed status");
    }
    Assert.assertTrue(pollFailure.getCause().getCause() instanceof NonTransientException);

    verify(getReq, atLeast(5)).execute();
}
Also used : NonTransientException(org.apache.gobblin.exception.NonTransientException) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) IOException(java.io.IOException) UnsampledReport(com.google.api.services.analytics.model.UnsampledReport)

Example 34 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class GoogleDriveSourceTest method testGetExtractor.

/**
 * Checks that a {@link GoogleDriveSource} whose {@code fsHelper} is a mock returns a
 * {@link GoogleDriveExtractor} for an empty {@link WorkUnitState}.
 */
public void testGetExtractor() throws IOException {
    @SuppressWarnings("rawtypes") GoogleDriveSource driveSource = new GoogleDriveSource<>();
    // Plug in a mocked file-system helper instead of whatever the source would otherwise use.
    driveSource.fsHelper = mock(GoogleDriveFsHelper.class);

    Extractor created = driveSource.getExtractor(new WorkUnitState());

    Assert.assertTrue(created instanceof GoogleDriveExtractor);
}
Also used : GoogleDriveFsHelper(org.apache.gobblin.source.extractor.extract.google.GoogleDriveFsHelper) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) GoogleDriveExtractor(org.apache.gobblin.source.extractor.extract.google.GoogleDriveExtractor) Extractor(org.apache.gobblin.source.extractor.Extractor) GoogleDriveSource(org.apache.gobblin.source.extractor.extract.google.GoogleDriveSource)

Example 35 with WorkUnitState

use of org.apache.gobblin.configuration.WorkUnitState in project incubator-gobblin by apache.

the class AbstractJobLauncher method cleanLeftoverStagingData.

/**
 * Removes staging data that an earlier, possibly failed, run of this job may have left behind.
 *
 * <p>Nothing is done here when {@link ConfigurationKeys#CLEANUP_STAGING_DATA_BY_INITIALIZER} is
 * set (the initializer takes care of it), or when the job still has unfinished
 * {@link CommitSequence}s.
 *
 * <p>Otherwise the job context decides the granularity: either each task's staging data is
 * cleaned individually (which requires a work unit stream that is safe to materialize), or the
 * whole job's staging data is cleaned at once, optionally preceded by a cleanup of old job data
 * when {@link ConfigurationKeys#CLEANUP_OLD_JOBS_DATA} is enabled.
 *
 * <p>Failures during the actual cleanup are logged and swallowed so they do not affect the
 * current run.
 *
 * @param workUnits the work units of the current run
 * @param jobState the state of the current job
 * @throws JobException if checking for unfinished commit sequences fails with an {@link IOException}
 */
private void cleanLeftoverStagingData(WorkUnitStream workUnits, JobState jobState) throws JobException {
    boolean handledByInitializer = jobState.getPropAsBoolean(ConfigurationKeys.CLEANUP_STAGING_DATA_BY_INITIALIZER, false);
    if (handledByInitializer) {
        // Clean up will be done by initializer.
        return;
    }

    boolean cleanable;
    try {
        cleanable = canCleanStagingData(jobState);
    } catch (IOException ioe) {
        throw new JobException("Failed to check unfinished commit sequences", ioe);
    }
    if (!cleanable) {
        LOG.error("Job " + jobState.getJobName() + " has unfinished commit sequences. Will not clean up staging data.");
        return;
    }

    try {
        // Whole-job cleanup path.
        if (!this.jobContext.shouldCleanupStagingDataPerTask()) {
            boolean purgeOldJobData = jobState.getPropAsBoolean(ConfigurationKeys.CLEANUP_OLD_JOBS_DATA, ConfigurationKeys.DEFAULT_CLEANUP_OLD_JOBS_DATA);
            if (purgeOldJobData) {
                JobLauncherUtils.cleanUpOldJobData(jobState, LOG, jobContext.getStagingDirProvided(), jobContext.getOutputDirProvided());
            }
            JobLauncherUtils.cleanJobStagingData(jobState, LOG);
            return;
        }

        // Per-task cleanup path: needs the full collection of work units.
        if (!workUnits.isSafeToMaterialize()) {
            throw new RuntimeException("Work unit streams do not support cleaning staging data per task.");
        }

        Closer closer = Closer.create();
        Map<String, ParallelRunner> runners = Maps.newHashMap();
        try {
            for (WorkUnit unit : JobLauncherUtils.flattenWorkUnits(workUnits.getMaterializedWorkUnitCollection())) {
                WorkUnitState unitState = new WorkUnitState(unit, jobState);
                JobLauncherUtils.cleanTaskStagingData(unitState, LOG, closer, runners);
            }
        } catch (Throwable inner) {
            throw closer.rethrow(inner);
        } finally {
            closer.close();
        }
    } catch (Throwable failure) {
        // Catch Throwable instead of just IOException to make sure failure of this won't affect the current run
        LOG.error("Failed to clean leftover staging data", failure);
    }
}
Also used : Closer(com.google.common.io.Closer) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) IOException(java.io.IOException) MultiWorkUnit(org.apache.gobblin.source.workunit.MultiWorkUnit) WorkUnit(org.apache.gobblin.source.workunit.WorkUnit) ParallelRunner(org.apache.gobblin.util.ParallelRunner)

Aggregations

WorkUnitState (org.apache.gobblin.configuration.WorkUnitState) 222
Test (org.testng.annotations.Test) 143
State (org.apache.gobblin.configuration.State) 48
SourceState (org.apache.gobblin.configuration.SourceState) 39
WorkUnit (org.apache.gobblin.source.workunit.WorkUnit) 39
Schema (org.apache.avro.Schema) 29
Path (org.apache.hadoop.fs.Path) 26
GenericRecord (org.apache.avro.generic.GenericRecord) 19
JsonObject (com.google.gson.JsonObject) 17
ArrayList (java.util.ArrayList) 16
File (java.io.File) 14
TaskState (org.apache.hadoop.mapreduce.v2.api.records.TaskState) 12
List (java.util.List) 11
Configuration (org.apache.hadoop.conf.Configuration) 11
IOException (java.io.IOException) 10
LongWatermark (org.apache.gobblin.source.extractor.extract.LongWatermark) 10
Extract (org.apache.gobblin.source.workunit.Extract) 10
FileSystem (org.apache.hadoop.fs.FileSystem) 10
Closer (com.google.common.io.Closer) 8
JsonParser (com.google.gson.JsonParser) 8