Search in sources :

Example 1 with ApiDimensionFilter

use of com.google.api.services.webmasters.model.ApiDimensionFilter in project incubator-gobblin by apache.

the class GoogleWebmasterExtractorIteratorTest method testIterator.

/**
 * Tests that the GoogleWebmasterExtractorIterator first fetches all pages based on the filters and then,
 * for each page, asks for the queries.
 *
 * <p>The data fetcher is mocked: {@code getAllPages} returns two page jobs, and
 * {@code performSearchAnalyticsQuery} returns a distinct single-row result for each page's filter list.
 *
 * @throws IOException declared by the mocked fetcher methods
 */
@Test
public void testIterator() throws IOException {
    GoogleWebmasterDataFetcher client = Mockito.mock(GoogleWebmasterDataFetcher.class);
    String country = "USA";
    String date = "2016-11-01";
    ArrayList<GoogleWebmasterFilter.Dimension> requestedDimensions = new ArrayList<>();
    ArrayList<GoogleWebmasterDataFetcher.Metric> requestedMetrics = new ArrayList<>();
    ArrayDeque<ProducerJob> allJobs = new ArrayDeque<>();
    String page1 = siteProperty + "a/1";
    String page2 = siteProperty + "b/1";
    allJobs.add(new SimpleProducerJob(page1, date, date));
    allJobs.add(new SimpleProducerJob(page2, date, date));
    // The row limit is always expressed through GoogleWebmasterClient.API_ROW_LIMIT (never a literal)
    // so that stubbing and verification below cannot drift apart if the constant changes.
    Mockito.when(client.getAllPages(eq(date), eq(date), eq(country), eq(GoogleWebmasterClient.API_ROW_LIMIT))).thenReturn(allJobs);
    // Set performSearchAnalyticsQuery Mock1: country filter + page1 filter -> results1
    String[] a1 = { "r1-c1", "r1-c2" };
    List<String[]> results1 = new ArrayList<>();
    results1.add(a1);
    List<ApiDimensionFilter> filters1 = new ArrayList<>();
    filters1.add(GoogleWebmasterFilter.countryEqFilter(country));
    filters1.add(GoogleWebmasterFilter.pageFilter(GoogleWebmasterFilter.FilterOperator.EQUALS, page1));
    Mockito.when(client.performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters1)))).thenReturn(results1);
    // Set performSearchAnalyticsQuery Mock2: country filter + page2 filter -> results2
    String[] a2 = { "r2-c1", "r2-c2" };
    List<String[]> results2 = new ArrayList<>();
    results2.add(a2);
    List<ApiDimensionFilter> filters2 = new ArrayList<>();
    filters2.add(GoogleWebmasterFilter.countryEqFilter(country));
    filters2.add(GoogleWebmasterFilter.pageFilter(GoogleWebmasterFilter.FilterOperator.EQUALS, page2));
    Mockito.when(client.performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters2)))).thenReturn(results2);
    Map<GoogleWebmasterFilter.Dimension, ApiDimensionFilter> map = new HashMap<>();
    map.put(GoogleWebmasterFilter.Dimension.COUNTRY, GoogleWebmasterFilter.countryEqFilter(country));
    WorkUnitState defaultState = GoogleWebmasterExtractorTest.getWorkUnitState1();
    // NOTE(review): batch size 1 presumably makes the iterator query each page on its own - confirm.
    defaultState.setProp(GoogleWebMasterSource.KEY_QUERIES_TUNING_BATCH_SIZE, 1);
    GoogleWebmasterExtractorIterator iterator = new GoogleWebmasterExtractorIterator(client, date, date, requestedDimensions, requestedMetrics, map, defaultState);
    List<String[]> response = new ArrayList<>();
    response.add(iterator.next());
    response.add(iterator.next());
    // Exactly two rows are expected: one per mocked page.
    Assert.assertFalse(iterator.hasNext());
    // contains() on arrays compares by reference, which is fine here: the mocks hand back a1 and a2 themselves.
    Assert.assertTrue(response.contains(a1));
    Assert.assertTrue(response.contains(a2));
    Mockito.verify(client, Mockito.times(1)).getAllPages(eq(date), eq(date), eq(country), eq(GoogleWebmasterClient.API_ROW_LIMIT));
    Mockito.verify(client, Mockito.times(1)).performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters1)));
    Mockito.verify(client, Mockito.times(1)).performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters2)));
}
Also used : ApiDimensionFilter(com.google.api.services.webmasters.model.ApiDimensionFilter) HashMap(java.util.HashMap) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) ArrayList(java.util.ArrayList) ArrayDeque(java.util.ArrayDeque) Test(org.testng.annotations.Test)

Example 2 with ApiDimensionFilter

use of com.google.api.services.webmasters.model.ApiDimensionFilter in project incubator-gobblin by apache.

the class GoogleWebmasterDataFetcherImpl method getAllPages.

/**
 * Fetches all pages for the given country and date range and wraps each one in a producer job.
 *
 * <p>Due to a limitation of the API, at most 5000 rows can be fetched at a time. Another limitation is
 * that results are sorted by click count in descending order, and rows with the same click count are
 * ordered arbitrarily (read more at https://developers.google.com/webmaster-tools/v3/searchanalytics).
 * Pages are therefore fetched by partitions: when a partition returns 5000 rows, it is split into more
 * granular partitions.
 *
 * <p>If jobs are already present on this fetcher, they are returned as-is and nothing is fetched.
 */
@Override
public Collection<ProducerJob> getAllPages(String startDate, String endDate, String country, int rowLimit) throws IOException {
    log.info("Requested row limit: " + rowLimit);
    boolean hotStarted = !_jobs.isEmpty();
    if (hotStarted) {
        log.info("Service got hot started.");
        return _jobs;
    }

    ApiDimensionFilter marketFilter = GoogleWebmasterFilter.countryEqFilter(country);
    List<GoogleWebmasterFilter.Dimension> pageDimension = new ArrayList<>();
    pageDimension.add(GoogleWebmasterFilter.Dimension.PAGE);

    // -1 means "no expectation available"; it is only replaced when the requested limit reaches the API cap.
    int expectedCount = -1;
    if (rowLimit >= GoogleWebmasterClient.API_ROW_LIMIT) {
        // expected size only makes sense when the data set size is larger than GoogleWebmasterClient.API_ROW_LIMIT
        expectedCount = getPagesSize(startDate, endDate, country, pageDimension, Arrays.asList(marketFilter));
        String expectedMessage = String.format("Expected number of pages is %d for market-%s from %s to %s", expectedCount, GoogleWebmasterFilter.countryFilterToString(marketFilter), startDate, endDate);
        log.info(expectedMessage);
    }

    // Seed the work queue with the whole site property; getPages receives the queue to work through.
    Queue<Pair<String, FilterOperator>> pending = new ArrayDeque<>();
    pending.add(Pair.of(_siteProperty, FilterOperator.CONTAINS));
    int fetchLimit = Math.min(rowLimit, GoogleWebmasterClient.API_ROW_LIMIT);
    Collection<String> pages = getPages(startDate, endDate, pageDimension, marketFilter, pending, fetchLimit);

    int fetchedCount = pages.size();
    String fetchedMessage = String.format("A total of %d pages fetched for property %s at country-%s from %s to %s", fetchedCount, _siteProperty, country, startDate, endDate);
    log.info(fetchedMessage);

    boolean nothingToCompare = expectedCount == -1 || fetchedCount == expectedCount;
    if (!nothingToCompare) {
        log.warn(String.format("Expected page size is %d, but only able to get %d", expectedCount, fetchedCount));
    }

    ArrayDeque<ProducerJob> result = new ArrayDeque<>(fetchedCount);
    for (String pageUrl : pages) {
        ProducerJob pageJob = new SimpleProducerJob(pageUrl, startDate, endDate);
        result.add(pageJob);
    }
    return result;
}
Also used : ApiDimensionFilter(com.google.api.services.webmasters.model.ApiDimensionFilter) ArrayList(java.util.ArrayList) Dimension(org.apache.gobblin.ingestion.google.webmaster.GoogleWebmasterFilter.Dimension) GoogleWebmasterFilter.countryFilterToString(org.apache.gobblin.ingestion.google.webmaster.GoogleWebmasterFilter.countryFilterToString) ArrayDeque(java.util.ArrayDeque) Pair(org.apache.commons.lang3.tuple.Pair)

Example 3 with ApiDimensionFilter

use of com.google.api.services.webmasters.model.ApiDimensionFilter in project incubator-gobblin by apache.

the class GoogleWebmasterDataFetcherImpl method performSearchAnalyticsQueryInBatch.

/**
 * Queues one search analytics query per job into a single batch request and executes that batch.
 *
 * <p>The three lists are read in parallel by index: job {@code i} is queried with the filters at
 * {@code filterList.get(i)} and its response is delivered to {@code callbackList.get(i)}. When
 * {@code jobs} is empty, nothing is sent.
 *
 * @param jobs the jobs supplying the start and end date of each query
 * @param filterList the filters for each job; they are combined via
 *        {@code GoogleWebmasterFilter.andGroupFilters}
 * @param callbackList the callback receiving each job's response
 * @param requestedDimensions the dimensions requested for every query in the batch
 * @param rowLimit the row limit passed to every query in the batch
 * @throws IOException if building or executing the batch fails
 */
@Override
public void performSearchAnalyticsQueryInBatch(List<ProducerJob> jobs, List<ArrayList<ApiDimensionFilter>> filterList, List<JsonBatchCallback<SearchAnalyticsQueryResponse>> callbackList, List<Dimension> requestedDimensions, int rowLimit) throws IOException {
    if (jobs.isEmpty()) {
        // Nothing would be queued, and executing a batch with no queued requests is rejected by the
        // Google API client, so return before creating the batch.
        return;
    }
    BatchRequest batchRequest = _client.createBatch();
    for (int i = 0; i < jobs.size(); ++i) {
        ProducerJob job = jobs.get(i);
        ArrayList<ApiDimensionFilter> filters = filterList.get(i);
        JsonBatchCallback<SearchAnalyticsQueryResponse> callback = callbackList.get(i);
        // NOTE(review): the trailing 0 is presumably the starting row offset - confirm against the client.
        _client.createSearchAnalyticsQuery(_siteProperty, job.getStartDate(), job.getEndDate(), requestedDimensions, GoogleWebmasterFilter.andGroupFilters(filters), rowLimit, 0).queue(batchRequest, callback);
    }
    batchRequest.execute();
}
Also used : BatchRequest(com.google.api.client.googleapis.batch.BatchRequest) SearchAnalyticsQueryResponse(com.google.api.services.webmasters.model.SearchAnalyticsQueryResponse) ApiDimensionFilter(com.google.api.services.webmasters.model.ApiDimensionFilter)

Aggregations

ApiDimensionFilter (com.google.api.services.webmasters.model.ApiDimensionFilter)3 ArrayDeque (java.util.ArrayDeque)2 ArrayList (java.util.ArrayList)2 BatchRequest (com.google.api.client.googleapis.batch.BatchRequest)1 SearchAnalyticsQueryResponse (com.google.api.services.webmasters.model.SearchAnalyticsQueryResponse)1 HashMap (java.util.HashMap)1 Pair (org.apache.commons.lang3.tuple.Pair)1 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)1 Dimension (org.apache.gobblin.ingestion.google.webmaster.GoogleWebmasterFilter.Dimension)1 GoogleWebmasterFilter.countryFilterToString (org.apache.gobblin.ingestion.google.webmaster.GoogleWebmasterFilter.countryFilterToString)1 Test (org.testng.annotations.Test)1