Use of com.google.api.services.webmasters.model.ApiDimensionFilter in the project incubator-gobblin by Apache.
In the class GoogleWebmasterExtractorIteratorTest, the method testIterator.
/**
 * Test the GoogleWebmasterExtractorIterator to make sure that it first gets all pages based on the filters
 * and then for each page, it asks for the queries.
 *
 * Setup: the mocked data fetcher returns two page jobs; each page has a distinct mocked
 * query-result row. The iterator must drain exactly those two rows and issue exactly one
 * page fetch plus one analytics query per page.
 * @throws IOException
 */
@Test
public void testIterator() throws IOException {
GoogleWebmasterDataFetcher client = Mockito.mock(GoogleWebmasterDataFetcher.class);
String country = "USA";
String date = "2016-11-01";
ArrayList<GoogleWebmasterFilter.Dimension> requestedDimensions = new ArrayList<>();
ArrayList<GoogleWebmasterDataFetcher.Metric> requestedMetrics = new ArrayList<>();
ArrayDeque<ProducerJob> allJobs = new ArrayDeque<>();
String page1 = siteProperty + "a/1";
String page2 = siteProperty + "b/1";
allJobs.add(new SimpleProducerJob(page1, date, date));
allJobs.add(new SimpleProducerJob(page2, date, date));
Mockito.when(client.getAllPages(eq(date), eq(date), eq(country), eq(GoogleWebmasterClient.API_ROW_LIMIT))).thenReturn(allJobs);
// Set performSearchAnalyticsQuery Mock1: page1 yields row a1
String[] a1 = { "r1-c1", "r1-c2" };
List<String[]> results1 = new ArrayList<>();
results1.add(a1);
List<ApiDimensionFilter> filters1 = new ArrayList<>();
filters1.add(GoogleWebmasterFilter.countryEqFilter(country));
filters1.add(GoogleWebmasterFilter.pageFilter(GoogleWebmasterFilter.FilterOperator.EQUALS, page1));
Mockito.when(client.performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters1)))).thenReturn(results1);
// Set performSearchAnalyticsQuery Mock2: page2 yields row a2
String[] a2 = { "r2-c1", "r2-c2" };
List<String[]> results2 = new ArrayList<>();
results2.add(a2);
List<ApiDimensionFilter> filters2 = new ArrayList<>();
filters2.add(GoogleWebmasterFilter.countryEqFilter(country));
filters2.add(GoogleWebmasterFilter.pageFilter(GoogleWebmasterFilter.FilterOperator.EQUALS, page2));
// Use the API_ROW_LIMIT constant consistently instead of the magic number 5000.
Mockito.when(client.performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters2)))).thenReturn(results2);
Map<GoogleWebmasterFilter.Dimension, ApiDimensionFilter> map = new HashMap<>();
map.put(GoogleWebmasterFilter.Dimension.COUNTRY, GoogleWebmasterFilter.countryEqFilter(country));
WorkUnitState defaultState = GoogleWebmasterExtractorTest.getWorkUnitState1();
// Batch size 1 forces one analytics query per page job.
defaultState.setProp(GoogleWebMasterSource.KEY_QUERIES_TUNING_BATCH_SIZE, 1);
GoogleWebmasterExtractorIterator iterator = new GoogleWebmasterExtractorIterator(client, date, date, requestedDimensions, requestedMetrics, map, defaultState);
List<String[]> response = new ArrayList<>();
response.add(iterator.next());
response.add(iterator.next());
// Exactly two rows must be produced; order between pages is not guaranteed.
Assert.assertFalse(iterator.hasNext());
Assert.assertTrue(response.contains(a1));
Assert.assertTrue(response.contains(a2));
Mockito.verify(client, Mockito.times(1)).getAllPages(eq(date), eq(date), eq(country), eq(GoogleWebmasterClient.API_ROW_LIMIT));
Mockito.verify(client, Mockito.times(1)).performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters1)));
Mockito.verify(client, Mockito.times(1)).performSearchAnalyticsQuery(eq(date), eq(date), eq(GoogleWebmasterClient.API_ROW_LIMIT), eq(requestedDimensions), eq(requestedMetrics), argThat(new CollectionEquals(filters2)));
}
Use of com.google.api.services.webmasters.model.ApiDimensionFilter in the project incubator-gobblin by Apache.
In the class GoogleWebmasterDataFetcherImpl, the method getAllPages.
/**
 * Collect every page (as a {@link ProducerJob}) known to the Search Console property for the
 * given date range and country.
 *
 * The API caps each response at 5000 rows and sorts by descending click count, with ties broken
 * arbitrarily (see https://developers.google.com/webmaster-tools/v3/searchanalytics). To work
 * around the cap, page retrieval is partitioned: any partition that comes back full is split
 * into finer-grained partitions and re-queried.
 */
@Override
public Collection<ProducerJob> getAllPages(String startDate, String endDate, String country, int rowLimit) throws IOException {
  log.info("Requested row limit: " + rowLimit);
  // Hot-start path: reuse jobs already collected by a previous invocation.
  if (!_jobs.isEmpty()) {
    log.info("Service got hot started.");
    return _jobs;
  }
  ApiDimensionFilter filterByCountry = GoogleWebmasterFilter.countryEqFilter(country);
  List<GoogleWebmasterFilter.Dimension> dimensions = new ArrayList<>();
  dimensions.add(GoogleWebmasterFilter.Dimension.PAGE);
  // -1 means "unknown": the expected count is only meaningful when the caller asks for at
  // least a full API page, i.e. the data set may exceed one response.
  int expectedCount = -1;
  if (rowLimit >= GoogleWebmasterClient.API_ROW_LIMIT) {
    // expected size only makes sense when the data set size is larger than GoogleWebmasterClient.API_ROW_LIMIT
    expectedCount = getPagesSize(startDate, endDate, country, dimensions, Arrays.asList(filterByCountry));
    log.info(String.format("Expected number of pages is %d for market-%s from %s to %s", expectedCount, GoogleWebmasterFilter.countryFilterToString(filterByCountry), startDate, endDate));
  }
  // Seed the partition queue with a single CONTAINS job covering the whole property.
  Queue<Pair<String, FilterOperator>> pending = new ArrayDeque<>();
  pending.add(Pair.of(_siteProperty, FilterOperator.CONTAINS));
  Collection<String> pages = getPages(startDate, endDate, dimensions, filterByCountry, pending, Math.min(rowLimit, GoogleWebmasterClient.API_ROW_LIMIT));
  int fetchedCount = pages.size();
  log.info(String.format("A total of %d pages fetched for property %s at country-%s from %s to %s", fetchedCount, _siteProperty, country, startDate, endDate));
  // Warn (but do not fail) when fewer pages were retrieved than the API reported.
  if (expectedCount != -1 && fetchedCount != expectedCount) {
    log.warn(String.format("Expected page size is %d, but only able to get %d", expectedCount, fetchedCount));
  }
  ArrayDeque<ProducerJob> result = new ArrayDeque<>(fetchedCount);
  for (String page : pages) {
    result.add(new SimpleProducerJob(page, startDate, endDate));
  }
  return result;
}
Use of com.google.api.services.webmasters.model.ApiDimensionFilter in the project incubator-gobblin by Apache.
In the class GoogleWebmasterDataFetcherImpl, the method performSearchAnalyticsQueryInBatch.
/**
 * Queue one Search Analytics query per producer job into a single HTTP batch request and
 * execute the batch. The three input lists are parallel: index i of {@code jobs},
 * {@code filterList} and {@code callbackList} describe one query and its response handler.
 * NOTE(review): assumes the three lists have equal length — confirm with callers.
 */
@Override
public void performSearchAnalyticsQueryInBatch(List<ProducerJob> jobs, List<ArrayList<ApiDimensionFilter>> filterList, List<JsonBatchCallback<SearchAnalyticsQueryResponse>> callbackList, List<Dimension> requestedDimensions, int rowLimit) throws IOException {
  BatchRequest batch = _client.createBatch();
  int jobCount = jobs.size();
  for (int idx = 0; idx < jobCount; ++idx) {
    ProducerJob currentJob = jobs.get(idx);
    ArrayList<ApiDimensionFilter> currentFilters = filterList.get(idx);
    JsonBatchCallback<SearchAnalyticsQueryResponse> currentCallback = callbackList.get(idx);
    // Build the per-job query (start row 0) and enqueue it with its callback.
    _client.createSearchAnalyticsQuery(_siteProperty, currentJob.getStartDate(), currentJob.getEndDate(), requestedDimensions, GoogleWebmasterFilter.andGroupFilters(currentFilters), rowLimit, 0)
        .queue(batch, currentCallback);
  }
  batch.execute();
}
Aggregations