Search in sources :

Example 6 with ValueInSingleWindow

use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.

the class StreamingWriteFn method finishBundle.

/**
 * Flushes every buffered table's rows to BigQuery through the streaming API, resets the
 * per-bundle buffers, and then emits whatever rows were collected as failed inserts on
 * {@code failedOutputTag}.
 *
 * @param context bundle context supplying the pipeline options and the output sink
 * @throws Exception if flushing rows for any table fails
 */
@FinishBundle
public void finishBundle(FinishBundleContext context) throws Exception {
    BigQueryOptions bqOptions = context.getPipelineOptions().as(BigQueryOptions.class);
    // Shared accumulator handed to every flushRows call; presumably flushRows appends the rows
    // it could not insert -- confirm against flushRows.
    List<ValueInSingleWindow<TableRow>> rejectedRows = Lists.newArrayList();

    for (Map.Entry<String, List<ValueInSingleWindow<TableRow>>> buffered : tableRows.entrySet()) {
        String tableSpec = buffered.getKey();
        TableReference destination = BigQueryHelpers.parseTableSpec(tableSpec);
        List<ValueInSingleWindow<TableRow>> pendingRows = buffered.getValue();
        flushRows(destination, pendingRows, uniqueIdsForTableRows.get(tableSpec), bqOptions, rejectedRows);
    }

    // Buffers are per bundle: drop them before emitting the failed rows.
    tableRows.clear();
    uniqueIdsForTableRows.clear();

    // Each failed row keeps its original timestamp and window on the failure output.
    for (ValueInSingleWindow<TableRow> rejected : rejectedRows) {
        context.output(failedOutputTag, rejected.getValue(), rejected.getTimestamp(), rejected.getWindow());
    }
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) TableRow(com.google.api.services.bigquery.model.TableRow) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map)

Example 7 with ValueInSingleWindow

use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.

the class GatherAllPanesTest method multiplePanesMultipleReifiedPane.

/**
 * Flattens two 0..20000 sequences, windows them into one-minute fixed windows with early
 * firings, gathers all panes, and asserts that at least one window produced more than one pane.
 */
@Test
@Category(NeedsRunner.class)
public void multiplePanesMultipleReifiedPane() {
    PCollection<Long> someElems = p.apply("someLongs", GenerateSequence.from(0).to(20000));
    PCollection<Long> otherElems = p.apply("otherLongs", GenerateSequence.from(0).to(20000));

    // Each element's timestamp is ten times its value.
    SerializableFunction<Long, Instant> timestampFn = new SerializableFunction<Long, Instant>() {

        @Override
        public Instant apply(Long input) {
            return new Instant(input * 10);
        }
    };

    // One-minute fixed windows that fire early once at least one element is available,
    // allow no lateness, and discard panes that have already fired.
    Window<Long> earlyFiringWindows = Window.<Long>into(FixedWindows.of(Duration.standardMinutes(1)))
        .triggering(AfterWatermark.pastEndOfWindow().withEarlyFirings(AfterPane.elementCountAtLeast(1)))
        .withAllowedLateness(Duration.ZERO)
        .discardingFiredPanes();

    PCollection<Long> merged = PCollectionList.of(someElems).and(otherElems).apply(Flatten.<Long>pCollections());
    PCollection<Long> timestamped = merged.apply(WithTimestamps.of(timestampFn));
    PCollection<Long> windowed = timestamped.apply(earlyFiringWindows);

    // Group everything under a single null key so each fired pane becomes one Iterable.
    PCollection<Iterable<ValueInSingleWindow<Iterable<Long>>>> accumulatedPanes = windowed
        .apply(WithKeys.<Void, Long>of((Void) null).withKeyType(new TypeDescriptor<Void>() {
        }))
        .apply(GroupByKey.<Void, Long>create())
        .apply(Values.<Iterable<Long>>create())
        .apply(GatherAllPanes.<Iterable<Long>>globally());

    PAssert.that(accumulatedPanes).satisfies(new SerializableFunction<Iterable<Iterable<ValueInSingleWindow<Iterable<Long>>>>, Void>() {

        @Override
        public Void apply(Iterable<Iterable<ValueInSingleWindow<Iterable<Long>>>> input) {
            boolean sawMultiplePanes = false;
            for (Iterable<ValueInSingleWindow<Iterable<Long>>> panesOfWindow : input) {
                if (Iterables.size(panesOfWindow) > 1) {
                    sawMultiplePanes = true;
                    break;
                }
            }
            if (!sawMultiplePanes) {
                fail("Expected at least one window to have multiple panes");
            }
            return null;
        }
    });
    p.run();
}
Also used : Instant(org.joda.time.Instant) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) Category(org.junit.experimental.categories.Category) Test(org.junit.Test)

Example 8 with ValueInSingleWindow

use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.

the class BigQueryUtilTest method testInsertAll.

/**
 * Inserts 25 identical rows through {@code DatasetServiceImpl#insertAll} with an always-retry
 * policy while the mocked client reports per-row errors on successive invocations, then checks
 * the number of insertAll calls and the total byte count returned.
 *
 * <p>The checks run after the insert call rather than in a {@code finally} block, so that an
 * exception thrown by {@code insertAll} is reported as-is instead of being replaced by a
 * follow-on verification failure.
 */
@Test
public void testInsertAll() throws Exception {
    // Build up a list of indices to fail on each invocation. This should result in
    // 5 calls to insertAll.
    List<List<Long>> errorsIndices = new ArrayList<>();
    errorsIndices.add(Arrays.asList(0L, 5L, 10L, 15L, 20L));
    errorsIndices.add(Arrays.asList(0L, 2L, 4L));
    errorsIndices.add(Arrays.asList(0L, 2L));
    errorsIndices.add(new ArrayList<Long>());
    onInsertAll(errorsIndices);

    TableReference ref = BigQueryHelpers.parseTableSpec("project:dataset.table");
    // NOTE(review): the third constructor argument (5) looks like a rows-per-request limit --
    // confirm against DatasetServiceImpl.
    DatasetServiceImpl datasetService = new DatasetServiceImpl(mockClient, options, 5);

    List<ValueInSingleWindow<TableRow>> rows = new ArrayList<>();
    List<String> ids = new ArrayList<>();
    for (int i = 0; i < 25; ++i) {
        rows.add(ValueInSingleWindow.of(rawRow("foo", 1234), GlobalWindow.TIMESTAMP_MAX_VALUE, GlobalWindow.INSTANCE, PaneInfo.ON_TIME_AND_ONLY_FIRING));
        // Every row gets an empty insert id.
        ids.add("");
    }

    long totalBytes = datasetService.insertAll(ref, rows, ids, InsertRetryPolicy.alwaysRetry(), null);

    verifyInsertAll(5);
    // Each of the 25 rows is 23 bytes: "{f=[{v=foo}, {v=1234}]}"
    assertEquals("Incorrect byte count", 25L * 23L, totalBytes);
}
Also used : DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) ArrayList(java.util.ArrayList) Matchers.anyString(org.mockito.Matchers.anyString) TableReference(com.google.api.services.bigquery.model.TableReference) Matchers.anyLong(org.mockito.Matchers.anyLong) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) LinkedList(java.util.LinkedList) List(java.util.List) TableDataList(com.google.api.services.bigquery.model.TableDataList) Test(org.junit.Test)

Example 9 with ValueInSingleWindow

use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.

the class BigQueryServicesImplTest method testInsertRetryPolicy.

/**
 * Tests that {@link DatasetServiceImpl#insertAll} uses the supplied {@link InsertRetryPolicy},
 * and returns the list of rows not retried.
 *
 * <p>Two rows are inserted with {@code InsertRetryPolicy.retryTransientErrors()}. The mocked
 * service answers three times: both rows fail, then the single remaining row fails again, then
 * everything succeeds. Exactly one row is expected in the failed-inserts list.
 */
@Test
public void testInsertRetryPolicy() throws InterruptedException, IOException {
    TableReference ref = new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
    List<ValueInSingleWindow<TableRow>> rows = ImmutableList.of(wrapTableRow(new TableRow()), wrapTableRow(new TableRow()));

    // First time row0 fails with a retryable error, and row1 fails with a persistent error.
    final TableDataInsertAllResponse firstFailure = new TableDataInsertAllResponse()
        .setInsertErrors(ImmutableList.of(
            new InsertErrors().setIndex(0L).setErrors(ImmutableList.of(new ErrorProto().setReason("timeout"))),
            new InsertErrors().setIndex(1L).setErrors(ImmutableList.of(new ErrorProto().setReason("invalid")))));
    // Second time there is only one row, which fails with a retryable error.
    final TableDataInsertAllResponse secondFailure = new TableDataInsertAllResponse()
        .setInsertErrors(ImmutableList.of(
            new InsertErrors().setIndex(0L).setErrors(ImmutableList.of(new ErrorProto().setReason("timeout")))));
    // On the final attempt, no failures are returned.
    final TableDataInsertAllResponse allRowsSucceeded = new TableDataInsertAllResponse();

    // Every HTTP response is a 200 with a JSON body; row failures are carried in the payload.
    when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
    when(response.getStatusCode()).thenReturn(200);
    // Payloads are served in order: both rows fail, one row fails, all rows succeed.
    when(response.getContent())
        .thenReturn(toStream(firstFailure))
        .thenReturn(toStream(secondFailure))
        .thenReturn(toStream(allRowsSucceeded));

    DatasetServiceImpl dataService = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
    List<ValueInSingleWindow<TableRow>> failedInserts = Lists.newArrayList();
    dataService.insertAll(ref, rows, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()), new MockSleeper(), InsertRetryPolicy.retryTransientErrors(), failedInserts);

    assertEquals(1, failedInserts.size());
    expectedLogs.verifyInfo("Retrying 1 failed inserts to BigQuery");
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) ErrorProto(com.google.api.services.bigquery.model.ErrorProto) DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) TableRow(com.google.api.services.bigquery.model.TableRow) TableDataInsertAllResponse(com.google.api.services.bigquery.model.TableDataInsertAllResponse) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) InsertErrors(com.google.api.services.bigquery.model.TableDataInsertAllResponse.InsertErrors) MockSleeper(com.google.api.client.testing.util.MockSleeper) Test(org.junit.Test)

Example 10 with ValueInSingleWindow

use of org.apache.beam.sdk.values.ValueInSingleWindow in project beam by apache.

the class BigQueryServicesImplTest method testInsertDoesNotRetry.

/**
 * Checks that {@link DatasetServiceImpl#insertAll} makes a single attempt when the service
 * answers with a 403 that is not a rate-limit error, and that the underlying
 * {@code GoogleJsonResponseException} is the cause of the failure.
 */
@Test
public void testInsertDoesNotRetry() throws Throwable {
    // The unwrapped cause rethrown at the end of this test must match these expectations.
    thrown.expect(GoogleJsonResponseException.class);
    thrown.expectMessage("actually forbidden");

    List<ValueInSingleWindow<TableRow>> singleRow = new ArrayList<>();
    singleRow.add(wrapTableRow(new TableRow()));
    TableReference target = new TableReference()
        .setProjectId("project")
        .setDatasetId("dataset")
        .setTableId("table");

    // First response is 403 not-rate-limited, second response has valid payload but should not
    // be invoked.
    when(response.getContentType()).thenReturn(Json.MEDIA_TYPE);
    when(response.getStatusCode())
        .thenReturn(403)
        .thenReturn(200);
    when(response.getContent())
        .thenReturn(toStream(errorWithReasonAndStatus("actually forbidden", 403)))
        .thenReturn(toStream(new TableDataInsertAllResponse()));

    DatasetServiceImpl service = new DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
    try {
        service.insertAll(target, singleRow, null, BackOffAdapter.toGcpBackOff(TEST_BACKOFF.backoff()), new MockSleeper(), InsertRetryPolicy.alwaysRetry(), null);
        // Reaching this line means the insert unexpectedly succeeded.
        fail();
    } catch (RuntimeException wrapped) {
        // Exactly one response was read, i.e. the second stubbed response was never used.
        verify(response, times(1)).getStatusCode();
        verify(response, times(1)).getContent();
        verify(response, times(1)).getContentType();
        // Surface the wrapped cause so the expectations declared at the top can match it.
        Throwable rootCause = wrapped.getCause();
        throw rootCause;
    }
}
Also used : TableReference(com.google.api.services.bigquery.model.TableReference) DatasetServiceImpl(org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl) TableRow(com.google.api.services.bigquery.model.TableRow) TableDataInsertAllResponse(com.google.api.services.bigquery.model.TableDataInsertAllResponse) ArrayList(java.util.ArrayList) ValueInSingleWindow(org.apache.beam.sdk.values.ValueInSingleWindow) MockSleeper(com.google.api.client.testing.util.MockSleeper) Test(org.junit.Test)

Aggregations

ValueInSingleWindow (org.apache.beam.sdk.values.ValueInSingleWindow)10 TableReference (com.google.api.services.bigquery.model.TableReference)8 Test (org.junit.Test)8 TableRow (com.google.api.services.bigquery.model.TableRow)7 DatasetServiceImpl (org.apache.beam.sdk.io.gcp.bigquery.BigQueryServicesImpl.DatasetServiceImpl)6 MockSleeper (com.google.api.client.testing.util.MockSleeper)5 TableDataInsertAllResponse (com.google.api.services.bigquery.model.TableDataInsertAllResponse)5 ArrayList (java.util.ArrayList)4 InsertErrors (com.google.api.services.bigquery.model.TableDataInsertAllResponse.InsertErrors)3 ErrorProto (com.google.api.services.bigquery.model.ErrorProto)2 List (java.util.List)2 Map (java.util.Map)2 Instant (org.joda.time.Instant)2 Category (org.junit.experimental.categories.Category)2 TableDataList (com.google.api.services.bigquery.model.TableDataList)1 TableFieldSchema (com.google.api.services.bigquery.model.TableFieldSchema)1 TableSchema (com.google.api.services.bigquery.model.TableSchema)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1