use of org.apache.beam.sdk.PipelineResult.State in project beam by apache.
the class BigQueryReadWriteIT method testSQLWriteAndRead_withDirectRead.
/**
 * Inserts one row into a {@code bigquery}-typed external table through Beam SQL, then reads the
 * table back with the read method set to {@code DIRECT_READ} and checks that the single row comes
 * back with the expected values and that the read pipeline finishes in {@code State.DONE}.
 */
@Test
public void testSQLWriteAndRead_withDirectRead() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());

  // Register the external table; TBLPROPERTIES selects DIRECT_READ as the read method.
  String createTableStatement =
      "CREATE EXTERNAL TABLE TEST( \n"
          + " c_bigint BIGINT, \n"
          + " c_tinyint TINYINT, \n"
          + " c_smallint SMALLINT, \n"
          + " c_integer INTEGER, \n"
          + " c_float FLOAT, \n"
          + " c_double DOUBLE, \n"
          + " c_boolean BOOLEAN, \n"
          + " c_timestamp TIMESTAMP, \n"
          + " c_varchar VARCHAR, \n "
          + " c_char CHAR, \n"
          + " c_arr ARRAY<VARCHAR> \n"
          + ") \n"
          + "TYPE 'bigquery' \n"
          + "LOCATION '"
          + bigQueryTestingTypes.tableSpec()
          + "' \n"
          + "TBLPROPERTIES "
          + "'{ "
          + METHOD_PROPERTY
          + ": \""
          + Method.DIRECT_READ.toString()
          + "\" }'";
  sqlEnv.executeDdl(createTableStatement);

  String insertStatement =
      "INSERT INTO TEST VALUES ("
          + "9223372036854775807, "
          + "127, "
          + "32767, "
          + "2147483647, "
          + "1.0, "
          + "1.0, "
          + "TRUE, "
          + "TIMESTAMP '2018-05-28 20:17:40.123', "
          + "'varchar', "
          + "'char', "
          + "ARRAY['123', '456']"
          + ")";
  // Parse the INSERT exactly once and attach it to the write pipeline.
  BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
  // Give the write pipeline up to five minutes before starting the read.
  pipeline.run().waitUntilFinish(Duration.standardMinutes(5));

  String selectTableStatement = "SELECT * FROM TEST";
  PCollection<Row> output =
      BeamSqlRelUtils.toPCollection(readPipeline, sqlEnv.parseQuery(selectTableStatement));

  // The table should contain exactly the row inserted above.
  PAssert.that(output)
      .containsInAnyOrder(
          row(
              SOURCE_SCHEMA_TWO,
              9223372036854775807L,
              (byte) 127,
              (short) 32767,
              2147483647,
              (float) 1.0,
              1.0,
              true,
              parseTimestampWithUTCTimeZone("2018-05-28 20:17:40.123"),
              "varchar",
              "char",
              Arrays.asList("123", "456")));

  PipelineResult.State state = readPipeline.run().waitUntilFinish(Duration.standardMinutes(5));
  assertThat(state, equalTo(State.DONE));
}
use of org.apache.beam.sdk.PipelineResult.State in project beam by apache.
the class BigQueryReadWriteIT method testSQLRead_withDirectRead_withProjectAndFilterPushDown.
/**
 * Inserts one row into a {@code bigquery}-typed external table configured for {@code DIRECT_READ},
 * then runs a query that selects two columns and filters on a third. Verifies that the planned
 * node is a {@code BeamPushDownIOSourceRel}, that only the selected columns appear in the row type
 * and output schema, that the expected row is produced, and that the read pipeline finishes in
 * {@code State.DONE}.
 */
@Test
public void testSQLRead_withDirectRead_withProjectAndFilterPushDown() {
  BeamSqlEnv sqlEnv = BeamSqlEnv.inMemory(new BigQueryTableProvider());

  // Register the external table; TBLPROPERTIES selects DIRECT_READ as the read method.
  String createTableStatement =
      "CREATE EXTERNAL TABLE TEST( \n"
          + " c_bigint BIGINT, \n"
          + " c_tinyint TINYINT, \n"
          + " c_smallint SMALLINT, \n"
          + " c_integer INTEGER, \n"
          + " c_float FLOAT, \n"
          + " c_double DOUBLE, \n"
          + " c_boolean BOOLEAN, \n"
          + " c_timestamp TIMESTAMP, \n"
          + " c_varchar VARCHAR, \n "
          + " c_char CHAR, \n"
          + " c_arr ARRAY<VARCHAR> \n"
          + ") \n"
          + "TYPE 'bigquery' \n"
          + "LOCATION '"
          + bigQueryTestingTypes.tableSpec()
          + "' \n"
          + "TBLPROPERTIES "
          + "'{ "
          + METHOD_PROPERTY
          + ": \""
          + Method.DIRECT_READ.toString()
          + "\" }'";
  sqlEnv.executeDdl(createTableStatement);

  String insertStatement =
      "INSERT INTO TEST VALUES ("
          + "9223372036854775807, "
          + "127, "
          + "32767, "
          + "2147483647, "
          + "1.0, "
          + "1.0, "
          + "TRUE, "
          + "TIMESTAMP '2018-05-28 20:17:40.123', "
          + "'varchar', "
          + "'char', "
          + "ARRAY['123', '456']"
          + ")";
  // Parse the INSERT exactly once and attach it to the write pipeline.
  BeamSqlRelUtils.toPCollection(pipeline, sqlEnv.parseQuery(insertStatement));
  // Give the write pipeline up to five minutes before starting the read.
  pipeline.run().waitUntilFinish(Duration.standardMinutes(5));

  String selectTableStatement = "SELECT c_varchar, c_integer FROM TEST where c_tinyint=127";
  BeamRelNode relNode = sqlEnv.parseQuery(selectTableStatement);
  PCollection<Row> output = BeamSqlRelUtils.toPCollection(readPipeline, relNode);

  // The whole query is expected to plan to a single push-down IO source node.
  assertThat(relNode, instanceOf(BeamPushDownIOSourceRel.class));
  // Unused fields should not be projected by an IO
  assertThat(relNode.getRowType().getFieldNames(), containsInAnyOrder("c_varchar", "c_integer"));
  assertThat(
      output.getSchema(),
      equalTo(
          Schema.builder()
              .addNullableField("c_varchar", STRING)
              .addNullableField("c_integer", INT32)
              .build()));

  PAssert.that(output).containsInAnyOrder(row(output.getSchema(), "varchar", 2147483647));

  PipelineResult.State state = readPipeline.run().waitUntilFinish(Duration.standardMinutes(5));
  assertThat(state, equalTo(State.DONE));
}
use of org.apache.beam.sdk.PipelineResult.State in project beam by apache.
the class DataflowPipelineJobTest method testWaitToFinishTimeFail.
/**
 * With the job-status request stubbed to throw {@link IOException} and a 4 ms timeout,
 * {@code waitUntilFinish} must return {@code null} after exactly 4 ms as measured by
 * {@code fastClock}.
 */
@Test
public void testWaitToFinishTimeFail() throws Exception {
  // Stub the Dataflow client so that fetching the job status always fails.
  Dataflow.Projects.Locations.Jobs.Get failingGet =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(failingGet);
  when(failingGet.execute()).thenThrow(IOException.class);

  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of());

  long beginNanos = fastClock.nanoTime();
  State finalState = pipelineJob.waitUntilFinish(Duration.millis(4), null, fastClock, fastClock);
  long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(fastClock.nanoTime() - beginNanos);

  assertEquals(null, finalState);
  // Should only have slept for the 4 ms allowed.
  assertEquals(4L, elapsedMillis);
}
use of org.apache.beam.sdk.PipelineResult.State in project beam by apache.
the class DataflowPipelineJobTest method testWaitToFinishFail.
/**
 * With the job-status request stubbed to throw {@link IOException} and a five-minute timeout,
 * {@code waitUntilFinish} must return {@code null}; the elapsed time on {@code fastClock} is then
 * checked against the message polling interval and retry count.
 */
@Test
public void testWaitToFinishFail() throws Exception {
  // Stub the Dataflow client so that fetching the job status always fails.
  Dataflow.Projects.Locations.Jobs.Get failingGet =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(failingGet);
  when(failingGet.execute()).thenThrow(IOException.class);

  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of());

  long beginNanos = fastClock.nanoTime();
  State finalState =
      pipelineJob.waitUntilFinish(Duration.standardMinutes(5), null, fastClock, fastClock);
  long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(fastClock.nanoTime() - beginNanos);

  assertEquals(null, finalState);
  // NOTE(review): checkValidInterval is defined elsewhere; presumably it bounds the elapsed time
  // by what the polling interval and retry count allow - confirm against the helper.
  checkValidInterval(
      DataflowPipelineJob.MESSAGES_POLLING_INTERVAL,
      DataflowPipelineJob.MESSAGES_POLLING_RETRIES,
      elapsedMillis);
}
use of org.apache.beam.sdk.PipelineResult.State in project beam by apache.
the class DataflowPipelineJobTest method testCumulativeTimeOverflow.
/**
 * With the job status stubbed to report {@code JOB_STATE_RUNNING} and a 4 ms timeout,
 * {@code waitUntilFinish} must return {@code null} without letting more than 4 ms elapse on a
 * fresh {@code FastNanoClockAndFuzzySleeper}.
 */
@Test
public void testCumulativeTimeOverflow() throws Exception {
  // The stubbed status response always reports the job as still running.
  Job runningJob = new Job();
  runningJob.setCurrentState("JOB_STATE_RUNNING");

  Dataflow.Projects.Locations.Jobs.Get jobGet = mock(Dataflow.Projects.Locations.Jobs.Get.class);
  when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(jobGet);
  when(jobGet.execute()).thenReturn(runningJob);

  // This test uses its own clock/sleeper instance rather than the shared fastClock.
  FastNanoClockAndFuzzySleeper fuzzyClock = new FastNanoClockAndFuzzySleeper();
  DataflowPipelineJob pipelineJob =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of());

  long beginNanos = fuzzyClock.nanoTime();
  State finalState = pipelineJob.waitUntilFinish(Duration.millis(4), null, fuzzyClock, fuzzyClock);
  long elapsedMillis = TimeUnit.NANOSECONDS.toMillis(fuzzyClock.nanoTime() - beginNanos);

  assertEquals(null, finalState);
  // Should only have slept for the 4 ms allowed.
  assertThat(elapsedMillis, lessThanOrEqualTo(4L));
}
Aggregations