Use of com.google.cloud.hadoop.io.bigquery.DirectBigQueryInputFormat.DirectBigQueryInputSplit in the project hadoop-connectors by GoogleCloudDataproc.
From the class DirectBigQueryRecordReaderTest, method testLimiting:
@Test
public void testLimiting() throws Exception {
  when(rowsStream.iterator()).thenReturn(RESPONSES_123.iterator());
  // Set limit lower
  split = new DirectBigQueryInputSplit("session", RAW_SCHEMA, 1);
  // Set up reader
  initialize();

  assertThat(reader.nextKeyValue()).isTrue();
  verify(bqClient).finalizeStream(eq(STREAM));
  assertThat(reader.getCurrentKey()).isEqualTo(NullWritable.get());
  assertThat(reader.getCurrentValue()).isEqualTo(avroRecord(1));
  for (int i = 2; i <= 3; i++) {
    assertThat(reader.nextKeyValue()).isTrue();
    assertThat(reader.getCurrentKey()).isEqualTo(NullWritable.get());
    assertThat(reader.getCurrentValue()).isEqualTo(avroRecord(i));
  }
  assertThat(reader.nextKeyValue()).isFalse();
}
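The test relies on fixtures such as RESPONSES_123, RAW_SCHEMA and avroRecord(i) that are defined elsewhere in DirectBigQueryRecordReaderTest. A minimal sketch of how such fixtures could be built is shown below; the schema, the field name "id", and the response(...) helper are assumptions for illustration (using Avro's SchemaBuilder/GenericDatumWriter and the Storage API v1beta1 AvroRows message), not the project's actual definitions.

// Hypothetical fixtures; names, schema, and encoding helper are assumed.
private static final Schema TEST_SCHEMA =
    SchemaBuilder.record("TestRecord").fields().requiredLong("id").endRecord();
private static final String RAW_SCHEMA = TEST_SCHEMA.toString();

private static GenericRecord avroRecord(long id) {
  return new GenericRecordBuilder(TEST_SCHEMA).set("id", id).build();
}

// Wraps one Avro-encoded record in a ReadRowsResponse, the way the Storage API
// v1beta1 returns Avro row batches.
private static ReadRowsResponse response(long id) {
  try {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new GenericDatumWriter<GenericRecord>(TEST_SCHEMA).write(avroRecord(id), encoder);
    encoder.flush();
    return ReadRowsResponse.newBuilder()
        .setAvroRows(
            AvroRows.newBuilder()
                .setSerializedBinaryRows(ByteString.copyFrom(out.toByteArray())))
        .build();
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}

// Assumed shape: three one-row responses carrying records 1, 2 and 3.
private static final ImmutableList<ReadRowsResponse> RESPONSES_123 =
    ImmutableList.of(response(1), response(2), response(3));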
Use of com.google.cloud.hadoop.io.bigquery.DirectBigQueryInputFormat.DirectBigQueryInputSplit in the project hadoop-connectors by GoogleCloudDataproc.
From the class DirectBigQueryRecordReader, method initialize:
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  DirectBigQueryInputSplit split = (DirectBigQueryInputSplit) genericSplit;
  schema = parser.parse(checkNotNull(split.getSchema(), "schema"));
  stream = Stream.newBuilder().setName(checkNotNull(split.getName(), "name")).build();
  // Read from the beginning of the stream assigned to this split.
  ReadRowsRequest request =
      ReadRowsRequest.newBuilder()
          .setReadPosition(StreamPosition.newBuilder().setStream(stream).build())
          .build();
  client = getClient(context.getConfiguration());
  responseIterator = client.readRowsCallable().call(request).iterator();
  // Records are decoded lazily from each response; start with an empty iterator.
  recordIterator = Collections.emptyIterator();
  limit = split.getLimit();
  idx = 0;
  finalized = false;
}
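For context, here is a minimal sketch of how nextKeyValue() might consume the state set up above and enforce the row limit. The decodeAvroRows helper and the current field are hypothetical names; this is an illustration consistent with the testLimiting expectations shown earlier, not the project's actual implementation.

// Illustrative only; decodeAvroRows and current are assumed names.
@Override
public boolean nextKeyValue() throws IOException {
  // Count the row about to be returned; once the soft limit is reached, ask the
  // server to finalize the stream so it stops producing further batches. Rows
  // already received are still returned, which is what testLimiting verifies.
  if (++idx >= limit && !finalized) {
    client.finalizeStream(stream);
    finalized = true;
  }
  // Refill the per-response record iterator from the gRPC response stream.
  while (!recordIterator.hasNext() && responseIterator.hasNext()) {
    recordIterator = decodeAvroRows(responseIterator.next(), schema);
  }
  if (!recordIterator.hasNext()) {
    return false;
  }
  current = recordIterator.next();
  return true;
}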
Use of com.google.cloud.hadoop.io.bigquery.DirectBigQueryInputFormat.DirectBigQueryInputSplit in the project hadoop-connectors by GoogleCloudDataproc.
From the class DirectBigQueryInputFormatTest, method getSplits:
@Test
public void getSplits() throws Exception {
  JobContext jobContext = new JobContextImpl(config, new JobID());
  CreateReadSessionRequest request =
      CreateReadSessionRequest.newBuilder()
          .setTableReference(
              TableReferenceProto.TableReference.newBuilder()
                  .setProjectId("publicdata")
                  .setDatasetId("test_dataset")
                  .setTableId("test_table"))
          .setRequestedStreams(3) // request 3, but only get 2 back
          .setParent("projects/foo-project")
          .setReadOptions(
              TableReadOptions.newBuilder()
                  .addAllSelectedFields(ImmutableList.of("foo", "bar"))
                  .setRowRestriction("foo == 0")
                  .build())
          .setFormat(DataFormat.AVRO)
          .build();
  ReadSession session =
      ReadSession.newBuilder()
          .setAvroSchema(AvroSchema.newBuilder().setSchema("schema").build())
          .addAllStreams(
              ImmutableList.of(
                  Stream.newBuilder().setName("stream1").build(),
                  Stream.newBuilder().setName("stream2").build()))
          .build();
  ImmutableList<DirectBigQueryInputSplit> expected =
      ImmutableList.of(
          new DirectBigQueryInputSplit("stream1", "schema", 14),
          new DirectBigQueryInputSplit("stream2", "schema", 14));
  when(bqClient.createReadSession(any(CreateReadSessionRequest.class))).thenReturn(session);

  List<InputSplit> splits = input.getSplits(jobContext);

  assertThat(splits).containsExactlyElementsIn(expected);
  verify(bqHelper).getTable(tableRef);
  verify(bqClient).createReadSession(request);
}
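For context, the shape of getSplits() this test exercises is roughly: look up the table, create a read session, then emit one DirectBigQueryInputSplit per returned stream, each carrying the session's Avro schema and a per-stream row limit. The sketch below is an illustration under those assumptions; getTableReference, getBigQueryHelper, buildRequest and computePerStreamLimit are hypothetical helpers, and the derivation of the limit (14 in the test fixture) is not shown.

// Illustrative only; the helper methods here are assumed, not the project's API.
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
  Configuration config = context.getConfiguration();
  TableReference tableRef = getTableReference(config);           // hypothetical helper
  Table table = getBigQueryHelper(config).getTable(tableRef);    // mocked as bqHelper in the test
  ReadSession session =
      getClient(config).createReadSession(buildRequest(config, table)); // hypothetical helpers
  long limit = computePerStreamLimit(config, table);             // 14 in the test fixture
  List<InputSplit> splits = new ArrayList<>();
  for (Stream stream : session.getStreamsList()) {
    splits.add(
        new DirectBigQueryInputSplit(
            stream.getName(), session.getAvroSchema().getSchema(), limit));
  }
  return splits;
}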