use of com.google.cloud.spanner.Partition in project java-docs-samples by GoogleCloudPlatform.
the class BatchSample method main.
/**
 * This example showcases how to create a batch client, partition a query, and concurrently read
 * from multiple partitions.
 */
public static void main(String[] args) throws InterruptedException {
  if (args.length != 2) {
    System.err.println("Usage: BatchSample <instance_id> <database_id>");
    return;
  }

  /*
   * CREATE TABLE Singers (
   *   SingerId   INT64 NOT NULL,
   *   FirstName  STRING(1024),
   *   LastName   STRING(1024),
   *   SingerInfo BYTES(MAX),
   * ) PRIMARY KEY (SingerId);
   */
  String instanceId = args[0];
  String databaseId = args[1];
  SpannerOptions options = SpannerOptions.newBuilder().build();
  Spanner spanner = options.getService();

  // [START spanner_batch_client]
  int numThreads = Runtime.getRuntime().availableProcessors();
  ExecutorService executor = Executors.newFixedThreadPool(numThreads);

  // Statistics
  int totalPartitions;
  AtomicInteger totalRecords = new AtomicInteger(0);
  try {
    BatchClient batchClient =
        spanner.getBatchClient(DatabaseId.of(options.getProjectId(), instanceId, databaseId));

    final BatchReadOnlyTransaction txn =
        batchClient.batchReadOnlyTransaction(TimestampBound.strong());

    // A Partition object is serializable and can be used from a different process.
    List<Partition> partitions =
        txn.partitionQuery(
            PartitionOptions.getDefaultInstance(),
            Statement.of("SELECT SingerId, FirstName, LastName FROM Singers"));

    totalPartitions = partitions.size();

    for (final Partition p : partitions) {
      executor.execute(
          () -> {
            try (ResultSet results = txn.execute(p)) {
              while (results.next()) {
                long singerId = results.getLong(0);
                String firstName = results.getString(1);
                String lastName = results.getString(2);
                System.out.println("[" + singerId + "] " + firstName + " " + lastName);
                totalRecords.getAndIncrement();
              }
            }
          });
    }
  } finally {
    executor.shutdown();
    executor.awaitTermination(1, TimeUnit.HOURS);
    spanner.close();
  }

  double avgRecordsPerPartition = 0.0;
  if (totalPartitions != 0) {
    avgRecordsPerPartition = (double) totalRecords.get() / totalPartitions;
  }

  System.out.println("totalPartitions=" + totalPartitions);
  System.out.println("totalRecords=" + totalRecords);
  System.out.println("avgRecordsPerPartition=" + avgRecordsPerPartition);
  // [END spanner_batch_client]
}
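The sample's comment notes that a Partition is serializable and can be consumed from a different process. The snippet below is a minimal sketch of that hand-off and is not part of the original sample: it assumes plain Java serialization (the usual java.io imports) and hypothetical coordinator/worker helpers, and relies on Partition and BatchTransactionId implementing java.io.Serializable and on BatchClient.batchReadOnlyTransaction(BatchTransactionId) re-attaching to the same read snapshot.

// Sketch only (assumption, not from the sample): ship a Partition plus the
// BatchTransactionId to another process and execute the partition there.

// Coordinator side: serialize the objects with standard Java serialization.
static byte[] toBytes(java.io.Serializable obj) throws IOException {
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  try (ObjectOutputStream oos = new ObjectOutputStream(bos)) {
    oos.writeObject(obj);
  }
  return bos.toByteArray();
}
// byte[] txnIdBytes = toBytes(txn.getBatchTransactionId());
// byte[] partitionBytes = toBytes(p);

// Worker side: rebuild the objects, re-attach to the batch transaction, and read.
static void readPartitionElsewhere(
    BatchClient batchClient, byte[] txnIdBytes, byte[] partitionBytes)
    throws IOException, ClassNotFoundException {
  BatchTransactionId txnId =
      (BatchTransactionId) new ObjectInputStream(new ByteArrayInputStream(txnIdBytes)).readObject();
  Partition partition =
      (Partition) new ObjectInputStream(new ByteArrayInputStream(partitionBytes)).readObject();
  BatchReadOnlyTransaction txn = batchClient.batchReadOnlyTransaction(txnId);
  try (ResultSet results = txn.execute(partition)) {
    while (results.next()) {
      System.out.println("[" + results.getLong(0) + "] " + results.getString(1));
    }
  }
}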
use of com.google.cloud.spanner.Partition in project beam by apache.
the class BatchSpannerRead method expand.
@Override
public PCollection<Struct> expand(PCollection<ReadOperation> input) {
  PCollectionView<Transaction> txView = getTxView();
  if (txView == null) {
    Pipeline begin = input.getPipeline();
    SpannerIO.CreateTransaction createTx =
        SpannerIO.createTransaction()
            .withSpannerConfig(getSpannerConfig())
            .withTimestampBound(getTimestampBound());
    txView = begin.apply(createTx);
  }
  return input
      .apply(
          "Generate Partitions",
          ParDo.of(new GeneratePartitionsFn(getSpannerConfig(), txView)).withSideInputs(txView))
      .apply("Shuffle partitions", Reshuffle.<Partition>viaRandomKey())
      .apply(
          "Read from Partitions",
          ParDo.of(new ReadFromPartitionFn(getSpannerConfig(), txView)).withSideInputs(txView));
}
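For context, a rough sketch of how this transform is typically reached from user code: a PCollection of ReadOperation elements is fed into SpannerIO.readAll(), whose expansion applies the method above (generate partitions, reshuffle them across workers, then read each partition). The pipeline and configuration names here are illustrative assumptions, not code from the Beam source.

PCollection<ReadOperation> readOps =
    pipeline.apply(
        Create.of(ReadOperation.create().withQuery("SELECT SingerId, FirstName FROM Singers")));
PCollection<Struct> rows =
    readOps.apply(
        "Read all",
        SpannerIO.readAll()
            .withSpannerConfig(spannerConfig)
            .withTimestampBound(TimestampBound.strong()));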
use of com.google.cloud.spanner.Partition in project beam by apache.
the class SpannerIOReadTest method testReadMetricsFail.
@Test
public void testReadMetricsFail() throws Exception {
  Timestamp timestamp = Timestamp.ofTimeMicroseconds(12345);
  TimestampBound timestampBound = TimestampBound.ofReadTimestamp(timestamp);
  SpannerConfig spannerConfig = getSpannerConfig();

  pipeline.apply(
      "read q",
      SpannerIO.read()
          .withSpannerConfig(spannerConfig)
          .withTable("users")
          .withColumns("id", "name")
          .withTimestampBound(timestampBound));

  FakeBatchTransactionId id = new FakeBatchTransactionId("runReadTest");
  when(mockBatchTx.getBatchTransactionId()).thenReturn(id);
  when(serviceFactory.mockBatchClient().batchReadOnlyTransaction(timestampBound))
      .thenReturn(mockBatchTx);
  when(serviceFactory.mockBatchClient().batchReadOnlyTransaction(any(BatchTransactionId.class)))
      .thenReturn(mockBatchTx);

  Partition fakePartition =
      FakePartitionFactory.createFakeReadPartition(ByteString.copyFromUtf8("one"));
  when(mockBatchTx.partitionRead(
          any(PartitionOptions.class),
          eq("users"),
          eq(KeySet.all()),
          eq(Arrays.asList("id", "name")),
          any(ReadQueryUpdateTransactionOption.class)))
      .thenReturn(Arrays.asList(fakePartition));
  when(mockBatchTx.execute(any(Partition.class)))
      .thenThrow(
          SpannerExceptionFactory.newSpannerException(
              ErrorCode.DEADLINE_EXCEEDED, "Simulated Timeout 1"));

  try {
    pipeline.run();
  } catch (PipelineExecutionException e) {
    if (e.getCause() instanceof SpannerException
        && ((SpannerException) e.getCause()).getErrorCode().getGrpcStatusCode()
            == Code.DEADLINE_EXCEEDED) {
      // expected
    } else {
      throw e;
    }
  }

  verifyMetricWasSet("test", "aaa", "123", "deadline_exceeded", null, 1);
  verifyMetricWasSet("test", "aaa", "123", "ok", null, 0);
}
use of com.google.cloud.spanner.Partition in project beam by apache.
the class SpannerIOReadTest method runQueryWithPriority.
@Test
public void runQueryWithPriority() throws Exception {
  Timestamp timestamp = Timestamp.ofTimeMicroseconds(12345);
  TimestampBound timestampBound = TimestampBound.ofReadTimestamp(timestamp);
  SpannerConfig spannerConfig = getSpannerConfig();

  Read read =
      SpannerIO.read()
          .withSpannerConfig(spannerConfig)
          .withQuery("SELECT * FROM users")
          .withTimestampBound(timestampBound)
          .withHighPriority();
  PCollection<Struct> one = pipeline.apply("read q", read);

  FakeBatchTransactionId id = new FakeBatchTransactionId("runQueryTest");
  when(mockBatchTx.getBatchTransactionId()).thenReturn(id);
  when(serviceFactory.mockBatchClient().batchReadOnlyTransaction(timestampBound))
      .thenReturn(mockBatchTx);
  when(serviceFactory.mockBatchClient().batchReadOnlyTransaction(any(BatchTransactionId.class)))
      .thenReturn(mockBatchTx);

  Partition fakePartition =
      FakePartitionFactory.createFakeQueryPartition(ByteString.copyFromUtf8("one"));
  when(mockBatchTx.partitionQuery(
          any(PartitionOptions.class),
          eq(Statement.of("SELECT * FROM users")),
          any(ReadQueryUpdateTransactionOption.class)))
      .thenReturn(Arrays.asList(fakePartition, fakePartition));
  when(mockBatchTx.execute(any(Partition.class)))
      .thenReturn(
          ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(0, 2)),
          ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(2, 6)));

  PAssert.that(one).containsInAnyOrder(FAKE_ROWS);
  assertEquals(RpcPriority.HIGH, read.getSpannerConfig().getRpcPriority().get());
  pipeline.run();
}
use of com.google.cloud.spanner.Partition in project beam by apache.
the class SpannerIOReadTest method readAllPipeline.
@Test
public void readAllPipeline() throws Exception {
  Timestamp timestamp = Timestamp.ofTimeMicroseconds(12345);
  TimestampBound timestampBound = TimestampBound.ofReadTimestamp(timestamp);
  SpannerConfig spannerConfig = getSpannerConfig();

  PCollectionView<Transaction> tx =
      pipeline.apply(
          "tx",
          SpannerIO.createTransaction()
              .withSpannerConfig(spannerConfig)
              .withTimestampBound(timestampBound));

  PCollection<ReadOperation> reads =
      pipeline.apply(
          Create.of(
              ReadOperation.create().withQuery("SELECT * FROM users"),
              ReadOperation.create().withTable("users").withColumns("id", "name")));

  PCollection<Struct> one =
      reads.apply(
          "read all", SpannerIO.readAll().withSpannerConfig(spannerConfig).withTransaction(tx));

  BatchTransactionId txId = new FakeBatchTransactionId("tx");
  when(mockBatchTx.getBatchTransactionId()).thenReturn(txId);
  when(serviceFactory.mockBatchClient().batchReadOnlyTransaction(timestampBound))
      .thenReturn(mockBatchTx);
  when(serviceFactory.mockBatchClient().batchReadOnlyTransaction(any(BatchTransactionId.class)))
      .thenReturn(mockBatchTx);

  Partition fakePartition =
      FakePartitionFactory.createFakeReadPartition(ByteString.copyFromUtf8("partition"));
  when(mockBatchTx.partitionQuery(
          any(PartitionOptions.class),
          eq(Statement.of("SELECT * FROM users")),
          any(ReadQueryUpdateTransactionOption.class)))
      .thenReturn(Arrays.asList(fakePartition, fakePartition));
  when(mockBatchTx.partitionRead(
          any(PartitionOptions.class),
          eq("users"),
          eq(KeySet.all()),
          eq(Arrays.asList("id", "name")),
          any(ReadQueryUpdateTransactionOption.class)))
      .thenReturn(Arrays.asList(fakePartition));
  when(mockBatchTx.execute(any(Partition.class)))
      .thenReturn(
          ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(0, 2)),
          ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(2, 4)),
          ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(4, 6)));

  PAssert.that(one).containsInAnyOrder(FAKE_ROWS);
  pipeline.run();
}