Use of com.google.spanner.v1.Mutation in project DataflowTemplates by GoogleCloudPlatform.
Class ParquetToBigtableTest, method applyParquettoBigtableFn.
/**
 * Test whether {@link ParquetToBigtable} correctly maps a GenericRecord to a KV.
 *
 * <p>Two Parquet rows ({@code row1} with four cells, {@code row2} with one cell) are pushed
 * through {@code ParquetToBigtableFn}; the output must contain, in any order, one KV per input
 * row carrying the same family / column / value combinations.
 *
 * @throws Exception declared by the original signature; the visible statements call helpers
 *     defined elsewhere in the test class which may throw
 */
@Test
public void applyParquettoBigtableFn() throws Exception {
  // ---- Input: first Parquet row -------------------------------------------------------------
  // Encode the key with an explicit charset so the bytes never depend on the JVM default.
  byte[] rowKey1 = "row1".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  ByteBuffer key1 = ByteBuffer.wrap(rowKey1);
  List<BigtableCell> cells1 = new ArrayList<>();
  addParquetCell(cells1, "family1", "column1", 1, "10");
  addParquetCell(cells1, "family1", "column1", 2, "20");
  addParquetCell(cells1, "family1", "column2", 1, "30");
  addParquetCell(cells1, "family2", "column1", 1, "40");
  GenericRecord parquetRow1 =
      new GenericRecordBuilder(BigtableRow.getClassSchema())
          .set("key", key1)
          .set("cells", cells1)
          .build();

  // ---- Input: second Parquet row ------------------------------------------------------------
  byte[] rowKey2 = "row2".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  ByteBuffer key2 = ByteBuffer.wrap(rowKey2);
  List<BigtableCell> cells2 = new ArrayList<>();
  addParquetCell(cells2, "family2", "column2", 2, "40");
  GenericRecord parquetRow2 =
      new GenericRecordBuilder(BigtableRow.getClassSchema())
          .set("key", key2)
          .set("cells", cells2)
          .build();
  final List<GenericRecord> parquetRows = ImmutableList.of(parquetRow1, parquetRow2);

  // ---- Expected output: the arguments mirror the addParquetCell calls above one-to-one -------
  KV<ByteString, Iterable<Mutation>> rowMutations1 = createBigtableRowMutations("row1");
  addBigtableMutation(rowMutations1, "family1", "column1", 1, "10");
  addBigtableMutation(rowMutations1, "family1", "column1", 2, "20");
  addBigtableMutation(rowMutations1, "family1", "column2", 1, "30");
  addBigtableMutation(rowMutations1, "family2", "column1", 1, "40");
  KV<ByteString, Iterable<Mutation>> rowMutations2 = createBigtableRowMutations("row2");
  addBigtableMutation(rowMutations2, "family2", "column2", 2, "40");
  final List<KV<ByteString, Iterable<Mutation>>> expectedBigtableRows =
      ImmutableList.of(rowMutations1, rowMutations2);

  // ---- Pipeline under test ------------------------------------------------------------------
  PCollection<KV<ByteString, Iterable<Mutation>>> bigtableRows =
      pipeline
          .apply(
              "Create",
              Create.of(parquetRows)
                  .withCoder(AvroCoder.of(GenericRecord.class, BigtableRow.getClassSchema())))
          .apply("TransformToBigtable", ParDo.of(ParquetToBigtableFn.create()));

  PAssert.that(bigtableRows).containsInAnyOrder(expectedBigtableRows);
  pipeline.run().waitUntilFinish();
}
Use of com.google.spanner.v1.Mutation in project DataflowTemplates by GoogleCloudPlatform.
Class BeamRowToBigtableFnTest, method processElementWithListColumn.
/**
 * Feeds a single {@link Row} made of a row-key column and a string-array column through
 * {@code BeamRowToBigtableFn} and asserts that the output is one KV, keyed by the row-key value,
 * holding one mutation per array element with qualifiers {@code listColumnName[0..2]}.
 */
@Test
public void processElementWithListColumn() {
  final String family = "default";
  final String keyColumn = "rowkey";
  final String keyValue = "rowkeyvalue";
  final String arrayColumn = "listColumnName";

  final List<String> items = new ArrayList<>();
  items.add("first");
  items.add("second");
  items.add("third");

  // Schema: the key column is tagged with key-order metadata "0", followed by the array column.
  final Schema.Field keyField =
      Schema.Field.of(
          keyColumn,
          FieldType.STRING.withMetadata(CassandraRowMapperFn.KEY_ORDER_METADATA_KEY, "0"));
  final Schema.Field arrayField = Schema.Field.of(arrayColumn, FieldType.array(FieldType.STRING));
  final Schema rowSchema = Schema.builder().addField(keyField).addField(arrayField).build();

  final Row onlyRow = Row.withSchema(rowSchema).addValue(keyValue).addValue(items).build();
  final List<Row> inputRows = Collections.singletonList(onlyRow);

  // Expected: one mutation per list element, in list order.
  final List<Mutation> expectedMutations = new ArrayList<>();
  expectedMutations.add(
      createMutation(family, "listColumnName[0]", ByteString.copyFrom(Bytes.toBytes(items.get(0)))));
  expectedMutations.add(
      createMutation(family, "listColumnName[1]", ByteString.copyFrom(Bytes.toBytes(items.get(1)))));
  expectedMutations.add(
      createMutation(family, "listColumnName[2]", ByteString.copyFrom(Bytes.toBytes(items.get(2)))));

  final KV<ByteString, Iterable<Mutation>> expectedRow =
      KV.of(ByteString.copyFrom(Bytes.toBytes("rowkeyvalue")), expectedMutations);
  final List<KV<ByteString, Iterable<Mutation>>> expectedBigtableRows =
      ImmutableList.of(expectedRow);

  // Pipeline under test: "#" and "default" are the two providers handed to the fn factory.
  final PCollection<Row> source = pipeline.apply("Create", Create.of(inputRows));
  final PCollection<KV<ByteString, Iterable<Mutation>>> actualRows =
      source.apply(
          "Transform to Bigtable",
          ParDo.of(
              BeamRowToBigtableFn.create(
                  ValueProvider.StaticValueProvider.of("#"),
                  ValueProvider.StaticValueProvider.of("default"))));

  PAssert.that(actualRows).containsInAnyOrder(expectedBigtableRows);
  pipeline.run();
}
Use of com.google.spanner.v1.Mutation in project DataflowTemplates by GoogleCloudPlatform.
Class BeamRowToBigtableFnTest, method processElementWithSplitLargeRows.
/**
 * Verifies the output of {@code BeamRowToBigtableFn.createWithSplitLargeRows(...)} for a row
 * whose non-key columns yield ten mutations in total (one int32, three map key/value pairs and
 * three list elements).
 *
 * <p>The expected result is three KVs sharing the same row key and holding 4, 4 and 2 mutations
 * respectively. The final argument {@code 4} passed to the factory is presumably the maximum
 * number of mutations per emitted KV - TODO confirm against the factory's documentation.
 */
@Test
public void processElementWithSplitLargeRows() {
  String columnFamily = "default";
  String rowKeyValue = "rowkeyvalue";
  String rowKeyColumnName = "rowkey";
  // Int32
  int int32Value = Integer.MAX_VALUE;
  String int32ColumnName = "int32Column";
  // List
  String listColumnName = "listColumnName";
  List<String> listValue = new ArrayList<>();
  listValue.add("first");
  listValue.add("second");
  listValue.add("third");
  // Map
  // NOTE(review): the expected mutations below assume the map entries are emitted in key order
  // 0, 1, 2; HashMap does not promise an iteration order by contract - verify if this flakes.
  String mapColumnName = "mapColumnName";
  Map<Integer, String> mapValue = new HashMap<>();
  mapValue.put(0, "first");
  mapValue.put(1, "second");
  mapValue.put(2, "third");

  Schema schema =
      Schema.builder()
          .addField(
              Schema.Field.of(
                  rowKeyColumnName,
                  FieldType.STRING.withMetadata(CassandraRowMapperFn.KEY_ORDER_METADATA_KEY, "0")))
          .addInt32Field(int32ColumnName)
          .addField(Schema.Field.of(mapColumnName, FieldType.map(FieldType.INT32, FieldType.STRING)))
          .addField(Schema.Field.of(listColumnName, FieldType.array(FieldType.STRING)))
          .build();
  Row input =
      Row.withSchema(schema)
          .addValue(rowKeyValue)
          .addValue(int32Value)
          .addValue(mapValue)
          // addValue, not addValues: the whole list is the single value of the array field.
          // This matches how the list-column test builds its row.
          .addValue(listValue)
          .build();
  final List<Row> rows = Collections.singletonList(input);

  // Setup the pipeline
  PCollection<KV<ByteString, Iterable<Mutation>>> bigtableRows =
      pipeline
          .apply("Create", Create.of(rows))
          .apply(
              "Transform to Bigtable",
              ParDo.of(
                  BeamRowToBigtableFn.createWithSplitLargeRows(
                      ValueProvider.StaticValueProvider.of("#"),
                      ValueProvider.StaticValueProvider.of(columnFamily),
                      ValueProvider.StaticValueProvider.of(true),
                      4)));

  // Setup the expected values and match with returned values.
  // First group: int32 column plus the first three map-derived cells.
  List<Mutation> mutations1 = new ArrayList<>();
  mutations1.add(
      createMutation(columnFamily, int32ColumnName, ByteString.copyFrom(Bytes.toBytes(int32Value))));
  mutations1.add(
      createMutation(columnFamily, "mapColumnName[0].key", ByteString.copyFrom(Bytes.toBytes(0))));
  mutations1.add(
      createMutation(
          columnFamily,
          "mapColumnName[0].value",
          ByteString.copyFrom(Bytes.toBytes(mapValue.get(0)))));
  mutations1.add(
      createMutation(columnFamily, "mapColumnName[1].key", ByteString.copyFrom(Bytes.toBytes(1))));

  // Second group: remaining map cells plus the first list element.
  List<Mutation> mutations2 = new ArrayList<>();
  mutations2.add(
      createMutation(
          columnFamily,
          "mapColumnName[1].value",
          ByteString.copyFrom(Bytes.toBytes(mapValue.get(1)))));
  mutations2.add(
      createMutation(columnFamily, "mapColumnName[2].key", ByteString.copyFrom(Bytes.toBytes(2))));
  mutations2.add(
      createMutation(
          columnFamily,
          "mapColumnName[2].value",
          ByteString.copyFrom(Bytes.toBytes(mapValue.get(2)))));
  mutations2.add(
      createMutation(
          columnFamily, "listColumnName[0]", ByteString.copyFrom(Bytes.toBytes(listValue.get(0)))));

  // Third group: the two remaining list elements.
  List<Mutation> mutations3 = new ArrayList<>();
  mutations3.add(
      createMutation(
          columnFamily, "listColumnName[1]", ByteString.copyFrom(Bytes.toBytes(listValue.get(1)))));
  mutations3.add(
      createMutation(
          columnFamily, "listColumnName[2]", ByteString.copyFrom(Bytes.toBytes(listValue.get(2)))));

  // Every emitted KV carries the same row key, derived from the input's key column value.
  final ByteString expectedRowKey = ByteString.copyFrom(Bytes.toBytes(rowKeyValue));
  final List<KV<ByteString, Iterable<Mutation>>> expectedBigtableRows =
      ImmutableList.of(
          KV.of(expectedRowKey, mutations1),
          KV.of(expectedRowKey, mutations2),
          KV.of(expectedRowKey, mutations3));
  PAssert.that(bigtableRows).containsInAnyOrder(expectedBigtableRows);

  // Run the pipeline
  pipeline.run();
}
Use of com.google.spanner.v1.Mutation in project DataflowTemplates by GoogleCloudPlatform.
Class BeamRowToBigtableFnTest, method processElementWithPrimaryKey.
/**
 * Sends one {@link Row} with a key column and a single string column through
 * {@code BeamRowToBigtableFn} and asserts that the result is one KV whose key is the bytes of
 * the key-column value and whose only mutation targets the string column.
 */
@Test
public void processElementWithPrimaryKey() {
  final String family = "default";
  final String keyColumn = "rowkey";
  final String keyValue = "this-is-a-value-in-column-rowkey";
  final String textColumn = "stringColumn";
  final String textValue = "hello this is a random string";

  // Schema: key column tagged with key-order metadata "0", then the plain string column.
  final Schema.Field keyField =
      Schema.Field.of(
          keyColumn,
          FieldType.STRING.withMetadata(CassandraRowMapperFn.KEY_ORDER_METADATA_KEY, "0"));
  final Schema rowSchema = Schema.builder().addField(keyField).addStringField(textColumn).build();

  final Row onlyRow = Row.withSchema(rowSchema).addValue(keyValue).addValue(textValue).build();
  final List<Row> inputRows = Collections.singletonList(onlyRow);

  // Expected: exactly one mutation, for the non-key column.
  final List<Mutation> expectedMutations = new ArrayList<>();
  expectedMutations.add(
      createMutation(family, textColumn, ByteString.copyFrom(Bytes.toBytes(textValue))));
  final KV<ByteString, Iterable<Mutation>> expectedRow =
      KV.of(ByteString.copyFrom(Bytes.toBytes(keyValue)), expectedMutations);
  final List<KV<ByteString, Iterable<Mutation>>> expectedBigtableRows =
      ImmutableList.of(expectedRow);

  // Pipeline under test.
  final PCollection<Row> source = pipeline.apply("Create", Create.of(inputRows));
  final PCollection<KV<ByteString, Iterable<Mutation>>> actualRows =
      source.apply(
          "Transform to Bigtable",
          ParDo.of(
              BeamRowToBigtableFn.create(
                  ValueProvider.StaticValueProvider.of("#"),
                  ValueProvider.StaticValueProvider.of("default"))));

  PAssert.that(actualRows).containsInAnyOrder(expectedBigtableRows);
  pipeline.run();
}
Use of com.google.spanner.v1.Mutation in project DataflowTemplates by GoogleCloudPlatform.
Class BeamRowToBigtableFnTest, method processElementWithPrimitives.
/**
 * Verifies the mutations produced by {@code BeamRowToBigtableFn} for a row containing one
 * column of each primitive Beam field type used here: byte, byte array, int16, int32, int64,
 * decimal, float, double, string, date-time and boolean.
 *
 * <p>The expected output is a single KV keyed by the row-key value with eleven mutations, one
 * per non-key column in schema order. All values are expected as the {@code Bytes.toBytes}
 * encoding of the input, except the single byte and the byte array (copied verbatim) and the
 * date-time (expected as the bytes of the string {@code 1970-01-01T00:00:00.000Z}).
 */
@Test
public void processElementWithPrimitives() {
  String rowKeyValue = "thisistherowkeyvalue";
  String rowKeyColumnName = "rowkey";
  String columnFamily = "default";
  // Byte
  Byte byteValue = 20;
  String byteColumnName = "byteColumn";
  // ByteArray
  byte[] byteArrayValue = new byte[20];
  // Fixed seed: arbitrary but repeatable content, so a failing run can be reproduced exactly.
  new Random(42L).nextBytes(byteArrayValue);
  String byteArrayColumnName = "byteArrayColumn";
  // Int16
  short int16Value = Short.MAX_VALUE;
  String int16ColumnName = "int16Column";
  // Int32
  int int32Value = Integer.MAX_VALUE;
  String int32ColumnName = "int32Column";
  // Int64
  long int64Value = Long.MAX_VALUE;
  String int64ColumnName = "int64Column";
  // Decimal
  BigDecimal decimalValue = BigDecimal.valueOf(1000000);
  String decimalColumnName = "decimalColumn";
  // Float
  Float floatValue = Float.MAX_VALUE;
  String floatColumnName = "floatColumn";
  // Double
  double doubleValue = Double.MAX_VALUE;
  String doubleColumnName = "doubleColumn";
  // String
  String stringValue = "hello this is a random string";
  String stringColumnName = "stringColumn";
  // DateTime
  DateTime dateTimeValue = new DateTime(0);
  String dateTimeValueAsString = "1970-01-01T00:00:00.000Z";
  String dateTimeColumnName = "dateTimeColumn";
  // Boolean
  boolean booleanValue = true;
  String booleanColumnName = "booleanColumn";

  // Schema: key column (tagged with key-order metadata "0") followed by one field per type.
  Schema schema =
      Schema.builder()
          .addField(
              Schema.Field.of(
                  rowKeyColumnName,
                  FieldType.STRING.withMetadata(CassandraRowMapperFn.KEY_ORDER_METADATA_KEY, "0")))
          .addByteField(byteColumnName)
          .addByteArrayField(byteArrayColumnName)
          .addInt16Field(int16ColumnName)
          .addInt32Field(int32ColumnName)
          .addInt64Field(int64ColumnName)
          .addDecimalField(decimalColumnName)
          .addFloatField(floatColumnName)
          .addDoubleField(doubleColumnName)
          .addStringField(stringColumnName)
          .addDateTimeField(dateTimeColumnName)
          .addBooleanField(booleanColumnName)
          .build();
  // Values are added in the same order as the schema fields above.
  Row input =
      Row.withSchema(schema)
          .addValue(rowKeyValue)
          .addValue(byteValue)
          .addValue(byteArrayValue)
          .addValue(int16Value)
          .addValue(int32Value)
          .addValue(int64Value)
          .addValue(decimalValue)
          .addValue(floatValue)
          .addValue(doubleValue)
          .addValue(stringValue)
          .addValue(dateTimeValue)
          .addValue(booleanValue)
          .build();
  final List<Row> rows = Collections.singletonList(input);

  // Expected mutations, one per non-key column, in schema order.
  List<Mutation> mutations = new ArrayList<>();
  // A lone byte is expected as a one-element byte string.
  byte[] byteWrapper = new byte[1];
  byteWrapper[0] = byteValue;
  mutations.add(createMutation(columnFamily, byteColumnName, ByteString.copyFrom(byteWrapper)));
  mutations.add(
      createMutation(columnFamily, byteArrayColumnName, ByteString.copyFrom(byteArrayValue)));
  mutations.add(
      createMutation(columnFamily, int16ColumnName, ByteString.copyFrom(Bytes.toBytes(int16Value))));
  mutations.add(
      createMutation(columnFamily, int32ColumnName, ByteString.copyFrom(Bytes.toBytes(int32Value))));
  mutations.add(
      createMutation(columnFamily, int64ColumnName, ByteString.copyFrom(Bytes.toBytes(int64Value))));
  mutations.add(
      createMutation(
          columnFamily, decimalColumnName, ByteString.copyFrom(Bytes.toBytes(decimalValue))));
  mutations.add(
      createMutation(columnFamily, floatColumnName, ByteString.copyFrom(Bytes.toBytes(floatValue))));
  mutations.add(
      createMutation(
          columnFamily, doubleColumnName, ByteString.copyFrom(Bytes.toBytes(doubleValue))));
  mutations.add(
      createMutation(
          columnFamily, stringColumnName, ByteString.copyFrom(Bytes.toBytes(stringValue))));
  // The date-time is expected in its string form rather than as a numeric timestamp.
  mutations.add(
      createMutation(
          columnFamily,
          dateTimeColumnName,
          ByteString.copyFrom(Bytes.toBytes(dateTimeValueAsString))));
  mutations.add(
      createMutation(
          columnFamily, booleanColumnName, ByteString.copyFrom(Bytes.toBytes(booleanValue))));
  final List<KV<ByteString, Iterable<Mutation>>> expectedBigtableRows =
      ImmutableList.of(KV.of(ByteString.copyFrom(Bytes.toBytes(rowKeyValue)), mutations));

  // Pipeline under test.
  PCollection<KV<ByteString, Iterable<Mutation>>> bigtableRows =
      pipeline
          .apply("Create", Create.of(rows))
          .apply(
              "Transform to Bigtable",
              ParDo.of(
                  BeamRowToBigtableFn.create(
                      ValueProvider.StaticValueProvider.of("#"),
                      ValueProvider.StaticValueProvider.of("default"))));
  PAssert.that(bigtableRows).containsInAnyOrder(expectedBigtableRows);
  pipeline.run();
}
Aggregations