Use of com.google.cloud.teleport.v2.transforms.GenericRecordsToGcsPartitioned in the project DataflowTemplates by GoogleCloudPlatform.
From the class GenericRecordsToGcsPartitionedTest, method testHourlyPartitioning.
/**
 * Exercises {@code GenericRecordsToGcsPartitioned} with {@code PartitioningSchema.HOURLY} and
 * Avro output.
 *
 * <p>Four records are fed through the transform, partitioned on the {@code "date"} field. The
 * test expects exactly three {@code PartitionMetadata} elements (one per distinct
 * year/month/day/hour directory) and afterwards calls {@code verifyRecordsExists} with the glob
 * of each directory and the records expected there.
 *
 * <p>NOTE(review): the arguments of {@code dateToEpochMillis} look like (year, month, day, hour),
 * judging by the expected partition values - confirm against the helper.
 */
@Test
public void testHourlyPartitioning() {
  // Two records that are expected to land in the same hourly partition.
  Record firstHourTrue = new Record(SCHEMA);
  firstHourTrue.put("x", true);
  firstHourTrue.put("date", dateToEpochMillis(2010, 1, 1, 1));

  Record firstHourFalse = new Record(SCHEMA);
  firstHourFalse.put("x", false);
  firstHourFalse.put("date", dateToEpochMillis(2010, 1, 1, 1));

  // Same day, different hour.
  Record secondHour = new Record(SCHEMA);
  secondHour.put("x", true);
  secondHour.put("date", dateToEpochMillis(2010, 1, 1, 2));

  // Different day, same hour value as the first pair.
  Record nextDay = new Record(SCHEMA);
  nextDay.put("x", true);
  nextDay.put("date", dateToEpochMillis(2010, 1, 2, 1));

  String rootPath = temporaryFolder.getRoot().getAbsolutePath();
  String day1Hour1Dir = rootPath + "/year=2010/month=1/day=1/hour=1";
  String day1Hour2Dir = rootPath + "/year=2010/month=1/day=1/hour=2";
  String day2Hour1Dir = rootPath + "/year=2010/month=1/day=2/hour=1";

  GenericRecordsToGcsPartitioned hourlyWrite =
      new GenericRecordsToGcsPartitioned(
          rootPath, SERIALIZED_SCHEMA, "date", PartitioningSchema.HOURLY, FileFormatOptions.AVRO);

  PCollection<GenericRecord> input =
      mainPipeline.apply(
          Create.<GenericRecord>of(firstHourTrue, firstHourFalse, secondHour, nextDay)
              .withCoder(AvroCoder.of(SCHEMA)));
  PCollection<PartitionMetadata> partitions = input.apply("GenericRecordsToGCS", hourlyWrite);

  PartitionMetadata expectedDay1Hour1 =
      PartitionMetadata.builder()
          .setLocation(day1Hour1Dir)
          .setValues(ImmutableList.of("2010", "1", "1", "1"))
          .build();
  PartitionMetadata expectedDay1Hour2 =
      PartitionMetadata.builder()
          .setLocation(day1Hour2Dir)
          .setValues(ImmutableList.of("2010", "1", "1", "2"))
          .build();
  PartitionMetadata expectedDay2Hour1 =
      PartitionMetadata.builder()
          .setLocation(day2Hour1Dir)
          .setValues(ImmutableList.of("2010", "1", "2", "1"))
          .build();

  // The assertion has to be registered before the pipeline is run.
  PAssert.that(partitions)
      .containsInAnyOrder(expectedDay1Hour1, expectedDay1Hour2, expectedDay2Hour1);

  mainPipeline.run();

  verifyRecordsExists(day1Hour1Dir + "/*", firstHourTrue, firstHourFalse);
  verifyRecordsExists(day1Hour2Dir + "/*", secondHour);
  verifyRecordsExists(day2Hour1Dir + "/*", nextDay);
}
Use of com.google.cloud.teleport.v2.transforms.GenericRecordsToGcsPartitioned in the project DataflowTemplates by GoogleCloudPlatform.
From the class DataplexJdbcIngestion, method buildGcsPipeline.
/**
 * Wires the JDBC-to-GCS steps onto {@code pipeline}.
 *
 * <p>The steps are: infer a Beam schema from the configured query, derive the matching Avro
 * schema, read rows through {@code DynamicJdbcIO}, convert each row to a {@code GenericRecord},
 * and apply {@code GenericRecordsToGcsPartitioned} with the partition column, partitioning scheme
 * and file format taken from {@code options}. The method only builds the graph; it does not run
 * the pipeline.
 *
 * @param pipeline pipeline the transforms are applied to
 * @param options supplies the query, partition column, partitioning scheme and file format
 * @param dataSourceConfig JDBC configuration used for schema inference and for reading
 * @param targetRootPath root path handed to the partitioned writer
 */
@VisibleForTesting
static void buildGcsPipeline(
    Pipeline pipeline,
    DataplexJdbcIngestionOptions options,
    DynamicDataSourceConfiguration dataSourceConfig,
    String targetRootPath) {
  // Infer the Beam schema from the query, then derive the Avro schema from it.
  Schema rowSchema =
      Schemas.jdbcSchemaToBeamSchema(dataSourceConfig.buildDatasource(), options.getQuery());
  org.apache.avro.Schema recordSchema = AvroUtils.toAvroSchema(rowSchema);

  // Read the query result as Beam rows.
  PCollection<Row> rows =
      pipeline.apply(
          "Read from JdbcIO",
          DynamicJdbcIO.<Row>read()
              .withDataSourceConfiguration(dataSourceConfig)
              .withQuery(options.getQuery())
              .withCoder(RowCoder.of(rowSchema))
              .withRowMapper(BeamSchemaUtil.of(rowSchema)));

  // Turn every Beam row into an Avro GenericRecord.
  PCollection<GenericRecord> records =
      rows.apply("convert to GenericRecord", ParDo.of(new BeamRowToGenericRecordFn(recordSchema)))
          .setCoder(AvroCoder.of(recordSchema));

  // Write the records, partitioned, below targetRootPath.
  GenericRecordsToGcsPartitioned partitionedWrite =
      new GenericRecordsToGcsPartitioned(
          targetRootPath,
          Schemas.serialize(recordSchema),
          options.getParitionColumn(),
          options.getPartitioningScheme(),
          options.getFileFormat());
  // The resulting partition metadata is not consumed by this method.
  records.apply("Write to GCS", partitionedWrite);
}
Use of com.google.cloud.teleport.v2.transforms.GenericRecordsToGcsPartitioned in the project DataflowTemplates by GoogleCloudPlatform.
From the class GenericRecordsToGcsPartitionedTest, method testDailyPartitioning.
/**
 * Exercises {@code GenericRecordsToGcsPartitioned} with {@code PartitioningSchema.DAILY} and Avro
 * output.
 *
 * <p>Four records are fed through the transform, partitioned on the {@code "date"} field. The
 * test expects exactly three {@code PartitionMetadata} elements (one per distinct year/month/day
 * directory) and afterwards calls {@code verifyRecordsExists} with the glob of each directory
 * and the records expected there.
 *
 * <p>NOTE(review): the arguments of {@code dateToEpochMillis} look like (year, month, day),
 * judging by the expected partition values - confirm against the helper.
 */
@Test
public void testDailyPartitioning() {
  // Two records that are expected to land in the same daily partition.
  Record firstDayTrue = new Record(SCHEMA);
  firstDayTrue.put("x", true);
  firstDayTrue.put("date", dateToEpochMillis(2010, 1, 1));

  Record firstDayFalse = new Record(SCHEMA);
  firstDayFalse.put("x", false);
  firstDayFalse.put("date", dateToEpochMillis(2010, 1, 1));

  // Same month, different day.
  Record secondDay = new Record(SCHEMA);
  secondDay.put("x", true);
  secondDay.put("date", dateToEpochMillis(2010, 1, 2));

  // Different month, same day value as the first pair.
  Record nextMonth = new Record(SCHEMA);
  nextMonth.put("x", true);
  nextMonth.put("date", dateToEpochMillis(2010, 2, 1));

  String rootPath = temporaryFolder.getRoot().getAbsolutePath();
  String month1Day1Dir = rootPath + "/year=2010/month=1/day=1";
  String month1Day2Dir = rootPath + "/year=2010/month=1/day=2";
  String month2Day1Dir = rootPath + "/year=2010/month=2/day=1";

  GenericRecordsToGcsPartitioned dailyWrite =
      new GenericRecordsToGcsPartitioned(
          rootPath, SERIALIZED_SCHEMA, "date", PartitioningSchema.DAILY, FileFormatOptions.AVRO);

  PCollection<GenericRecord> input =
      mainPipeline.apply(
          Create.<GenericRecord>of(firstDayTrue, firstDayFalse, secondDay, nextMonth)
              .withCoder(AvroCoder.of(SCHEMA)));
  PCollection<PartitionMetadata> partitions = input.apply("GenericRecordsToGCS", dailyWrite);

  PartitionMetadata expectedMonth1Day1 =
      PartitionMetadata.builder()
          .setLocation(month1Day1Dir)
          .setValues(ImmutableList.of("2010", "1", "1"))
          .build();
  PartitionMetadata expectedMonth1Day2 =
      PartitionMetadata.builder()
          .setLocation(month1Day2Dir)
          .setValues(ImmutableList.of("2010", "1", "2"))
          .build();
  PartitionMetadata expectedMonth2Day1 =
      PartitionMetadata.builder()
          .setLocation(month2Day1Dir)
          .setValues(ImmutableList.of("2010", "2", "1"))
          .build();

  // The assertion has to be registered before the pipeline is run.
  PAssert.that(partitions)
      .containsInAnyOrder(expectedMonth1Day1, expectedMonth1Day2, expectedMonth2Day1);

  mainPipeline.run();

  verifyRecordsExists(month1Day1Dir + "/*", firstDayTrue, firstDayFalse);
  verifyRecordsExists(month1Day2Dir + "/*", secondDay);
  verifyRecordsExists(month2Day1Dir + "/*", nextMonth);
}
Use of com.google.cloud.teleport.v2.transforms.GenericRecordsToGcsPartitioned in the project DataflowTemplates by GoogleCloudPlatform.
From the class GenericRecordsToGcsPartitionedTest, method testMonthlyPartitioning.
/**
 * Exercises {@code GenericRecordsToGcsPartitioned} with {@code PartitioningSchema.MONTHLY} and
 * Avro output.
 *
 * <p>Three records are fed through the transform, partitioned on the {@code "date"} field. The
 * test expects exactly two {@code PartitionMetadata} elements (one per distinct year/month
 * directory) and afterwards calls {@code verifyRecordsExists} with the glob of each directory
 * and the records expected there.
 *
 * <p>NOTE(review): the arguments of {@code dateToEpochMillis} look like (year, month), judging by
 * the expected partition values - confirm against the helper.
 */
@Test
public void testMonthlyPartitioning() {
  // Two records that are expected to land in the same monthly partition.
  Record firstMonthTrue = new Record(SCHEMA);
  firstMonthTrue.put("x", true);
  firstMonthTrue.put("date", dateToEpochMillis(2010, 1));

  Record firstMonthFalse = new Record(SCHEMA);
  firstMonthFalse.put("x", false);
  firstMonthFalse.put("date", dateToEpochMillis(2010, 1));

  // A single record for the following month.
  Record secondMonth = new Record(SCHEMA);
  secondMonth.put("x", true);
  secondMonth.put("date", dateToEpochMillis(2010, 2));

  String rootPath = temporaryFolder.getRoot().getAbsolutePath();
  String month1Dir = rootPath + "/year=2010/month=1";
  String month2Dir = rootPath + "/year=2010/month=2";

  GenericRecordsToGcsPartitioned monthlyWrite =
      new GenericRecordsToGcsPartitioned(
          rootPath, SERIALIZED_SCHEMA, "date", PartitioningSchema.MONTHLY, FileFormatOptions.AVRO);

  PCollection<GenericRecord> input =
      mainPipeline.apply(
          Create.<GenericRecord>of(firstMonthTrue, firstMonthFalse, secondMonth)
              .withCoder(AvroCoder.of(SCHEMA)));
  PCollection<PartitionMetadata> partitions = input.apply("GenericRecordsToGCS", monthlyWrite);

  PartitionMetadata expectedMonth1 =
      PartitionMetadata.builder()
          .setLocation(month1Dir)
          .setValues(ImmutableList.of("2010", "1"))
          .build();
  PartitionMetadata expectedMonth2 =
      PartitionMetadata.builder()
          .setLocation(month2Dir)
          .setValues(ImmutableList.of("2010", "2"))
          .build();

  // The assertion has to be registered before the pipeline is run.
  PAssert.that(partitions).containsInAnyOrder(expectedMonth1, expectedMonth2);

  mainPipeline.run();

  verifyRecordsExists(month1Dir + "/*", firstMonthTrue, firstMonthFalse);
  verifyRecordsExists(month2Dir + "/*", secondMonth);
}
Aggregations