Use of com.google.cloud.teleport.v2.io.AvroSinkWithJodaDatesConversion in project DataflowTemplates by GoogleCloudPlatform.
From the class GenericRecordsToGcsPartitioned, method expand.
/**
 * Writes the incoming records to {@code gcsPath} in the configured output file format and emits
 * one {@link PartitionMetadata} for each entry of the write's per-destination output filenames.
 *
 * <p>When either {@code partitionColumnName} or {@code partitioningSchema} is {@code null}, the
 * records are written without partitioning and every emitted metadata element carries the single
 * value {@code "1"}. Otherwise each record is routed to the destination computed by
 * {@code partitioningSchema.toPartition} from the record's partition column value, converted to
 * epoch millis and placed in the zone returned by {@code getZoneId} for the parsed schema.
 *
 * @param input records to write
 * @return metadata holding the partition values and the location obtained by passing each output
 *     file path through {@code withoutFileName}
 * @throws UnsupportedOperationException if the output format is neither PARQUET nor AVRO
 */
@Override
public PCollection<PartitionMetadata> expand(PCollection<GenericRecord> input) {
  Schema avroSchema = SchemaUtils.parseAvroSchema(serializedAvroSchema);

  // Select the sink implementation matching the requested output format.
  Sink<GenericRecord> fileSink;
  switch (outputFileFormat) {
    case PARQUET:
      fileSink = ParquetIO.sink(avroSchema);
      break;
    case AVRO:
      fileSink = new AvroSinkWithJodaDatesConversion<>(avroSchema);
      break;
    default:
      throw new UnsupportedOperationException(
          "Output format is not implemented: " + outputFileFormat);
  }

  boolean writeUnpartitioned = partitionColumnName == null || partitioningSchema == null;
  if (writeUnpartitioned) {
    LOG.info(
        "PartitionColumnName or/and PartitioningSchema not provided. Writing to GCS without partition");

    FileIO.Write<Void, GenericRecord> plainWrite =
        FileIO.<GenericRecord>write()
            .withSuffix(outputFileFormat.getFileSuffix())
            .via(fileSink)
            .to(gcsPath);

    PCollection<KV<Void, String>> writtenFiles =
        input
            .apply("Write to Storage with No Partition", plainWrite)
            .getPerDestinationOutputFilenames();

    // The destination key is Void here, so only the file name is of interest.
    PCollection<String> filePaths =
        writtenFiles.apply(
            "MapFileNames",
            MapElements.into(TypeDescriptors.strings())
                .via((SerializableFunction<KV<Void, String>, String>) KV::getValue));

    return filePaths.apply(
        MapElements.via(
            new SimpleFunction<String, PartitionMetadata>() {
              @Override
              public PartitionMetadata apply(String filePath) {
                return PartitionMetadata.builder()
                    .setValues(ImmutableList.of("1"))
                    .setLocation(withoutFileName(filePath))
                    .build();
              }
            }));
  }

  ZoneId partitionZone = getZoneId(avroSchema);

  FileIO.Write<List<KV<String, Integer>>, GenericRecord> partitionedWrite =
      FileIO.<List<KV<String, Integer>>, GenericRecord>writeDynamic()
          .by(
              (GenericRecord record) -> {
                Instant columnInstant =
                    Instant.ofEpochMilli(
                        partitionColumnValueToMillis(record.get(partitionColumnName)));
                return partitioningSchema.toPartition(columnInstant.atZone(partitionZone));
              })
          .withDestinationCoder(ListCoder.of(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())))
          .via(fileSink)
          .to(gcsPath)
          // must be 1 as we can only have 1 file per Dataplex partition
          .withNumShards(1)
          .withNaming(
              partition ->
                  Write.defaultNaming(
                      partitionToPath(partition), outputFileFormat.getFileSuffix()));

  PCollection<KV<List<KV<String, Integer>>, String>> partitionFiles =
      input.apply(partitionedWrite).getPerDestinationOutputFilenames();

  return partitionFiles.apply(
      MapElements.via(
          new SimpleFunction<KV<List<KV<String, Integer>>, String>, PartitionMetadata>() {
            @Override
            public PartitionMetadata apply(KV<List<KV<String, Integer>>, String> entry) {
              List<KV<String, Integer>> partition = entry.getKey();
              String filePath = entry.getValue();
              if (partition == null) {
                throw new IllegalStateException("Partition is null for path " + filePath);
              }
              if (filePath == null) {
                throw new IllegalStateException("Path is null for partition " + partition);
              }
              // Only the numeric part of each (name, value) pair goes into the metadata values.
              ImmutableList<String> partitionValues =
                  partition.stream()
                      .map(field -> String.valueOf(field.getValue()))
                      .collect(toImmutableList());
              return PartitionMetadata.builder()
                  .setValues(partitionValues)
                  .setLocation(withoutFileName(filePath))
                  .build();
            }
          }));
}
Aggregations