Example usage of com.google.cloud.teleport.v2.transforms.FormatDatastreamJsonToJson in the DataflowTemplates project by GoogleCloudPlatform.
From the class DataStreamIO, method expandDataStreamJsonStrings.
/**
 * Expands a collection of DataStream files into a collection of JSON-string records wrapped in
 * {@link FailsafeElement}s.
 *
 * <p>When {@code fileType} equals {@code JSON_SUFFIX}, the files are reshuffled into
 * {@code fileReadConcurrency} buckets, read with {@code TextIO.readFiles()}, reshuffled again and
 * parsed by {@link FormatDatastreamJsonToJson}. Otherwise the files are reshuffled and read through
 * {@code ReadFileRangesFn}, with each {@code GenericRecord} converted by
 * {@code FormatDatastreamRecordToJson} (the step is named "ParseAvroRows").
 *
 * <p>Both branches apply the same stream name, column renames, row-id hashing flag and
 * source-column lowercasing flag, set the same coder on their output, and the result is
 * reshuffled once more before being returned.
 *
 * @param datastreamFiles the matched DataStream files to read
 * @return the parsed records after a final {@code Reshuffle.viaRandomKey()}
 */
public PCollection<FailsafeElement<String, String>> expandDataStreamJsonStrings(
    PCollection<ReadableFile> datastreamFiles) {
  PCollection<FailsafeElement<String, String>> datastreamRecords;

  // Parameterized instead of a raw type so that the setCoder(...) calls and the
  // CreateParseSourceFn constructor below are checked by the compiler.
  FailsafeElementCoder<String, String> coder =
      FailsafeElementCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of());

  if (this.fileType.equals(JSON_SUFFIX)) {
    datastreamRecords =
        datastreamFiles
            .apply(
                "FileReadConcurrency",
                Reshuffle.<ReadableFile>viaRandomKey().withNumBuckets(fileReadConcurrency))
            .apply("ReadFiles", TextIO.readFiles())
            .apply("ReshuffleRecords", Reshuffle.viaRandomKey())
            .apply(
                "ParseJsonRecords",
                ParDo.of(
                    // Cast kept from the original. NOTE(review): likely redundant if the
                    // builder methods already return FormatDatastreamJsonToJson - confirm.
                    (FormatDatastreamJsonToJson)
                        FormatDatastreamJsonToJson.create()
                            .withStreamName(this.streamName)
                            .withRenameColumnValues(this.renameColumns)
                            .withHashRowId(this.hashRowId)
                            .withLowercaseSourceColumns(this.lowercaseSourceColumns)))
            .setCoder(coder);
  } else {
    SerializableFunction<GenericRecord, FailsafeElement<String, String>> parseFn =
        FormatDatastreamRecordToJson.create()
            .withStreamName(this.streamName)
            .withRenameColumnValues(this.renameColumns)
            .withHashRowId(this.hashRowId)
            .withLowercaseSourceColumns(this.lowercaseSourceColumns);
    datastreamRecords =
        datastreamFiles
            .apply("ReshuffleFiles", Reshuffle.<ReadableFile>viaRandomKey())
            .apply(
                "ParseAvroRows",
                ParDo.of(
                    new ReadFileRangesFn<FailsafeElement<String, String>>(
                        new CreateParseSourceFn(parseFn, coder),
                        new ReadFileRangesFn.ReadFileRangesFnExceptionHandler())))
            .setCoder(coder);
  }

  return datastreamRecords.apply("Reshuffle", Reshuffle.viaRandomKey());
}
Aggregations