Example use of org.apache.beam.examples.complete.datatokenization.options.DataTokenizationOptions in the Apache Beam project.
From the class DataTokenizationTest, method fileSystemIORead.
/**
 * Reads rows through {@code TokenizationFileSystemIO} using options assembled for the test.
 *
 * <p>The options are populated with {@code SCHEMA_FILE_PATH}, the supplied file pattern and the
 * supplied format; for {@code FORMAT.CSV} the "contains headers" flag is switched off. Before
 * reading, the coders needed by the read are registered on the test pipeline's coder registry.
 *
 * @param inputGcsFilePattern file pattern set as the input file pattern option
 * @param inputGcsFileFormat format set as the input file format option
 * @return the collection produced by {@code TokenizationFileSystemIO.read}
 * @throws IOException declared by this method; presumably raised while loading the schema file
 *     (confirm against {@code SchemasUtils})
 */
private PCollection<Row> fileSystemIORead(String inputGcsFilePattern, FORMAT inputGcsFileFormat) throws IOException {
  final DataTokenizationOptions pipelineOptions =
      PipelineOptionsFactory.create().as(DataTokenizationOptions.class);
  pipelineOptions.setDataSchemaPath(SCHEMA_FILE_PATH);
  pipelineOptions.setInputFilePattern(inputGcsFilePattern);
  pipelineOptions.setInputFileFormat(inputGcsFileFormat);
  if (FORMAT.CSV == inputGcsFileFormat) {
    pipelineOptions.setCsvContainsHeaders(Boolean.FALSE);
  }

  final SchemasUtils schemaUtils =
      new SchemasUtils(pipelineOptions.getDataSchemaPath(), StandardCharsets.UTF_8);
  final CoderRegistry registry = testPipeline.getCoderRegistry();

  // One Row coder derived from the loaded Beam schema, shared by every registration below.
  final RowCoder rowCoder = RowCoder.of(schemaUtils.getBeamSchema());
  // Row/Row coder for FailsafeElement.
  final FailsafeElementCoder<Row, Row> rowFailsafeCoder =
      FailsafeElementCoder.of(rowCoder, rowCoder);

  registry.registerCoderForType(
      FAILSAFE_ELEMENT_CODER.getEncodedTypeDescriptor(), FAILSAFE_ELEMENT_CODER);
  registry.registerCoderForType(rowCoder.getEncodedTypeDescriptor(), rowCoder);
  registry.registerCoderForType(rowFailsafeCoder.getEncodedTypeDescriptor(), rowFailsafeCoder);

  final TokenizationFileSystemIO fileSystemIO = new TokenizationFileSystemIO(pipelineOptions);
  return fileSystemIO.read(testPipeline, schemaUtils);
}
Example use of org.apache.beam.examples.complete.datatokenization.options.DataTokenizationOptions in the Apache Beam project.
From the class DataTokenization, method main.
/**
 * Entry point: parses the command line into {@link DataTokenizationOptions} and runs the
 * pipeline with them.
 *
 * @param args command-line arguments, parsed (with validation) into the pipeline options
 */
public static void main(String[] args) {
  final DataTokenizationOptions parsedOptions =
      PipelineOptionsFactory.fromArgs(args)
          .withValidation()
          .as(DataTokenizationOptions.class);

  // Install the parsed options as the FileSystems defaults before the pipeline is run.
  FileSystems.setDefaultPipelineOptions(parsedOptions);

  run(parsedOptions);
}
Aggregations