Use of org.apache.gobblin.util.schema_check.AvroSchemaCheckStrategy in the project incubator-gobblin by Apache.
The method schemaChecking of the class FileAwareInputStreamExtractorWithCheckSchema.
/**
 * Uses the configured {@link AvroSchemaCheckStrategy} to make sure the real schema of the file being extracted and
 * the expected schema have matching field names and types.
 *
 * <p>The check is skipped (and {@code true} is returned) when {@link CopySource#SCHEMA_CHECK_ENABLED} is off.
 * Configuration is validated before the file is touched, and the Avro reader is always closed once the file's
 * schema has been read.
 *
 * @param fsFromFile file system of the source file; not consulted by this implementation, which opens the file
 *                   through a fresh {@link Configuration}
 * @return {@code true} if schema checking is disabled; otherwise the result of comparing the expected schema
 *         against the file's schema with the configured strategy
 * @throws IOException if the expected schema is not configured, the check strategy cannot be created, or the
 *                     file's Avro header cannot be read
 */
protected boolean schemaChecking(FileSystem fsFromFile) throws IOException {
  if (!this.state.getPropAsBoolean(CopySource.SCHEMA_CHECK_ENABLED, CopySource.DEFAULT_SCHEMA_CHECK_ENABLED)) {
    return true;
  }

  // Validate configuration first so a misconfigured job fails before any file handle is opened.
  String expectedSchemaJson = this.state.getProp(ConfigurationKeys.COPY_EXPECTED_SCHEMA);
  if (expectedSchemaJson == null) {
    throw new IOException("Expected schema is not set properly");
  }
  Schema expectedSchema = new Schema.Parser().parse(expectedSchemaJson);

  AvroSchemaCheckStrategy strategy = AvroSchemaCheckStrategy.AvroSchemaCheckStrategyFactory.create(this.state);
  if (strategy == null) {
    throw new IOException("schema check strategy cannot be initialized");
  }

  // Only the schema from the file header is needed; close the input and the reader as soon as it has been read.
  // The input is declared as its own resource so it is released even if the reader's constructor throws.
  Schema actualSchema;
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  try (FsInput input = new FsInput(this.file.getFileStatus().getPath(), new Configuration());
      DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader)) {
    actualSchema = dataFileReader.getSchema();
  }

  return strategy.compare(expectedSchema, actualSchema);
}
Aggregations