Search in sources :

Example 16 with SchemaParser

use of com.linkedin.data.schema.SchemaParser in project rest.li by linkedin.

the class FilterSchemaGenerator method main.

/**
 * Command-line entry point. Reads every schema file found under a source directory,
 * removes the parts of each schema selected by a predicate expression, and writes the
 * filtered schema as indented JSON to the same relative path under an output directory.
 *
 * <p>Expects exactly two positional arguments: the source directory and the output
 * directory. Option {@code a} turns on Avro union mode for schema validation; option
 * {@code e} supplies the predicate expression.
 *
 * <p>The process always terminates through {@link System#exit(int)}: status 0 when every
 * file was processed, status 1 when the arguments are invalid or any file failed.
 * A failure on one file is logged and does not stop processing of the remaining files.
 *
 * @param args command-line options followed by the source and output directory paths
 */
public static void main(String[] args) {
    CommandLine cl = null;
    try {
        final CommandLineParser parser = new GnuParser();
        cl = parser.parse(_options, args);
    } catch (ParseException e) {
        _log.error("Invalid arguments: " + e.getMessage());
        reportInvalidArguments();
        // NOTE(review): reportInvalidArguments() presumably terminates the JVM; this explicit
        // exit guards against dereferencing a null CommandLine below if it ever returns.
        System.exit(1);
    }
    final String[] directoryArgs = cl.getArgs();
    if (directoryArgs.length != 2) {
        reportInvalidArguments();
        // Same guard as above: never index directoryArgs with the wrong argument count.
        System.exit(1);
    }
    final File sourceDirectory = new File(directoryArgs[0]);
    if (!sourceDirectory.exists()) {
        _log.error(sourceDirectory.getPath() + " does not exist");
        System.exit(1);
    }
    if (!sourceDirectory.isDirectory()) {
        _log.error(sourceDirectory.getPath() + " is not a directory");
        System.exit(1);
    }
    final URI sourceDirectoryURI = sourceDirectory.toURI();
    final File outputDirectory = new File(directoryArgs[1]);
    // The output directory may not exist yet (it is created on demand below), but if the
    // path exists it must be a directory. The check must look at outputDirectory itself.
    if (outputDirectory.exists() && !outputDirectory.isDirectory()) {
        _log.error(outputDirectory.getPath() + " is not a directory");
        System.exit(1);
    }
    final boolean isAvroMode = cl.hasOption('a');
    final String predicateExpression = cl.getOptionValue('e');
    final Predicate predicate = PredicateExpressionParser.parse(predicateExpression);
    final Collection<File> sourceFiles = FileUtil.listFiles(sourceDirectory, null);
    int exitCode = 0;
    for (File sourceFile : sourceFiles) {
        try {
            final ValidationOptions val = new ValidationOptions();
            val.setAvroUnionMode(isAvroMode);
            final SchemaParser schemaParser = new SchemaParser();
            schemaParser.setValidationOptions(val);
            // Close the input stream as soon as parsing finishes so that file handles
            // are not leaked across the loop.
            try (FileInputStream schemaStream = new FileInputStream(sourceFile)) {
                schemaParser.parse(schemaStream);
            }
            if (schemaParser.hasError()) {
                _log.error("Error parsing " + sourceFile.getPath() + ": " + schemaParser.errorMessageBuilder());
                exitCode = 1;
                continue;
            }
            // Only the first top-level schema of a file is filtered. A file that yields no
            // top-level schema is reported the same way as one whose first schema is unnamed,
            // instead of aborting the whole run with an IndexOutOfBoundsException.
            if (schemaParser.topLevelDataSchemas().isEmpty()
                    || !(schemaParser.topLevelDataSchemas().get(0) instanceof NamedDataSchema)) {
                _log.error(sourceFile.getPath() + " does not contain valid NamedDataSchema");
                exitCode = 1;
                continue;
            }
            final DataSchema originalSchema = schemaParser.topLevelDataSchemas().get(0);
            // A separate parser instance collects the errors produced while filtering.
            final SchemaParser filterParser = new SchemaParser();
            filterParser.setValidationOptions(val);
            final NamedDataSchema filteredSchema = Filters.removeByPredicate((NamedDataSchema) originalSchema, predicate, filterParser);
            if (filterParser.hasError()) {
                _log.error("Error applying predicate: " + filterParser.errorMessageBuilder());
                exitCode = 1;
                continue;
            }
            // Mirror the source file's path relative to the source directory under the output directory.
            final String relativePath = sourceDirectoryURI.relativize(sourceFile.toURI()).getPath();
            final String outputFilePath = outputDirectory.getPath() + File.separator + relativePath;
            final File outputFile = new File(outputFilePath);
            final File outputFileParent = outputFile.getParentFile();
            outputFileParent.mkdirs();
            if (!outputFileParent.exists()) {
                _log.error("Unable to write filtered schema to " + outputFileParent.getPath());
                exitCode = 1;
                continue;
            }
            // Encode before opening the file so that a failed encoding does not leave an
            // empty output file behind; try-with-resources closes the stream even if write fails.
            final String schemaJson = SchemaToJsonEncoder.schemaToJson(filteredSchema, JsonBuilder.Pretty.INDENTED);
            try (FileOutputStream fout = new FileOutputStream(outputFile)) {
                fout.write(schemaJson.getBytes(RestConstants.DEFAULT_CHARSET));
            }
        } catch (IOException e) {
            _log.error(e.getMessage());
            exitCode = 1;
        }
    }
    System.exit(exitCode);
}
Also used : GnuParser(org.apache.commons.cli.GnuParser) IOException(java.io.IOException) ValidationOptions(com.linkedin.data.schema.validation.ValidationOptions) SchemaParser(com.linkedin.data.schema.SchemaParser) URI(java.net.URI) FileInputStream(java.io.FileInputStream) Predicate(com.linkedin.data.it.Predicate) DataSchema(com.linkedin.data.schema.DataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) NamedDataSchema(com.linkedin.data.schema.NamedDataSchema) CommandLine(org.apache.commons.cli.CommandLine) FileOutputStream(java.io.FileOutputStream) CommandLineParser(org.apache.commons.cli.CommandLineParser) ParseException(org.apache.commons.cli.ParseException) File(java.io.File)

Aggregations

SchemaParser (com.linkedin.data.schema.SchemaParser)16 DataSchema (com.linkedin.data.schema.DataSchema)11 PegasusSchemaParser (com.linkedin.data.schema.PegasusSchemaParser)10 NamedDataSchema (com.linkedin.data.schema.NamedDataSchema)9 Test (org.testng.annotations.Test)7 RecordDataSchema (com.linkedin.data.schema.RecordDataSchema)6 DataMap (com.linkedin.data.DataMap)5 Schema (org.apache.avro.Schema)5 Predicate (com.linkedin.data.it.Predicate)4 GenericRecord (org.apache.avro.generic.GenericRecord)4 ValidationOptions (com.linkedin.data.schema.validation.ValidationOptions)3 FileInputStream (java.io.FileInputStream)2 IOException (java.io.IOException)2 BeforeTest (org.testng.annotations.BeforeTest)2 ByteString (com.linkedin.data.ByteString)1 ArrayDataSchema (com.linkedin.data.schema.ArrayDataSchema)1 DataSchemaLocation (com.linkedin.data.schema.DataSchemaLocation)1 DataSchemaResolver (com.linkedin.data.schema.DataSchemaResolver)1 DataSchemaTraverse (com.linkedin.data.schema.DataSchemaTraverse)1 EnumDataSchema (com.linkedin.data.schema.EnumDataSchema)1