Example 1 with CodecFactory

Use of org.apache.avro.file.CodecFactory in project jaqy by Teradata.

From the class AvroExporterFactory, method getHandler:

@Override
public JaqyExporter getHandler(CommandLine cmdLine, JaqyInterpreter interpreter) throws Exception {
    CodecFactory codecFactory = null;
    // Scan the command-line options; -c <name> selects the Avro compression codec.
    for (Option option : cmdLine.getOptions()) {
        switch (option.getOpt().charAt(0)) {
            case 'c':
                {
                    // Codec names as understood by Avro, e.g. "null", "deflate", "snappy".
                    String value = option.getValue();
                    codecFactory = CodecFactory.fromString(value);
                    break;
                }
        }
    }
    String[] args = cmdLine.getArgs();
    if (args.length == 0)
        throw new IllegalArgumentException("missing file name.");
    OutputStream os = interpreter.getPath(args[0]).getOutputStream();
    // codecFactory is still null here when no -c option was given.
    return new AvroExporter(os, codecFactory);
}
Also used: OutputStream (java.io.OutputStream), Option (org.apache.commons.cli.Option), CodecFactory (org.apache.avro.file.CodecFactory)
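
For context, the same CodecFactory.fromString pattern in a minimal stand-alone form, outside the Jaqy plumbing. This is an illustrative sketch: the users.avro file name and the inline User schema are invented for the example.

import java.io.File;

import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class CodecFactoryDemo {
    public static void main(String[] args) throws Exception {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}");
        // The same call the exporter makes: resolve a codec by name.
        CodecFactory codec = CodecFactory.fromString("deflate");
        try (DataFileWriter<GenericRecord> writer =
                new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema))) {
            writer.setCodec(codec); // must be set before create()
            writer.create(schema, new File("users.avro"));
            GenericRecord user = new GenericData.Record(schema);
            user.put("name", "alice");
            writer.append(user);
        }
    }
}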

Example 2 with CodecFactory

Use of org.apache.avro.file.CodecFactory in project parquet-mr by Apache.

From the class ToAvroCommand, method run:

@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 1, "A data file is required.");
    String source = targets.get(0);
    CodecFactory codecFactory = Codecs.avroCodec(compressionCodecName);
    Schema schema;
    if (avroSchemaFile != null) {
        schema = Schemas.fromAvsc(open(avroSchemaFile));
    } else {
        schema = getAvroSchema(source);
    }
    Schema projection = filterSchema(schema, columns);
    Path outPath = qualifiedPath(outputPath);
    FileSystem outFS = outPath.getFileSystem(getConf());
    if (overwrite && outFS.exists(outPath)) {
        console.debug("Deleting output file {} (already exists)", outPath);
        outFS.delete(outPath);
    }
    Iterable<Record> reader = openDataFile(source, projection);
    // Track whether the copy loop threw, so a failure in reader.close() is
    // suppressed only when it would otherwise mask the original exception.
    boolean threw = true;
    long count = 0;
    try {
        DatumWriter<Record> datumWriter = new GenericDatumWriter<>(schema);
        DataFileWriter<Record> w = new DataFileWriter<>(datumWriter);
        w.setCodec(codecFactory);
        try (DataFileWriter<Record> writer = w.create(projection, create(outputPath))) {
            for (Record record : reader) {
                writer.append(record);
                count += 1;
            }
        }
        threw = false;
    } catch (RuntimeException e) {
        throw new RuntimeException("Failed on record " + count, e);
    } finally {
        if (reader instanceof Closeable) {
            Closeables.close((Closeable) reader, threw);
        }
    }
    return 0;
}
Also used: Path (org.apache.hadoop.fs.Path), Schema (org.apache.avro.Schema), Expressions.filterSchema (org.apache.parquet.cli.util.Expressions.filterSchema), DataFileWriter (org.apache.avro.file.DataFileWriter), Closeable (java.io.Closeable), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), FileSystem (org.apache.hadoop.fs.FileSystem), Record (org.apache.avro.generic.GenericData.Record), CodecFactory (org.apache.avro.file.CodecFactory)
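
Codecs.avroCodec above bridges Parquet's CompressionCodecName enum to an Avro CodecFactory. Its real implementation is not shown here; the following is a plausible sketch of such a mapping, with the GZIP-to-deflate equivalence and the level 9 default being assumptions rather than parquet-cli's documented choices.

import org.apache.avro.file.CodecFactory;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class CodecMappingSketch {
    // Hypothetical mapping; parquet-cli's actual Codecs.avroCodec may differ.
    static CodecFactory avroCodec(CompressionCodecName codec) {
        switch (codec) {
            case UNCOMPRESSED:
                return CodecFactory.nullCodec();
            case SNAPPY:
                return CodecFactory.snappyCodec();
            case GZIP:
                // gzip framing wraps the same deflate algorithm Avro exposes.
                return CodecFactory.deflateCodec(9);
            default:
                throw new IllegalArgumentException("No Avro codec for " + codec);
        }
    }
}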

Example 3 with CodecFactory

Use of org.apache.avro.file.CodecFactory in project haivvreo by jghoman.

From the class AvroContainerOutputFormat, method getHiveRecordWriter:

@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties, Progressable progressable) throws IOException {
    Schema schema;
    try {
        schema = HaivvreoUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (HaivvreoException e) {
        throw new IOException(e);
    }
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    if (isCompressed) {
        int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        // Deflate is special-cased so the configured compression level is honored;
        // any other codec name is resolved through CodecFactory.fromString().
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName);
        dfw.setCodec(factory);
    }
    dfw.create(schema, path.getFileSystem(jobConf).create(path));
    return new AvroGenericRecordWriter(dfw);
}
Also used: Schema (org.apache.avro.Schema), DataFileWriter (org.apache.avro.file.DataFileWriter), IOException (java.io.IOException), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), CodecFactory (org.apache.avro.file.CodecFactory), GenericRecord (org.apache.avro.generic.GenericRecord)
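
The branch above is driven entirely by job configuration. Assuming DEFLATE_LEVEL_KEY and OUTPUT_CODEC are the standard Avro mapred constants ("avro.mapred.deflate.level" and "avro.output.codec"; worth verifying against the class's actual imports), a job could select the codec like this:

import org.apache.hadoop.mapred.JobConf;

public class ConfigureAvroOutput {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        // Assumed property names; confirm against the OUTPUT_CODEC and
        // DEFLATE_LEVEL_KEY constants the output format actually imports.
        jobConf.set("avro.output.codec", "snappy");
        jobConf.setInt("avro.mapred.deflate.level", 7); // consulted only when the codec is deflate
        // isCompressed itself is passed in by the caller; in Hive it typically
        // tracks hive.exec.compress.output.
    }
}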

Example 4 with CodecFactory

Use of org.apache.avro.file.CodecFactory in project hive by Apache.

From the class AvroContainerOutputFormat, method getHiveRecordWriter:

@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties, Progressable progressable) throws IOException {
    Schema schema;
    try {
        schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (AvroSerdeException e) {
        throw new IOException(e);
    }
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    if (isCompressed) {
        int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName);
        dfw.setCodec(factory);
    }
    // Record the writer's time zone and calendar settings in the file metadata
    // so readers can reproduce the writer's timestamp semantics.
    dfw.setMeta(AvroSerDe.WRITER_TIME_ZONE, TimeZone.getDefault().toZoneId().toString());
    dfw.setMeta(AvroSerDe.WRITER_PROLEPTIC, String.valueOf(HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN)));
    dfw.setMeta(AvroSerDe.WRITER_ZONE_CONVERSION_LEGACY, String.valueOf(HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_TIMESTAMP_WRITE_LEGACY_CONVERSION_ENABLED)));
    dfw.create(schema, path.getFileSystem(jobConf).create(path));
    return new AvroGenericRecordWriter(dfw);
}
Also used: AvroSerdeException (org.apache.hadoop.hive.serde2.avro.AvroSerdeException), Schema (org.apache.avro.Schema), DataFileWriter (org.apache.avro.file.DataFileWriter), IOException (java.io.IOException), GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter), CodecFactory (org.apache.avro.file.CodecFactory), GenericRecord (org.apache.avro.generic.GenericRecord)
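
The setMeta calls above persist the settings in the Avro container header, so any Avro reader can recover them. A minimal read-back sketch: the file name is a placeholder, "writer.time.zone" mirrors the AvroSerDe.WRITER_TIME_ZONE key referenced in the comment above, and "avro.codec" is the standard key under which Avro records the compression codec.

import java.io.File;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class ReadWriterMetadata {
    public static void main(String[] args) throws Exception {
        try (DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(
                new File("part-00000.avro"), new GenericDatumReader<GenericRecord>())) {
            System.out.println("codec: " + reader.getMetaString("avro.codec"));
            System.out.println("writer time zone: " + reader.getMetaString("writer.time.zone"));
        }
    }
}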

Example 5 with CodecFactory

Use of org.apache.avro.file.CodecFactory in project flink by Apache.

From the class AvroKeyValueSinkWriter, method open:

@Override
@SuppressWarnings("deprecation")
public void open(FileSystem fs, Path path) throws IOException {
    super.open(fs, path);
    CodecFactory compressionCodec = getCompressionCodec(properties);
    // Schema.parse(String) is deprecated in newer Avro releases (hence the
    // @SuppressWarnings above); new Schema.Parser().parse(...) is the replacement.
    Schema keySchema = Schema.parse(properties.get(CONF_OUTPUT_KEY_SCHEMA));
    Schema valueSchema = Schema.parse(properties.get(CONF_OUTPUT_VALUE_SCHEMA));
    keyValueWriter = new AvroKeyValueWriter<K, V>(keySchema, valueSchema, compressionCodec, getStream());
}
Also used: Schema (org.apache.avro.Schema), CodecFactory (org.apache.avro.file.CodecFactory)
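
On the caller side, open expects the key and value schemas (and, optionally, the compression settings that getCompressionCodec reads) to already be in the properties map. A sketch based on Flink's documented AvroKeyValueSinkWriter usage; the Long/String schemas are placeholder choices.

import java.util.HashMap;
import java.util.Map;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileConstants;
import org.apache.flink.streaming.connectors.fs.AvroKeyValueSinkWriter;

public class BuildAvroKeyValueWriter {
    public static void main(String[] args) {
        Map<String, String> properties = new HashMap<>();
        properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_KEY_SCHEMA, Schema.create(Schema.Type.LONG).toString());
        properties.put(AvroKeyValueSinkWriter.CONF_OUTPUT_VALUE_SCHEMA, Schema.create(Schema.Type.STRING).toString());
        // Optional compression: these two keys enable a codec for the container file.
        properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS, Boolean.toString(true));
        properties.put(AvroKeyValueSinkWriter.CONF_COMPRESS_CODEC, DataFileConstants.SNAPPY_CODEC);
        AvroKeyValueSinkWriter<Long, String> writer = new AvroKeyValueSinkWriter<>(properties);
    }
}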

Aggregations

CodecFactory (org.apache.avro.file.CodecFactory): 6
Schema (org.apache.avro.Schema): 4
DataFileWriter (org.apache.avro.file.DataFileWriter): 3
GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 3
IOException (java.io.IOException): 2
GenericRecord (org.apache.avro.generic.GenericRecord): 2
Closeable (java.io.Closeable): 1
OutputStream (java.io.OutputStream): 1
Record (org.apache.avro.generic.GenericData.Record): 1
Option (org.apache.commons.cli.Option): 1
FileSystem (org.apache.hadoop.fs.FileSystem): 1
Path (org.apache.hadoop.fs.Path): 1
AvroSerdeException (org.apache.hadoop.hive.serde2.avro.AvroSerdeException): 1
Expressions.filterSchema (org.apache.parquet.cli.util.Expressions.filterSchema): 1
Test (org.testng.annotations.Test): 1