Use of org.apache.avro.file.CodecFactory in project jaqy by Teradata.
Class AvroExporterFactory, method getHandler:
@Override
public JaqyExporter getHandler(CommandLine cmdLine, JaqyInterpreter interpreter) throws Exception {
    CodecFactory codecFactory = null;
    // scan the parsed options; -c selects the Avro compression codec by name
    for (Option option : cmdLine.getOptions()) {
        switch (option.getOpt().charAt(0)) {
            case 'c': {
                String value = option.getValue();
                codecFactory = CodecFactory.fromString(value);
                break;
            }
        }
    }
    String[] args = cmdLine.getArgs();
    if (args.length == 0)
        throw new IllegalArgumentException("missing file name.");
    OutputStream os = interpreter.getPath(args[0]).getOutputStream();
    return new AvroExporter(os, codecFactory);
}
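Here the codec comes straight from the user-supplied option string, so getHandler relies on CodecFactory.fromString to resolve it. A minimal standalone sketch of that pattern (not jaqy code; the schema, file name, and codec name below are illustrative):

import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class CodecByName {
    public static void main(String[] args) throws IOException {
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Row\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}");
        // fromString accepts names such as "null", "deflate", "snappy", "bzip2";
        // an unknown name raises AvroRuntimeException, which jaqy would surface to the user
        CodecFactory codec = CodecFactory.fromString("deflate");
        try (DataFileWriter<GenericRecord> writer =
                 new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
            writer.setCodec(codec); // must be called before create()
            writer.create(schema, new File("rows.avro"));
            GenericRecord record = new GenericData.Record(schema);
            record.put("id", 1L);
            writer.append(record);
        }
    }
}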
Use of org.apache.avro.file.CodecFactory in project parquet-mr by apache.
Class ToAvroCommand, method run:
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 1, "A data file is required.");
    String source = targets.get(0);
    CodecFactory codecFactory = Codecs.avroCodec(compressionCodecName);
    Schema schema;
    if (avroSchemaFile != null) {
        schema = Schemas.fromAvsc(open(avroSchemaFile));
    } else {
        schema = getAvroSchema(source);
    }
    Schema projection = filterSchema(schema, columns);
    Path outPath = qualifiedPath(outputPath);
    FileSystem outFS = outPath.getFileSystem(getConf());
    if (overwrite && outFS.exists(outPath)) {
        console.debug("Deleting output file {} (already exists)", outPath);
        outFS.delete(outPath);
    }
    Iterable<Record> reader = openDataFile(source, projection);
    boolean threw = true;
    long count = 0;
    try {
        DatumWriter<Record> datumWriter = new GenericDatumWriter<>(schema);
        DataFileWriter<Record> w = new DataFileWriter<>(datumWriter);
        // the codec must be set before create()
        w.setCodec(codecFactory);
        try (DataFileWriter<Record> writer = w.create(projection, create(outputPath))) {
            for (Record record : reader) {
                writer.append(record);
                count += 1;
            }
        }
        threw = false;
    } catch (RuntimeException e) {
        throw new RuntimeException("Failed on record " + count, e);
    } finally {
        if (reader instanceof Closeable) {
            Closeables.close((Closeable) reader, threw);
        }
    }
    return 0;
}
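Codecs.avroCodec is parquet-cli's own bridge from a Parquet compression name to an Avro CodecFactory; its implementation isn't shown here. A hypothetical sketch of such a name-to-codec mapping, using only documented CodecFactory constructors (the AvroCodecs class and forName method are illustrative, not the real Codecs API):

import org.apache.avro.file.CodecFactory;

public final class AvroCodecs {
    private AvroCodecs() {}

    // Map a user-supplied compression name to an Avro CodecFactory.
    public static CodecFactory forName(String name) {
        switch (name == null ? "uncompressed" : name.toLowerCase()) {
            case "uncompressed":
            case "none":
                return CodecFactory.nullCodec();
            case "snappy":
                return CodecFactory.snappyCodec();
            case "gzip":
            case "deflate":
                return CodecFactory.deflateCodec(9); // 9 = maximum deflate level
            default:
                // fall back to Avro's own codec registry ("bzip2", "xz", ...)
                return CodecFactory.fromString(name);
        }
    }
}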
Use of org.apache.avro.file.CodecFactory in project haivvreo by jghoman.
Class AvroContainerOutputFormat, method getHiveRecordWriter:
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties properties,
        Progressable progressable) throws IOException {
    Schema schema;
    try {
        schema = HaivvreoUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (HaivvreoException e) {
        throw new IOException(e);
    }
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    if (isCompressed) {
        int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        // deflate takes an explicit level; other codecs are looked up by name
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
            ? CodecFactory.deflateCodec(level)
            : CodecFactory.fromString(codecName);
        dfw.setCodec(factory);
    }
    dfw.create(schema, path.getFileSystem(jobConf).create(path));
    return new AvroGenericRecordWriter(dfw);
}
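Whether compression is applied at all is decided by Hive (the isCompressed flag); the codec itself is read from the job configuration. A hedged sketch of that selection step in isolation, assuming OUTPUT_CODEC and DEFLATE_LEVEL_KEY resolve to Avro's standard mapred keys "avro.output.codec" and "avro.mapred.deflate.level" (haivvreo's actual imports may differ):

import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.hadoop.mapred.JobConf;

public class HiveStyleCodecSelection {
    // Mirrors the if (isCompressed) branch above with literal key names.
    static <T> void maybeSetCodec(DataFileWriter<T> writer, JobConf jobConf, boolean isCompressed) {
        if (!isCompressed) {
            return; // Hive decided output should be uncompressed
        }
        int level = jobConf.getInt("avro.mapred.deflate.level", -1); // -1 = zlib's default level
        String codecName = jobConf.get("avro.output.codec", "deflate");
        writer.setCodec("deflate".equals(codecName)
            ? CodecFactory.deflateCodec(level)
            : CodecFactory.fromString(codecName));
    }
}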
Use of org.apache.avro.file.CodecFactory in project hive by apache.
Class AvroContainerOutputFormat, method getHiveRecordWriter:
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf,
        Path path, Class<? extends Writable> valueClass, boolean isCompressed, Properties properties,
        Progressable progressable) throws IOException {
    Schema schema;
    try {
        schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (AvroSerdeException e) {
        throw new IOException(e);
    }
    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
    if (isCompressed) {
        int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
            ? CodecFactory.deflateCodec(level)
            : CodecFactory.fromString(codecName);
        dfw.setCodec(factory);
    }
    // record writer-side time zone and calendar settings in the file metadata
    dfw.setMeta(AvroSerDe.WRITER_TIME_ZONE, TimeZone.getDefault().toZoneId().toString());
    dfw.setMeta(AvroSerDe.WRITER_PROLEPTIC, String.valueOf(HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_PROLEPTIC_GREGORIAN)));
    dfw.setMeta(AvroSerDe.WRITER_ZONE_CONVERSION_LEGACY, String.valueOf(HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.HIVE_AVRO_TIMESTAMP_WRITE_LEGACY_CONVERSION_ENABLED)));
    dfw.create(schema, path.getFileSystem(jobConf).create(path));
    return new AvroGenericRecordWriter(dfw);
}
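The three setMeta calls embed writer-side settings (time zone, proleptic-Gregorian flag, legacy zone conversion) into the Avro file header so readers can interpret timestamps correctly. A hedged sketch of reading such metadata back (the literal key "writer.time.zone" is taken from the comment above; Hive readers would use the AvroSerDe constants instead):

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class ReadWriterMeta {
    public static void main(String[] args) throws IOException {
        try (DataFileReader<GenericRecord> reader = new DataFileReader<>(
                new File(args[0]), new GenericDatumReader<GenericRecord>())) {
            // returns null if the writer never set this metadata key
            System.out.println(reader.getMetaString("writer.time.zone"));
        }
    }
}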
Use of org.apache.avro.file.CodecFactory in project flink by apache.
Class AvroKeyValueSinkWriter, method open:
@Override
@SuppressWarnings("deprecation")
public void open(FileSystem fs, Path path) throws IOException {
    super.open(fs, path);
    CodecFactory compressionCodec = getCompressionCodec(properties);
    // Schema.parse(String) is deprecated, hence the annotation above
    Schema keySchema = Schema.parse(properties.get(CONF_OUTPUT_KEY_SCHEMA));
    Schema valueSchema = Schema.parse(properties.get(CONF_OUTPUT_VALUE_SCHEMA));
    keyValueWriter = new AvroKeyValueWriter<K, V>(keySchema, valueSchema, compressionCodec, getStream());
}
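The suppressed deprecation warning comes from the static Schema.parse(String), which Avro replaced with Schema.Parser. A minimal sketch of the non-deprecated equivalent (the property map and key parameter are stand-ins for the writer's configuration):

import java.util.Map;
import org.apache.avro.Schema;

public class ParseSchema {
    // Non-deprecated replacement for the static Schema.parse(String) used above
    static Schema parse(Map<String, String> properties, String key) {
        return new Schema.Parser().parse(properties.get(key));
    }
}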