Use of org.apache.avro.generic.GenericData.Record in the Apache Beam project:
the createRecord method of the class BigQueryIOStorageReadTest.
/**
 * Builds a generic Avro record for the given schema and populates its
 * {@code "name"} field.
 *
 * @param name value to store under the record's "name" field
 * @param schema Avro schema describing the record layout
 * @return the newly created record
 */
private static GenericRecord createRecord(String name, Schema schema) {
  Record result = new Record(schema);
  result.put("name", name);
  return result;
}
Use of org.apache.avro.generic.GenericData.Record in the Voldemort project:
the writeVersion0with1Present method of the class AvroBackwardsCompatibilityTest.
/**
 * Serializes a version-0 record while the serializer also knows about a
 * newer schema version (backwards-compatibility scenario).
 *
 * @param versions map from schema version number to schema definition
 * @param s0 the version-0 schema used to build the record
 * @return the Avro-encoded bytes of the record
 */
private static byte[] writeVersion0with1Present(Map<Integer, String> versions, Schema s0) {
  GenericData.Record v0Record = new GenericData.Record(s0);
  v0Record.put("original", new Utf8("Abhinay"));
  AvroVersionedGenericSerializer versionedSerializer = new AvroVersionedGenericSerializer(versions);
  return versionedSerializer.toBytes(v0Record);
}
Use of org.apache.avro.generic.GenericData.Record in the Apache parquet-mr project:
the run method of the class ToAvroCommand.
/**
 * Converts a single input data file to an Avro data file.
 *
 * <p>The Avro schema is either read from an explicit schema file
 * ({@code avroSchemaFile}) or derived from the source file itself; the
 * schema is then narrowed to the requested columns before reading.
 *
 * @return 0 on success
 * @throws IOException if the source cannot be opened or the output cannot be written
 * @throws RuntimeException wrapping any runtime failure, with the index of
 *     the record being written when it occurred
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
// Exactly one input file is expected on the command line.
Preconditions.checkArgument(targets != null && targets.size() == 1, "A data file is required.");
String source = targets.get(0);
CodecFactory codecFactory = Codecs.avroCodec(compressionCodecName);
final Schema schema;
if (avroSchemaFile != null) {
// An explicit .avsc file overrides schema discovery from the source.
schema = Schemas.fromAvsc(open(avroSchemaFile));
} else {
schema = getAvroSchema(source);
}
// Restrict the schema to the user-selected columns (if any).
final Schema projection = filterSchema(schema, columns);
Iterable<Record> reader = openDataFile(source, projection);
// threw tracks whether we exit abnormally so Closeables.close can decide
// whether to swallow a secondary close() failure.
boolean threw = true;
long count = 0;
DatumWriter<Record> datumWriter = new GenericDatumWriter<>(schema);
try (DataFileWriter<Record> fileWriter = new DataFileWriter<>(datumWriter)) {
fileWriter.setCodec(codecFactory);
// Note: the writer is created against the projected schema, while the
// DatumWriter uses the full schema — preserved as-is from the original.
try (OutputStream os = overwrite ? create(outputPath) : createWithNoOverwrite(outputPath);
DataFileWriter<Record> writer = fileWriter.create(projection, os)) {
for (Record record : reader) {
writer.append(record);
count += 1;
}
}
threw = false;
} catch (RuntimeException e) {
// Include how far we got so the failing record can be located.
throw new RuntimeException("Failed on record " + count, e);
} finally {
// openDataFile may or may not return a Closeable; close only if it is.
if (reader instanceof Closeable) {
Closeables.close((Closeable) reader, threw);
}
}
return 0;
}
Use of org.apache.avro.generic.GenericData.Record in the Apache parquet-mr project:
the run method of the class ConvertCSVCommand.
/**
 * Converts a CSV file to a Parquet file.
 *
 * <p>The record schema is either read from an explicit schema file
 * ({@code avroSchemaFile}) or inferred from the CSV content, using the
 * file's base name (text before the first dot) as the record name.
 *
 * @return 0 on success
 * @throws IOException if the source cannot be read or the output cannot be written
 * @throws RuntimeException wrapping any runtime failure, with the index of
 *     the record being written when it occurred
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
// Exactly one input file is expected on the command line.
Preconditions.checkArgument(targets != null && targets.size() == 1, "CSV path is required.");
if (header != null) {
// if a header is given on the command line, don't assume one is in the file
noHeader = true;
}
CSVProperties props = new CSVProperties.Builder().delimiter(delimiter).escape(escape).quote(quote).header(header).hasHeader(!noHeader).linesToSkip(linesToSkip).charset(charsetName).build();
String source = targets.get(0);
Schema csvSchema;
if (avroSchemaFile != null) {
// An explicit .avsc file overrides schema inference from the CSV.
csvSchema = Schemas.fromAvsc(open(avroSchemaFile));
} else {
Set<String> required = ImmutableSet.of();
if (requiredFields != null) {
required = ImmutableSet.copyOf(requiredFields);
}
// Derive the Avro record name from the file's base name (before the first dot).
String filename = new File(source).getName();
String recordName;
if (filename.contains(".")) {
recordName = filename.substring(0, filename.indexOf("."));
} else {
recordName = filename;
}
csvSchema = AvroCSV.inferNullableSchema(recordName, open(source), props, required);
}
long count = 0;
try (AvroCSVReader<Record> reader = new AvroCSVReader<>(open(source), props, csvSchema, Record.class, true)) {
CompressionCodecName codec = Codecs.parquetCodec(compressionCodecName);
try (ParquetWriter<Record> writer = AvroParquetWriter.<Record>builder(qualifiedPath(outputPath)).withWriterVersion(v2 ? PARQUET_2_0 : PARQUET_1_0).withWriteMode(overwrite ? ParquetFileWriter.Mode.OVERWRITE : ParquetFileWriter.Mode.CREATE).withCompressionCodec(codec).withDictionaryEncoding(true).withDictionaryPageSize(dictionaryPageSize).withPageSize(pageSize).withRowGroupSize(rowGroupSize).withDataModel(GenericData.get()).withConf(getConf()).withSchema(csvSchema).build()) {
for (Record record : reader) {
writer.write(record);
// BUG FIX: count was never incremented, so the error message below
// always reported "Failed on record 0" regardless of progress.
count += 1;
}
} catch (RuntimeException e) {
// Include how far we got so the failing record can be located.
throw new RuntimeException("Failed on record " + count, e);
}
}
return 0;
}
Use of org.apache.avro.generic.GenericData.Record in the Cloudera Crunch project:
the testConfigureJob_GenericData method of the class AvroFileSourceTest.
/**
 * Verifies that configuring a generic-data Avro file source marks the
 * job's input as reflect-based via {@link AvroJob#INPUT_IS_REFLECT}.
 */
@Test
public void testConfigureJob_GenericData() throws IOException {
AvroType<Record> genericType = Avros.generics(Person.SCHEMA$);
Path sourcePath = new Path(tempFile.getAbsolutePath());
AvroFileSource<Record> source = new AvroFileSource<Record>(sourcePath, genericType);
source.configureSource(job, -1);
assertTrue(job.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, false));
}
Aggregations