Search in sources:

Example 41 with Record

use of org.apache.avro.generic.GenericData.Record in project beam by apache.

From class BigQueryIOStorageReadTest, method createRecord:

/**
 * Builds a generic Avro record for the given schema with its {@code "name"}
 * field set to the supplied value.
 *
 * @param name value to store in the record's "name" field
 * @param schema Avro schema the record conforms to
 * @return a new {@link GenericRecord} with only the "name" field populated
 */
private static GenericRecord createRecord(String name, Schema schema) {
    Record result = new Record(schema);
    result.put("name", name);
    return result;
}
Also used: Record(org.apache.avro.generic.GenericData.Record) GenericRecord(org.apache.avro.generic.GenericRecord)

Example 42 with Record

use of org.apache.avro.generic.GenericData.Record in project voldemort by voldemort.

From class AvroBackwardsCompatibilityTest, method writeVersion0with1Present:

/**
 * Serializes a version-0 record (containing only the {@code "original"} field)
 * through a versioned serializer that is also aware of newer schema versions.
 *
 * @param versions map of schema version number to schema JSON
 * @param s0 the version-0 Avro schema to write with
 * @return the serialized bytes of the version-0 record
 */
private static byte[] writeVersion0with1Present(Map<Integer, String> versions, Schema s0) {
    AvroVersionedGenericSerializer serializer = new AvroVersionedGenericSerializer(versions);
    GenericData.Record v0Record = new GenericData.Record(s0);
    v0Record.put("original", new Utf8("Abhinay"));
    return serializer.toBytes(v0Record);
}
Also used: Record(org.apache.avro.generic.GenericData.Record) Utf8(org.apache.avro.util.Utf8) GenericData(org.apache.avro.generic.GenericData)

Example 43 with Record

use of org.apache.avro.generic.GenericData.Record in project parquet-mr by apache.

From class ToAvroCommand, method run:

@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 1, "A data file is required.");
    String source = targets.get(0);
    CodecFactory codecFactory = Codecs.avroCodec(compressionCodecName);
    final Schema schema;
    if (avroSchemaFile != null) {
        schema = Schemas.fromAvsc(open(avroSchemaFile));
    } else {
        schema = getAvroSchema(source);
    }
    final Schema projection = filterSchema(schema, columns);
    Iterable<Record> reader = openDataFile(source, projection);
    boolean threw = true;
    long count = 0;
    DatumWriter<Record> datumWriter = new GenericDatumWriter<>(schema);
    try (DataFileWriter<Record> fileWriter = new DataFileWriter<>(datumWriter)) {
        fileWriter.setCodec(codecFactory);
        try (OutputStream os = overwrite ? create(outputPath) : createWithNoOverwrite(outputPath);
            DataFileWriter<Record> writer = fileWriter.create(projection, os)) {
            for (Record record : reader) {
                writer.append(record);
                count += 1;
            }
        }
        threw = false;
    } catch (RuntimeException e) {
        throw new RuntimeException("Failed on record " + count, e);
    } finally {
        if (reader instanceof Closeable) {
            Closeables.close((Closeable) reader, threw);
        }
    }
    return 0;
}
Also used : Schema(org.apache.avro.Schema) Expressions.filterSchema(org.apache.parquet.cli.util.Expressions.filterSchema) DataFileWriter(org.apache.avro.file.DataFileWriter) OutputStream(java.io.OutputStream) Closeable(java.io.Closeable) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Record(org.apache.avro.generic.GenericData.Record) CodecFactory(org.apache.avro.file.CodecFactory)

Example 44 with Record

use of org.apache.avro.generic.GenericData.Record in project parquet-mr by apache.

From class ConvertCSVCommand, method run:

@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
    Preconditions.checkArgument(targets != null && targets.size() == 1, "CSV path is required.");
    if (header != null) {
        // if a header is given on the command line, don't assume one is in the file
        noHeader = true;
    }
    CSVProperties props = new CSVProperties.Builder().delimiter(delimiter).escape(escape).quote(quote).header(header).hasHeader(!noHeader).linesToSkip(linesToSkip).charset(charsetName).build();
    String source = targets.get(0);
    Schema csvSchema;
    if (avroSchemaFile != null) {
        csvSchema = Schemas.fromAvsc(open(avroSchemaFile));
    } else {
        Set<String> required = ImmutableSet.of();
        if (requiredFields != null) {
            required = ImmutableSet.copyOf(requiredFields);
        }
        String filename = new File(source).getName();
        String recordName;
        if (filename.contains(".")) {
            recordName = filename.substring(0, filename.indexOf("."));
        } else {
            recordName = filename;
        }
        csvSchema = AvroCSV.inferNullableSchema(recordName, open(source), props, required);
    }
    long count = 0;
    try (AvroCSVReader<Record> reader = new AvroCSVReader<>(open(source), props, csvSchema, Record.class, true)) {
        CompressionCodecName codec = Codecs.parquetCodec(compressionCodecName);
        try (ParquetWriter<Record> writer = AvroParquetWriter.<Record>builder(qualifiedPath(outputPath)).withWriterVersion(v2 ? PARQUET_2_0 : PARQUET_1_0).withWriteMode(overwrite ? ParquetFileWriter.Mode.OVERWRITE : ParquetFileWriter.Mode.CREATE).withCompressionCodec(codec).withDictionaryEncoding(true).withDictionaryPageSize(dictionaryPageSize).withPageSize(pageSize).withRowGroupSize(rowGroupSize).withDataModel(GenericData.get()).withConf(getConf()).withSchema(csvSchema).build()) {
            for (Record record : reader) {
                writer.write(record);
            }
        } catch (RuntimeException e) {
            throw new RuntimeException("Failed on record " + count, e);
        }
    }
    return 0;
}
Also used : Schema(org.apache.avro.Schema) CSVProperties(org.apache.parquet.cli.csv.CSVProperties) AvroCSVReader(org.apache.parquet.cli.csv.AvroCSVReader) CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) Record(org.apache.avro.generic.GenericData.Record) File(java.io.File)

Example 45 with Record

use of org.apache.avro.generic.GenericData.Record in project crunch by cloudera.

From class AvroFileSourceTest, method testConfigureJob_GenericData:

@Test
public void testConfigureJob_GenericData() throws IOException {
    // A source of generic Avro records is read through the reflect path,
    // so configuring the source must set INPUT_IS_REFLECT on the job.
    final AvroType<Record> genericType = Avros.generics(Person.SCHEMA$);
    final AvroFileSource<Record> source =
        new AvroFileSource<Record>(new Path(tempFile.getAbsolutePath()), genericType);
    source.configureSource(job, -1);
    assertTrue(job.getConfiguration().getBoolean(AvroJob.INPUT_IS_REFLECT, false));
}
Also used : Path(org.apache.hadoop.fs.Path) Record(org.apache.avro.generic.GenericData.Record) Test(org.junit.Test)

Aggregations

Record (org.apache.avro.generic.GenericData.Record)96 Test (org.junit.Test)44 IndexedRecord (org.apache.avro.generic.IndexedRecord)43 Schema (org.apache.avro.Schema)33 ArrayList (java.util.ArrayList)24 GenericRecord (org.apache.avro.generic.GenericRecord)14 Field (org.apache.avro.Schema.Field)11 List (java.util.List)10 GenericData (org.apache.avro.generic.GenericData)10 TestRunner (org.apache.nifi.util.TestRunner)8 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)7 JsonObject (com.google.gson.JsonObject)6 DataFileStream (org.apache.avro.file.DataFileStream)6 DataFileWriter (org.apache.avro.file.DataFileWriter)6 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)6 Utf8 (org.apache.avro.util.Utf8)6 TMarketoOutputProperties (org.talend.components.marketo.tmarketooutput.TMarketoOutputProperties)6 ActivityRecord (com.marketo.mktows.ActivityRecord)5 ArrayOfLeadRecord (com.marketo.mktows.ArrayOfLeadRecord)5 LeadChangeRecord (com.marketo.mktows.LeadChangeRecord)5