Search in sources:

Example 21 with DataFileStream

Use of org.apache.avro.file.DataFileStream in the project nifi by apache.

From the class TestAvroTypeUtil, the method testMapWithNullSchema:

@Test
public void testMapWithNullSchema() throws IOException {
    // Regression guard: building a RecordSchema from the first union branch of the
    // recursive schema must not throw.
    final Schema recursiveSchema = new Schema.Parser().parse(getClass().getResourceAsStream("schema.json"));
    final RecordSchema recordASchema = AvroTypeUtil.createSchema(recursiveSchema.getTypes().get(0));
    // Read one record from the reference data file and run it through the converter
    // to confirm the fix against real data.
    try (DataFileStream<GenericRecord> reader = new DataFileStream<>(getClass().getResourceAsStream("data.avro"), new GenericDatumReader<>())) {
        final GenericRecord firstRecord = reader.next();
        AvroTypeUtil.convertAvroRecordToMap(firstRecord, recordASchema);
    }
}
Also used : RecordSchema(org.apache.nifi.serialization.record.RecordSchema) Schema(org.apache.avro.Schema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) GenericRecord(org.apache.avro.generic.GenericRecord) DataFileStream(org.apache.avro.file.DataFileStream) RecordSchema(org.apache.nifi.serialization.record.RecordSchema) SimpleRecordSchema(org.apache.nifi.serialization.SimpleRecordSchema) Test(org.junit.Test)

Example 22 with DataFileStream

Use of org.apache.avro.file.DataFileStream in the project Plume by tdunning.

From the class WriteAvroFile, the method writeSomething:

@Test
public void writeSomething() throws IOException {
    // Round-trips ten doubles (0.0, 3.0, ..., 27.0) through an Avro container file
    // and verifies the values via both the random-access reader and the streaming reader.
    Schema s = Schema.create(Schema.Type.DOUBLE);
    File f = new File("x");
    f.deleteOnExit();
    // try-with-resources closes the writer even if append() throws
    // (the original leaked the writer/readers on any failure before close()).
    try (DataFileWriter<Double> writer = new DataFileWriter<Double>(new SpecificDatumWriter<Double>(s)).create(s, f)) {
        for (int i = 0; i < 10; i++) {
            writer.append(3.0 * i);
        }
    }
    // Verify with the file-based random-access reader.
    int k = 0;
    try (DataFileReader<Double> in = new DataFileReader<Double>(f, new SpecificDatumReader<Double>(s))) {
        while (in.hasNext()) {
            assertEquals(3.0 * k++, in.next(), 0);
        }
    }
    // Verify again with the streaming reader over a buffered input stream.
    k = 0;
    try (DataFileStream<Double> data = new DataFileStream<Double>(new BufferedInputStream(new FileInputStream(f)), new SpecificDatumReader<Double>(s))) {
        while (data.hasNext()) {
            assertEquals(3.0 * k++, data.next(), 0);
        }
    }
}
Also used : Schema(org.apache.avro.Schema) DataFileWriter(org.apache.avro.file.DataFileWriter) DataFileStream(org.apache.avro.file.DataFileStream) FileInputStream(java.io.FileInputStream) DataFileReader(org.apache.avro.file.DataFileReader) BufferedInputStream(java.io.BufferedInputStream) File(java.io.File) Test(org.junit.Test)

Example 23 with DataFileStream

Use of org.apache.avro.file.DataFileStream in the project hive by apache.

From the class AvroGenericRecordWritable, the method readFields:

/**
 * Deserializes the first record of an Avro container file held in {@code bytes}
 * into the {@code record} field, resolving {@code writerSchema} against
 * {@code readerSchema}.
 *
 * @param bytes        serialized Avro container file contents
 * @param writerSchema schema the data was written with (stored in {@code fileSchema})
 * @param readerSchema schema the caller expects to read
 * @throws IOException if the bytes are not a valid Avro container or reading fails
 */
public void readFields(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException {
    fileSchema = writerSchema;
    record = new GenericData.Record(writerSchema);
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    gdr.setExpected(readerSchema);
    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
    // try-with-resources: the original leaked the stream when next() threw,
    // because close() was only reached on the success path.
    try (DataFileStream<GenericRecord> dfr = new DataFileStream<GenericRecord>(is, gdr)) {
        record = dfr.next(record);
    }
}
Also used : ByteArrayInputStream(java.io.ByteArrayInputStream) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) GenericRecord(org.apache.avro.generic.GenericRecord) DataFileStream(org.apache.avro.file.DataFileStream) GenericData(org.apache.avro.generic.GenericData)

Example 24 with DataFileStream

Use of org.apache.avro.file.DataFileStream in the project spf4j by zolyfarkas.

From the class AvroMeasurementStore, the method initWriter:

// Opens (or creates) the Avro data file for fileNameBase/clasz under destinationPath
// and returns a handle bundling the file path, a ready-to-append writer, the file's
// time-reference epoch, and the number of records already present.
// Returns initNrRecords = -1 when the file exists but countEntries is false,
// 0 for a freshly created file, otherwise the counted record total.
private <T extends SpecificRecord> AvroFileInfo<T> initWriter(final String fileNameBase, final Path destinationPath, final boolean countEntries, final Class<T> clasz) throws IOException {
    DataFileWriter<T> writer = new DataFileWriter<>(new SpecificDatumWriter<>(clasz));
    if (codecFact != null) {
        writer.setCodec(codecFact);
    }
    // Default epoch is "now"; overwritten below with the stored value when appending
    // to an existing file so all entries share one time reference.
    long epoch = System.currentTimeMillis();
    writer.setMeta("timeRef", epoch);
    String fileName = fileNameBase + '.' + clasz.getSimpleName().toLowerCase(Locale.US) + ".avro";
    Path file = destinationPath.resolve(fileName);
    long initNrRecords;
    if (Files.isWritable(file)) {
        // File already exists and is writable: read its metadata (and optionally
        // count existing records) before switching the writer into append mode.
        try (DataFileStream<T> streamReader = new DataFileStream<>(Files.newInputStream(file), new SpecificDatumReader<>(clasz))) {
            if (countEntries) {
                long count = 0L;
                // Count by whole blocks (getBlockCount + nextBlock) instead of
                // deserializing each record individually.
                while (streamReader.hasNext()) {
                    count += streamReader.getBlockCount();
                    streamReader.nextBlock();
                }
                initNrRecords = count;
            } else {
                initNrRecords = -1L;
            }
            // Reuse the epoch the file was originally created with.
            epoch = streamReader.getMetaLong("timeRef");
        }
        writer = writer.appendTo(file.toFile());
    } else {
        // No writable file yet: create one using the record class's own schema,
        // obtained from a throwaway instance via its no-arg constructor.
        try {
            writer.create(clasz.getConstructor().newInstance().getSchema(), file.toFile());
        } catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException ex) {
            throw new RuntimeException(ex);
        }
        initNrRecords = 0L;
    }
    return new AvroFileInfo<>(file, writer, epoch, initNrRecords);
}
Also used : Path(java.nio.file.Path) DataFileWriter(org.apache.avro.file.DataFileWriter) DataFileStream(org.apache.avro.file.DataFileStream) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 25 with DataFileStream

Use of org.apache.avro.file.DataFileStream in the project spf4j by zolyfarkas.

From the class AvroProfilePersisterTest, the method testPersister:

@Test
public void testPersister() throws IOException {
    // Persist the same profile sample tree ten times, then read the file back
    // and verify every persisted entry round-trips unchanged.
    final Path targetFile;
    final SampleNode root;
    try (AvroProfilePersister persister = new AvroProfilePersister(org.spf4j.base.Runtime.TMP_FOLDER_PATH, "testProfile", true, 60000L)) {
        final SampleNodeTest fixtures = new SampleNodeTest();
        root = SampleNode.createSampleNode(fixtures.newSt1());
        SampleNode.addToSampleNode(root, fixtures.newSt2());
        SampleNode.addToSampleNode(root, fixtures.newSt3());
        SampleNode.addToSampleNode(root, fixtures.newSt4());
        int writes = 0;
        while (writes < 10) {
            persister.persist(ImmutableMap.of("test", root), "tag", Instant.now(), Instant.now());
            writes++;
        }
        targetFile = persister.getTargetFile();
        LOG.debug("persisted profile to {}", targetFile);
    }
    // Read the ten entries back and compare against the in-memory tree.
    final SpecificDatumReader<ApplicationStackSamples> datumReader = new SpecificDatumReader<>(ApplicationStackSamples.class);
    try (DataFileStream<ApplicationStackSamples> in = new DataFileStream<>(Files.newInputStream(targetFile), datumReader)) {
        for (int read = 0; read < 10; read++) {
            final ApplicationStackSamples entry = in.next();
            Assert.assertEquals("test", entry.getContext());
            Assert.assertEquals(root, Converter.convert(entry.getStackSamples().iterator()));
        }
    }
}
Also used : Path(java.nio.file.Path) ApplicationStackSamples(org.spf4j.base.avro.ApplicationStackSamples) SpecificDatumReader(org.apache.avro.specific.SpecificDatumReader) DataFileStream(org.apache.avro.file.DataFileStream) Test(org.junit.Test)

Aggregations

DataFileStream (org.apache.avro.file.DataFileStream)59 GenericRecord (org.apache.avro.generic.GenericRecord)39 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)34 Test (org.junit.Test)26 Schema (org.apache.avro.Schema)21 ByteArrayInputStream (java.io.ByteArrayInputStream)20 InputStream (java.io.InputStream)19 IOException (java.io.IOException)13 ByteArrayOutputStream (java.io.ByteArrayOutputStream)11 File (java.io.File)9 FileInputStream (java.io.FileInputStream)9 ResultSet (java.sql.ResultSet)9 HashMap (java.util.HashMap)9 MockFlowFile (org.apache.nifi.util.MockFlowFile)9 Statement (java.sql.Statement)8 BufferedInputStream (java.io.BufferedInputStream)7 HashSet (java.util.HashSet)7 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)7 DataFileWriter (org.apache.avro.file.DataFileWriter)7 Path (org.apache.hadoop.fs.Path)7