Search in sources :

Example 6 with GenericData

use of org.apache.avro.generic.GenericData in project flink by apache.

the class AvroFactory method fromGeneric.

/**
 * Creates an {@link AvroFactory} for generic records described by {@code schema}.
 *
 * <p>A {@link GenericData} model bound to the given class loader is shared by the
 * datum reader and the datum writer. The reader receives {@code schema} for both of
 * its schema arguments.
 *
 * @param cl class loader handed to the {@link GenericData} model
 * @param schema schema of the generic records; checked for null before use
 * @return a factory wired with the generic model, the schema, a reader and a writer
 */
private static <T> AvroFactory<T> fromGeneric(ClassLoader cl, Schema schema) {
    checkNotNull(schema, "Unable to create an AvroSerializer with a GenericRecord type without a schema");
    final GenericData model = new GenericData(cl);
    final GenericDatumReader<T> datumReader = new GenericDatumReader<>(schema, schema, model);
    final GenericDatumWriter<T> datumWriter = new GenericDatumWriter<>(schema, model);
    return new AvroFactory<>(model, schema, datumReader, datumWriter);
}
Also used : GenericData(org.apache.avro.generic.GenericData)

Example 7 with GenericData

use of org.apache.avro.generic.GenericData in project nifi by apache.

the class StandardContentViewerController method doGet.

/**
 * Renders the {@link ViewableContent} attached to the request.
 *
 * <p>For a supported MIME type the content is either passed through untouched (when the
 * display mode is {@code Original}) or pretty-printed: JSON is re-serialized with Jackson,
 * XML is re-indented through an identity transform, and Avro data files are converted to a
 * JSON array of records and then pretty-printed as JSON. Any other supported type is passed
 * through as-is. The result is handed to {@code codemirror.jsp} through the {@code mode} and
 * {@code content} request attributes. For an unsupported MIME type a plain message is written
 * to the response instead.
 *
 * @param request servlet request
 * @param response servlet response
 * @throws ServletException if a servlet-specific error occurs
 * @throws IOException if an I/O error occurs, including a failure to transform XML content
 */
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    final ViewableContent content = (ViewableContent) request.getAttribute(ViewableContent.CONTENT_REQUEST_ATTRIBUTE);
    // handle json/xml/avro specifically, treat others as plain text
    String contentType = content.getContentType();
    if (supportedMimeTypes.contains(contentType)) {
        final String formatted;
        // leave the content alone if specified
        if (DisplayMode.Original.equals(content.getDisplayMode())) {
            formatted = content.getContent();
        } else {
            if ("application/json".equals(contentType)) {
                // format json
                final ObjectMapper mapper = new ObjectMapper();
                final Object objectJson = mapper.readValue(content.getContentStream(), Object.class);
                formatted = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(objectJson);
            } else if ("application/xml".equals(contentType) || "text/xml".equals(contentType)) {
                // format xml
                final StringWriter writer = new StringWriter();
                try {
                    final StreamSource source = new StreamSource(content.getContentStream());
                    final StreamResult result = new StreamResult(writer);
                    final TransformerFactory transformFactory = TransformerFactory.newInstance();
                    // The XML being displayed is supplied by the caller and may be untrusted, so ask the
                    // factory for secure processing before parsing it (limits entity expansion and, on
                    // the JDK implementation, external DTD/stylesheet access).
                    transformFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
                    final Transformer transformer = transformFactory.newTransformer();
                    transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
                    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
                    transformer.transform(source, result);
                } catch (final TransformerFactoryConfigurationError | TransformerException te) {
                    throw new IOException("Unable to transform content as XML: " + te, te);
                }
                // get the transformed xml
                formatted = writer.toString();
            } else if ("application/avro-binary".equals(contentType) || "avro/binary".equals(contentType) || "application/avro+binary".equals(contentType)) {
                // Upper bound on the size of the JSON text built from the Avro records.
                final int maxFormattedChars = 2 * 1024 * 1024;
                final StringBuilder sb = new StringBuilder();
                sb.append("[");
                // Use Avro conversions to display logical type values in human readable way.
                final GenericData genericData = new GenericData() {

                    @Override
                    protected void toString(Object datum, StringBuilder buffer) {
                        // Since these types are not quoted and produce a malformed JSON string, quote it here.
                        if (datum instanceof LocalDate || datum instanceof LocalTime || datum instanceof DateTime) {
                            buffer.append("\"").append(datum).append("\"");
                            return;
                        }
                        super.toString(datum, buffer);
                    }
                };
                genericData.addLogicalTypeConversion(new Conversions.DecimalConversion());
                genericData.addLogicalTypeConversion(new TimeConversions.DateConversion());
                genericData.addLogicalTypeConversion(new TimeConversions.TimeConversion());
                genericData.addLogicalTypeConversion(new TimeConversions.TimestampConversion());
                final DatumReader<GenericData.Record> datumReader = new GenericDatumReader<>(null, null, genericData);
                try (final DataFileStream<GenericData.Record> dataFileReader = new DataFileStream<>(content.getContentStream(), datumReader)) {
                    while (dataFileReader.hasNext()) {
                        final GenericData.Record record = dataFileReader.next();
                        final String formattedRecord = genericData.toString(record);
                        sb.append(formattedRecord);
                        sb.append(",");
                        // Stop once more than 2 * 1024 * 1024 characters have been formatted; the
                        // record that crosses the limit is still included in full.
                        if (sb.length() > maxFormattedChars) {
                            break;
                        }
                    }
                }
                // Drop the trailing comma left by the last record (nothing to drop when no record was read).
                if (sb.length() > 1) {
                    sb.deleteCharAt(sb.length() - 1);
                }
                sb.append("]");
                final String json = sb.toString();
                final ObjectMapper mapper = new ObjectMapper();
                final Object objectJson = mapper.readValue(json, Object.class);
                formatted = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(objectJson);
                // The Avro content is presented as JSON from here on.
                contentType = "application/json";
            } else {
                // leave plain text alone when formatting
                formatted = content.getContent();
            }
        }
        // defer to the jsp
        request.setAttribute("mode", contentType);
        request.setAttribute("content", formatted);
        request.getRequestDispatcher("/WEB-INF/jsp/codemirror.jsp").include(request, response);
    } else {
        final PrintWriter out = response.getWriter();
        out.println("Unexpected content type: " + contentType);
    }
}
Also used : Transformer(javax.xml.transform.Transformer) GenericDatumReader(org.apache.avro.generic.GenericDatumReader) LocalDate(org.joda.time.LocalDate) DateTime(org.joda.time.DateTime) StringWriter(java.io.StringWriter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) PrintWriter(java.io.PrintWriter) TransformerFactory(javax.xml.transform.TransformerFactory) StreamResult(javax.xml.transform.stream.StreamResult) LocalTime(org.joda.time.LocalTime) StreamSource(javax.xml.transform.stream.StreamSource) IOException(java.io.IOException) DataFileStream(org.apache.avro.file.DataFileStream) GenericData(org.apache.avro.generic.GenericData) TimeConversions(org.apache.avro.data.TimeConversions) Conversions(org.apache.avro.Conversions)

Example 8 with GenericData

use of org.apache.avro.generic.GenericData in project nifi by apache.

the class TestJdbcCommon method testConvertToAvroStreamForBigDecimal.

/**
 * Converts a mocked one-column NUMERIC result set to an Avro stream with logical types
 * enabled, then reads the stream back and verifies the generated decimal schema and value.
 *
 * @param bigDecimal value returned by the mocked result set for every column read
 * @param dbPrecision precision reported by the mocked column metadata
 * @param defaultPrecision default precision passed to the conversion options
 * @param expectedPrecision precision expected on the generated decimal logical type
 * @param expectedScale scale reported by the mocked metadata and expected on the logical type
 */
private void testConvertToAvroStreamForBigDecimal(BigDecimal bigDecimal, int dbPrecision, int defaultPrecision, int expectedPrecision, int expectedScale) throws SQLException, IOException {
    // Column metadata: a single NUMERIC column whose column/table names need normalizing.
    final ResultSetMetaData columnMeta = mock(ResultSetMetaData.class);
    when(columnMeta.getColumnCount()).thenReturn(1);
    when(columnMeta.getColumnType(1)).thenReturn(Types.NUMERIC);
    when(columnMeta.getColumnName(1)).thenReturn("The.Chairman");
    when(columnMeta.getTableName(1)).thenReturn("1the::table");
    when(columnMeta.getPrecision(1)).thenReturn(dbPrecision);
    when(columnMeta.getScale(1)).thenReturn(expectedScale);

    // Result set: next() answers true while the counter is still positive, i.e. exactly once.
    final ResultSet resultSet = mock(ResultSet.class);
    when(resultSet.getMetaData()).thenReturn(columnMeta);
    final AtomicInteger rowsLeft = new AtomicInteger(1);
    final Answer<Boolean> hasMoreRows = new Answer<Boolean>() {

        @Override
        public Boolean answer(InvocationOnMock invocation) throws Throwable {
            return rowsLeft.getAndDecrement() > 0;
        }
    };
    Mockito.doAnswer(hasMoreRows).when(resultSet).next();
    when(resultSet.getObject(Mockito.anyInt())).thenReturn(bigDecimal);

    // Run the conversion into an in-memory buffer.
    final ByteArrayOutputStream avroOut = new ByteArrayOutputStream();
    final JdbcCommon.AvroConversionOptions conversionOptions = JdbcCommon.AvroConversionOptions.builder()
            .convertNames(true)
            .useLogicalTypes(true)
            .defaultPrecision(defaultPrecision)
            .build();
    JdbcCommon.convertToAvroStream(resultSet, avroOut, conversionOptions, null);

    // Read the stream back with a data model that has the decimal conversion registered.
    final InputStream avroIn = new ByteArrayInputStream(avroOut.toByteArray());
    final GenericData decimalAwareModel = new GenericData();
    decimalAwareModel.addLogicalTypeConversion(new Conversions.DecimalConversion());
    final DatumReader<GenericRecord> recordReader = new GenericDatumReader<>(null, null, decimalAwareModel);
    try (final DataFileStream<GenericRecord> avroStream = new DataFileStream<>(avroIn, recordReader)) {
        final Schema fieldUnion = avroStream.getSchema().getField("The_Chairman").schema();
        // null and decimal.
        assertEquals(2, fieldUnion.getTypes().size());
        final LogicalType secondBranchType = fieldUnion.getTypes().get(1).getLogicalType();
        assertNotNull(secondBranchType);
        assertEquals("decimal", secondBranchType.getName());
        final LogicalTypes.Decimal decimal = (LogicalTypes.Decimal) secondBranchType;
        assertEquals(expectedPrecision, decimal.getPrecision());
        assertEquals(expectedScale, decimal.getScale());

        GenericRecord current = null;
        while (avroStream.hasNext()) {
            current = avroStream.next(current);
            assertEquals("_1the__table", current.getSchema().getName());
            assertEquals(bigDecimal, current.get("The_Chairman"));
        }
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) LogicalType(org.apache.avro.LogicalType) ResultSetMetaData(java.sql.ResultSetMetaData) BigDecimal(java.math.BigDecimal) ResultSet(java.sql.ResultSet) GenericRecord(org.apache.avro.generic.GenericRecord) ReaderInputStream(org.apache.commons.io.input.ReaderInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) InputStream(java.io.InputStream) LogicalTypes(org.apache.avro.LogicalTypes) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataFileStream(org.apache.avro.file.DataFileStream) GenericData(org.apache.avro.generic.GenericData) Conversions(org.apache.avro.Conversions) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) InvocationOnMock(org.mockito.invocation.InvocationOnMock)

Example 9 with GenericData

use of org.apache.avro.generic.GenericData in project parquet-mr by apache.

the class TestReadWrite method testFixedDecimalValues.

/**
 * Round-trips 1000 records with a fixed(4)-backed {@code decimal(9, 2)} field through a
 * Parquet file, using a {@link GenericData} model that has the decimal conversion registered,
 * and checks that the values come back as {@link BigDecimal} instances equal to what was written.
 */
@Test
public void testFixedDecimalValues() throws Exception {
    Schema decimalSchema = Schema.createRecord("myrecord", null, null, false);
    Schema decimal = LogicalTypes.decimal(9, 2).addToSchema(Schema.createFixed("dec", null, null, 4));
    decimalSchema.setFields(Collections.singletonList(new Schema.Field("dec", decimal, null, null)));
    // add the decimal conversion to a generic data model
    GenericData decimalSupport = new GenericData();
    decimalSupport.addLogicalTypeConversion(new Conversions.DecimalConversion());
    File file = temp.newFile("decimal.parquet");
    // NOTE(review): presumably removed so the writer can create the file itself; confirm
    file.delete();
    Path path = new Path(file.toString());
    Random random = new Random(34L);
    GenericRecordBuilder builder = new GenericRecordBuilder(decimalSchema);
    List<GenericRecord> expected = Lists.newArrayList();
    // try-with-resources so the writer is closed even when a write fails
    try (ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(path).withDataModel(decimalSupport).withSchema(decimalSchema).build()) {
        for (int i = 0; i < 1000; i += 1) {
            // 29 random bits give an unscaled value of at most 536,870,911 (9 digits), which
            // stays within the declared precision of 9; 31 bits could produce 10 digits.
            BigDecimal dec = new BigDecimal(new BigInteger(29, random), 2);
            builder.set("dec", dec);
            GenericRecord rec = builder.build();
            expected.add(rec);
            // write the very record that is expected back instead of building a second copy
            writer.write(rec);
        }
    }
    List<GenericRecord> records = Lists.newArrayList();
    // try-with-resources so the reader is closed even when a read fails
    try (ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord>builder(path).withDataModel(decimalSupport).disableCompatibility().build()) {
        GenericRecord rec;
        while ((rec = reader.read()) != null) {
            records.add(rec);
        }
    }
    Assert.assertTrue("dec field should be a BigDecimal instance", records.get(0).get("dec") instanceof BigDecimal);
    Assert.assertEquals("Content should match", expected, records);
}
Also used : Path(org.apache.hadoop.fs.Path) Schema(org.apache.avro.Schema) GenericData(org.apache.avro.generic.GenericData) BigDecimal(java.math.BigDecimal) Conversions(org.apache.avro.Conversions) Random(java.util.Random) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) BigInteger(java.math.BigInteger) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) Test(org.junit.Test)

Example 10 with GenericData

use of org.apache.avro.generic.GenericData in project parquet-mr by apache.

the class TestCircularReferences method test.

/**
 * Writes a parent record whose child record points back at it, reads it back through the
 * same data model, and checks the parent, the child, and the child's parent reference
 * field by field.
 */
@Test
public void test() throws IOException {
    // Data model with the tracker and handler conversions supplied by the reference manager.
    ReferenceManager refManager = new ReferenceManager();
    GenericData dataModel = new GenericData();
    dataModel.addLogicalTypeConversion(refManager.getTracker());
    dataModel.addLogicalTypeConversion(refManager.getHandler());

    Schema parentSchema = Schema.createRecord("Parent", null, null, false);

    // Placeholder record used inside the child's "parent" union.
    Schema stubSchema = Schema.createRecord("Placeholder", null, null, false);
    List<Schema.Field> stubFields = new ArrayList<Schema.Field>();
    // at least one field is needed to be a valid schema
    stubFields.add(new Schema.Field("id", Schema.create(Schema.Type.LONG), null, null));
    stubSchema.setFields(stubFields);

    Referenceable referenceable = new Referenceable("id");
    Schema nullType = Schema.create(Schema.Type.NULL);
    Schema longType = Schema.create(Schema.Type.LONG);
    Schema parentUnion = Schema.createUnion(nullType, longType, referenceable.addToSchema(stubSchema));

    // Child schema: a string plus the union-typed reference to the parent.
    Reference backReference = new Reference("parent");
    List<Schema.Field> childFields = new ArrayList<Schema.Field>();
    childFields.add(new Schema.Field("c", Schema.create(Schema.Type.STRING), null, null));
    childFields.add(new Schema.Field("parent", parentUnion, null, null));
    Schema childSchema = backReference.addToSchema(Schema.createRecord("Child", null, null, false, childFields));

    // Parent schema: id, string payload and the child record.
    List<Schema.Field> parentFields = new ArrayList<Schema.Field>();
    parentFields.add(new Schema.Field("id", Schema.create(Schema.Type.LONG), null, null));
    parentFields.add(new Schema.Field("p", Schema.create(Schema.Type.STRING), null, null));
    parentFields.add(new Schema.Field("child", childSchema, null, null));
    parentSchema.setFields(parentFields);
    Schema schema = referenceable.addToSchema(parentSchema);
    System.out.println("Schema: " + schema.toString(true));

    // Build the circular structure: parent -> child -> parent.
    Record parentRecord = new Record(schema);
    parentRecord.put("id", 1L);
    parentRecord.put("p", "parent data!");
    Record childRecord = new Record(childSchema);
    childRecord.put("c", "child data!");
    childRecord.put("parent", parentRecord);
    parentRecord.put("child", childRecord);

    // serialization round trip
    File avroFile = AvroTestUtil.write(temp, dataModel, schema, parentRecord);
    List<Record> readBack = AvroTestUtil.read(dataModel, schema, avroFile);
    Record readParent = readBack.get(0);

    // because the record is a recursive structure, equals won't work
    Assert.assertEquals("Should correctly read back the parent id", 1L, readParent.get("id"));
    Assert.assertEquals("Should correctly read back the parent data", new Utf8("parent data!"), readParent.get("p"));

    Record readChild = (Record) readParent.get("child");
    Assert.assertEquals("Should correctly read back the child data", new Utf8("child data!"), readChild.get("c"));

    Object childParent = readChild.get("parent");
    Assert.assertTrue("Should have a parent Record object", childParent instanceof Record);
    Record childParentRecord = (Record) childParent;
    Assert.assertEquals("Should have the right parent id", 1L, childParentRecord.get("id"));
    Assert.assertEquals("Should have the right parent data", new Utf8("parent data!"), childParentRecord.get("p"));
}
Also used : Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) Utf8(org.apache.avro.util.Utf8) Record(org.apache.avro.generic.GenericData.Record) IndexedRecord(org.apache.avro.generic.IndexedRecord) File(java.io.File) Test(org.junit.Test)

Aggregations

GenericData (org.apache.avro.generic.GenericData)11 Schema (org.apache.avro.Schema)7 GenericRecord (org.apache.avro.generic.GenericRecord)5 Conversions (org.apache.avro.Conversions)4 File (java.io.File)3 BigDecimal (java.math.BigDecimal)3 DataFileStream (org.apache.avro.file.DataFileStream)3 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)3 Test (org.junit.Test)3 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 InputStream (java.io.InputStream)2 BigInteger (java.math.BigInteger)2 Random (java.util.Random)2 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)2 Path (org.apache.hadoop.fs.Path)2 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 BufferedInputStream (java.io.BufferedInputStream)1 BufferedOutputStream (java.io.BufferedOutputStream)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 IOException (java.io.IOException)1