Example usage of org.apache.avro.LogicalType in the Talend project tdi-studio-se: the put method of class DiIncomingSchemaEnforcer.
/**
 * Places an incoming Studio value into position {@code i} of the wrapped Avro record,
 * coercing it to the type required by the runtime schema (dates, BigDecimal, then the
 * plain Avro primitive types).
 *
 * @param i index of the field in the incoming runtime schema
 * @param v the raw value produced by the Studio row; may be {@code null}
 */
public void put(int i, Object v) {
    // Lazily create the backing record on first write.
    if (wrapped == null)
        wrapped = new GenericData.Record(getRuntimeSchema());
    if (v == null) {
        wrapped.put(i, null);
        return;
    }
    // TODO(rskraba): check type validation for correctness with studio objects.
    Schema.Field f = incomingRuntimeSchema.getFields().get(i);
    Schema fieldSchema = AvroUtils.unwrapIfNullable(f.schema());
    Object datum = null;
    // Avro logical date/timestamp-millis types are handled like Studio dates below.
    boolean isLogicalDate = false;
    LogicalType logicalType = fieldSchema.getLogicalType();
    if (logicalType != null) {
        if (logicalType == LogicalTypes.date() || logicalType == LogicalTypes.timestampMillis()) {
            isLogicalDate = true;
        }
    }
    // TODO(rskraba): This is pretty rough -- fix with a general type conversion strategy.
    String talendType = f.getProp(DiSchemaConstants.TALEND6_COLUMN_TALEND_TYPE);
    String javaClass = fieldSchema.getProp(SchemaConstants.JAVA_CLASS_FLAG);
    if (isLogicalDate || "id_Date".equals(talendType) || "java.util.Date".equals(javaClass)) {
        if (v instanceof Date) {
            datum = v;
        } else if (v instanceof Long) {
            datum = new Date((long) v);
        } else if (v instanceof String) {
            String pattern = f.getProp(DiSchemaConstants.TALEND6_COLUMN_PATTERN);
            String vs = (String) v;
            if (pattern == null || pattern.equals("yyyy-MM-dd'T'HH:mm:ss'000Z'")) {
                if (!vs.endsWith("000Z")) {
                    //$NON-NLS-1$ //$NON-NLS-2$
                    throw new RuntimeException("Unparseable date: \"" + vs + "\"");
                }
                pattern = "yyyy-MM-dd'T'HH:mm:ss";
                // FIX: keep the truncated string. The original discarded the substring
                // result, leaving the "000Z" suffix to be leniently (and incorrectly)
                // consumed by SimpleDateFormat's seconds field during parsing.
                vs = vs.substring(0, vs.lastIndexOf("000Z"));
            }
            // NOTE(review): SimpleDateFormat is not thread-safe; this cache assumes
            // single-threaded access -- confirm against callers.
            SimpleDateFormat df = dateFormatCache.get(pattern);
            if (df == null) {
                df = new SimpleDateFormat(pattern);
                df.setTimeZone(TimeZone.getTimeZone("UTC"));
                dateFormatCache.put(pattern, df);
            }
            try {
                // FIX: parse the (possibly truncated) string, not the raw value.
                datum = df.parse(vs);
            } catch (ParseException e) {
                throw new RuntimeException(e);
            }
        }
    }
    if ("id_BigDecimal".equals(talendType) || "java.math.BigDecimal".equals(javaClass)) {
        if (v instanceof BigDecimal) {
            datum = v;
        } else if (v instanceof String) {
            datum = new BigDecimal((String) v);
        }
    }
    // Fall back to plain Avro primitive coercion when no special case matched.
    if (datum == null) {
        switch (fieldSchema.getType()) {
        case ARRAY:
            break;
        case BOOLEAN:
            if (v instanceof Boolean)
                datum = v;
            else
                datum = Boolean.valueOf(String.valueOf(v));
            break;
        case FIXED:
        case BYTES:
            if (v instanceof byte[])
                datum = v;
            else
                datum = String.valueOf(v).getBytes();
            break;
        case DOUBLE:
            if (v instanceof Number)
                datum = ((Number) v).doubleValue();
            else
                datum = Double.valueOf(String.valueOf(v));
            break;
        case ENUM:
            break;
        case FLOAT:
            if (v instanceof Number)
                datum = ((Number) v).floatValue();
            else
                datum = Float.valueOf(String.valueOf(v));
            break;
        case INT:
            if (v instanceof Number)
                datum = ((Number) v).intValue();
            else
                datum = Integer.valueOf(String.valueOf(v));
            break;
        case LONG:
            if (v instanceof Number)
                datum = ((Number) v).longValue();
            else
                datum = Long.valueOf(String.valueOf(v));
            break;
        case MAP:
            break;
        case NULL:
            datum = null;
            // FIX: was falling through into RECORD (harmless, but unintended).
            break;
        case RECORD:
            break;
        case STRING:
            datum = String.valueOf(v);
            break;
        case UNION:
            break;
        default:
            break;
        }
    }
    wrapped.put(i, datum);
}
Example usage of org.apache.avro.LogicalType in the Apache NiFi project: the determineDataType method of class AvroTypeUtil.
/**
 * Maps an Avro schema to the corresponding NiFi record {@link DataType}, honoring
 * Avro logical types first (date/time/timestamp/decimal) and then the raw Avro type.
 * Already-seen record schemas are looked up in {@code knownRecordTypes} to support
 * recursive schema definitions.
 *
 * @param avroSchema       the Avro schema to translate
 * @param knownRecordTypes cache of record types keyed by fully-qualified schema name; must not be null
 * @return the NiFi data type, or null for an unrecognized Avro type
 */
public static DataType determineDataType(final Schema avroSchema, Map<String, DataType> knownRecordTypes) {
    if (knownRecordTypes == null) {
        throw new IllegalArgumentException("'knownRecordTypes' cannot be null.");
    }
    // A logical type annotation takes precedence over the underlying Avro type.
    final LogicalType logicalType = avroSchema.getLogicalType();
    if (logicalType != null) {
        final String name = logicalType.getName();
        if (LOGICAL_TYPE_DATE.equals(name)) {
            return RecordFieldType.DATE.getDataType();
        }
        if (LOGICAL_TYPE_TIME_MILLIS.equals(name) || LOGICAL_TYPE_TIME_MICROS.equals(name)) {
            return RecordFieldType.TIME.getDataType();
        }
        if (LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(name) || LOGICAL_TYPE_TIMESTAMP_MICROS.equals(name)) {
            return RecordFieldType.TIMESTAMP.getDataType();
        }
        if (LOGICAL_TYPE_DECIMAL.equals(name)) {
            // Alternatively we could convert it to String, but numeric type is generally more preferable by users.
            return RecordFieldType.DOUBLE.getDataType();
        }
    }
    switch (avroSchema.getType()) {
        case ARRAY:
            return RecordFieldType.ARRAY.getArrayDataType(determineDataType(avroSchema.getElementType(), knownRecordTypes));
        case BYTES:
        case FIXED:
            return RecordFieldType.ARRAY.getArrayDataType(RecordFieldType.BYTE.getDataType());
        case BOOLEAN:
            return RecordFieldType.BOOLEAN.getDataType();
        case DOUBLE:
            return RecordFieldType.DOUBLE.getDataType();
        case ENUM:
        case STRING:
            return RecordFieldType.STRING.getDataType();
        case FLOAT:
            return RecordFieldType.FLOAT.getDataType();
        case INT:
            return RecordFieldType.INT.getDataType();
        case LONG:
            return RecordFieldType.LONG.getDataType();
        case RECORD: {
            final String fullName = avroSchema.getNamespace() + "." + avroSchema.getName();
            if (knownRecordTypes.containsKey(fullName)) {
                return knownRecordTypes.get(fullName);
            }
            // Register the (initially empty) record type BEFORE walking its fields so
            // that recursive references back to this record resolve from the cache.
            final SimpleRecordSchema childSchema = new SimpleRecordSchema(avroSchema.toString(), AVRO_SCHEMA_FORMAT, SchemaIdentifier.EMPTY);
            final DataType recordType = RecordFieldType.RECORD.getRecordDataType(childSchema);
            knownRecordTypes.put(fullName, recordType);
            final List<Field> avroFields = avroSchema.getFields();
            final List<RecordField> recordFields = new ArrayList<>(avroFields.size());
            for (final Field avroField : avroFields) {
                final Schema childFieldSchema = avroField.schema();
                final DataType childFieldType = determineDataType(childFieldSchema, knownRecordTypes);
                addFieldToList(recordFields, avroField, avroField.name(), childFieldSchema, childFieldType, isNullable(childFieldSchema));
            }
            childSchema.setFields(recordFields);
            return recordType;
        }
        case NULL:
            return RecordFieldType.STRING.getDataType();
        case MAP:
            return RecordFieldType.MAP.getMapDataType(determineDataType(avroSchema.getValueType(), knownRecordTypes));
        case UNION: {
            final List<Schema> nonNullSubSchemas = getNonNullSubSchemas(avroSchema);
            // A union of [null, X] is just a nullable X, not a choice type.
            if (nonNullSubSchemas.size() == 1) {
                return determineDataType(nonNullSubSchemas.get(0), knownRecordTypes);
            }
            final List<DataType> choices = new ArrayList<>(nonNullSubSchemas.size());
            for (final Schema subSchema : nonNullSubSchemas) {
                choices.add(determineDataType(subSchema, knownRecordTypes));
            }
            return RecordFieldType.CHOICE.getChoiceDataType(choices);
        }
    }
    return null;
}
Example usage of org.apache.avro.LogicalType in the Apache NiFi project: the normalizeValue method of class AvroTypeUtil.
/**
 * Convert an Avro object to a normal Java objects for further processing.
 * The counter-part method which convert a raw value to an Avro object is {@link #convertToAvroObject(Object, Schema, String)}
 *
 * Logical types on INT/LONG/BYTES are mapped to java.sql.Date/Time/Timestamp
 * and BigDecimal; all other types recurse structurally. Falls through to
 * returning {@code value} unchanged when no conversion applies.
 */
private static Object normalizeValue(final Object value, final Schema avroSchema, final String fieldName) {
    if (value == null) {
        return null;
    }
    switch(avroSchema.getType()) {
        case INT:
        {
            final LogicalType logicalType = avroSchema.getLogicalType();
            if (logicalType == null) {
                // Plain int: no conversion needed.
                return value;
            }
            final String logicalName = logicalType.getName();
            if (LOGICAL_TYPE_DATE.equals(logicalName)) {
                // date logical name means that the value is number of days since Jan 1, 1970
                return new java.sql.Date(TimeUnit.DAYS.toMillis((int) value));
            } else if (LOGICAL_TYPE_TIME_MILLIS.equals(logicalName)) {
                // time-millis logical name means that the value is number of milliseconds since midnight.
                return new java.sql.Time((int) value);
            }
            // Unrecognized logical type on INT: fall through and return value as-is.
            break;
        }
        case LONG:
        {
            final LogicalType logicalType = avroSchema.getLogicalType();
            if (logicalType == null) {
                // Plain long: no conversion needed.
                return value;
            }
            final String logicalName = logicalType.getName();
            if (LOGICAL_TYPE_TIME_MICROS.equals(logicalName)) {
                return new java.sql.Time(TimeUnit.MICROSECONDS.toMillis((long) value));
            } else if (LOGICAL_TYPE_TIMESTAMP_MILLIS.equals(logicalName)) {
                return new java.sql.Timestamp((long) value);
            } else if (LOGICAL_TYPE_TIMESTAMP_MICROS.equals(logicalName)) {
                return new java.sql.Timestamp(TimeUnit.MICROSECONDS.toMillis((long) value));
            }
            // Unrecognized logical type on LONG: fall through and return value as-is.
            break;
        }
        case UNION:
            if (value instanceof GenericData.Record) {
                // The record carries its own resolved schema; use it instead of the union.
                final GenericData.Record avroRecord = (GenericData.Record) value;
                return normalizeValue(value, avroRecord.getSchema(), fieldName);
            }
            return convertUnionFieldValue(value, avroSchema, schema -> normalizeValue(value, schema, fieldName), fieldName);
        case RECORD:
            // Recursively normalize each field into a Map, then wrap as a MapRecord.
            final GenericData.Record record = (GenericData.Record) value;
            final Schema recordSchema = record.getSchema();
            final List<Field> recordFields = recordSchema.getFields();
            final Map<String, Object> values = new HashMap<>(recordFields.size());
            for (final Field field : recordFields) {
                final Object avroFieldValue = record.get(field.name());
                final Object fieldValue = normalizeValue(avroFieldValue, field.schema(), fieldName + "/" + field.name());
                values.put(field.name(), fieldValue);
            }
            final RecordSchema childSchema = AvroTypeUtil.createSchema(recordSchema);
            return new MapRecord(childSchema, values);
        case BYTES:
            final ByteBuffer bb = (ByteBuffer) value;
            final LogicalType logicalType = avroSchema.getLogicalType();
            if (logicalType != null && LOGICAL_TYPE_DECIMAL.equals(logicalType.getName())) {
                // decimal logical type: decode the byte buffer into a BigDecimal.
                return new Conversions.DecimalConversion().fromBytes(bb, avroSchema, logicalType);
            }
            return AvroTypeUtil.convertByteArray(bb.array());
        case FIXED:
            final GenericFixed fixed = (GenericFixed) value;
            return AvroTypeUtil.convertByteArray(fixed.bytes());
        case ENUM:
            return value.toString();
        case NULL:
            return null;
        case STRING:
            // Covers both String and Avro's Utf8 representation.
            return value.toString();
        case ARRAY:
            final GenericData.Array<?> array = (GenericData.Array<?>) value;
            final Object[] valueArray = new Object[array.size()];
            for (int i = 0; i < array.size(); i++) {
                final Schema elementSchema = avroSchema.getElementType();
                valueArray[i] = normalizeValue(array.get(i), elementSchema, fieldName + "[" + i + "]");
            }
            return valueArray;
        case MAP:
            final Map<?, ?> avroMap = (Map<?, ?>) value;
            final Map<String, Object> map = new HashMap<>(avroMap.size());
            for (final Map.Entry<?, ?> entry : avroMap.entrySet()) {
                Object obj = entry.getValue();
                // Normalize Avro Utf8/CharSequence values to plain Strings first.
                if (obj instanceof Utf8 || obj instanceof CharSequence) {
                    obj = obj.toString();
                }
                final String key = entry.getKey().toString();
                obj = normalizeValue(obj, avroSchema.getValueType(), fieldName + "[" + key + "]");
                map.put(key, obj);
            }
            return map;
    }
    return value;
}
Example usage of org.apache.avro.LogicalType in the Apache NiFi project: the testConvertToAvroStreamForBigDecimal method of class TestJdbcCommon.
/**
 * Converts a mocked single-row NUMERIC ResultSet to an Avro stream and verifies that
 * the generated schema carries a decimal logical type with the expected precision and
 * scale, and that the value round-trips unchanged. Also exercises name sanitization
 * ("The.Chairman" -> "The_Chairman", "1the::table" -> "_1the__table").
 */
private void testConvertToAvroStreamForBigDecimal(BigDecimal bigDecimal, int dbPrecision, int defaultPrecision, int expectedPrecision, int expectedScale) throws SQLException, IOException {
    // Mock a one-column NUMERIC result set with names that require sanitization.
    final ResultSetMetaData metadata = mock(ResultSetMetaData.class);
    when(metadata.getColumnCount()).thenReturn(1);
    when(metadata.getColumnType(1)).thenReturn(Types.NUMERIC);
    when(metadata.getColumnName(1)).thenReturn("The.Chairman");
    when(metadata.getTableName(1)).thenReturn("1the::table");
    when(metadata.getPrecision(1)).thenReturn(dbPrecision);
    when(metadata.getScale(1)).thenReturn(expectedScale);
    final ResultSet rs = mock(ResultSet.class);
    when(rs.getMetaData()).thenReturn(metadata);
    // Make rs.next() return true exactly once, so the stream contains a single row.
    final AtomicInteger counter = new AtomicInteger(1);
    Mockito.doAnswer(new Answer<Boolean>() {

        @Override
        public Boolean answer(InvocationOnMock invocation) throws Throwable {
            return counter.getAndDecrement() > 0;
        }
    }).when(rs).next();
    when(rs.getObject(Mockito.anyInt())).thenReturn(bigDecimal);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    // useLogicalTypes(true) is what produces the decimal logical type under test.
    final JdbcCommon.AvroConversionOptions options = JdbcCommon.AvroConversionOptions.builder().convertNames(true).useLogicalTypes(true).defaultPrecision(defaultPrecision).build();
    JdbcCommon.convertToAvroStream(rs, baos, options, null);
    final byte[] serializedBytes = baos.toByteArray();
    // Read the stream back with a decimal conversion registered so values come back as BigDecimal.
    final InputStream instream = new ByteArrayInputStream(serializedBytes);
    final GenericData genericData = new GenericData();
    genericData.addLogicalTypeConversion(new Conversions.DecimalConversion());
    final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(null, null, genericData);
    try (final DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(instream, datumReader)) {
        final Schema generatedUnion = dataFileReader.getSchema().getField("The_Chairman").schema();
        // null and decimal.
        assertEquals(2, generatedUnion.getTypes().size());
        // The non-null branch of the union must carry the decimal logical type.
        final LogicalType logicalType = generatedUnion.getTypes().get(1).getLogicalType();
        assertNotNull(logicalType);
        assertEquals("decimal", logicalType.getName());
        LogicalTypes.Decimal decimalType = (LogicalTypes.Decimal) logicalType;
        assertEquals(expectedPrecision, decimalType.getPrecision());
        assertEquals(expectedScale, decimalType.getScale());
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            record = dataFileReader.next(record);
            assertEquals("_1the__table", record.getSchema().getName());
            assertEquals(bigDecimal, record.get("The_Chairman"));
        }
    }
}
Example usage of org.apache.avro.LogicalType in the Apache NiFi project: the testLogicalTypes method of class TestWriteAvroResult.
/**
 * Writes a record containing every supported logical type (time, timestamp, date,
 * decimal) through the Avro writer and verifies the serialized Avro representations:
 * millis/micros offsets for time fields, epoch values for timestamps, day count for
 * the date, and a logical-decimal encoding for the double value.
 *
 * @param schema the Avro schema (possibly union-typed fields) used for writing
 */
private void testLogicalTypes(Schema schema) throws ParseException, IOException {
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    // Build the NiFi record schema covering each logical-type field.
    final List<RecordField> recordFieldList = new ArrayList<>();
    recordFieldList.add(new RecordField("timeMillis", RecordFieldType.TIME.getDataType()));
    recordFieldList.add(new RecordField("timeMicros", RecordFieldType.TIME.getDataType()));
    recordFieldList.add(new RecordField("timestampMillis", RecordFieldType.TIMESTAMP.getDataType()));
    recordFieldList.add(new RecordField("timestampMicros", RecordFieldType.TIMESTAMP.getDataType()));
    recordFieldList.add(new RecordField("date", RecordFieldType.DATE.getDataType()));
    // Avro decimal is represented as double in NiFi type system.
    recordFieldList.add(new RecordField("decimal", RecordFieldType.DOUBLE.getDataType()));
    final RecordSchema nifiSchema = new SimpleRecordSchema(recordFieldList);
    // Fix the instant under test in GMT so the expected offsets are deterministic.
    final String expectedTime = "2017-04-04 14:20:33.789";
    final DateFormat parser = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
    parser.setTimeZone(TimeZone.getTimeZone("gmt"));
    final long timeLong = parser.parse(expectedTime).getTime();
    final Map<String, Object> fieldValues = new HashMap<>();
    fieldValues.put("timeMillis", new Time(timeLong));
    fieldValues.put("timeMicros", new Time(timeLong));
    fieldValues.put("timestampMillis", new Timestamp(timeLong));
    fieldValues.put("timestampMicros", new Timestamp(timeLong));
    fieldValues.put("date", new Date(timeLong));
    // Avro decimal is represented as double in NiFi type system.
    final BigDecimal expectedDecimal = new BigDecimal("123.45");
    fieldValues.put("decimal", expectedDecimal.doubleValue());
    final Record record = new MapRecord(nifiSchema, fieldValues);
    try (final RecordSetWriter writer = createWriter(schema, out)) {
        writer.write(RecordSet.of(record.getSchema(), record));
    }
    final byte[] serialized = out.toByteArray();
    try (final InputStream in = new ByteArrayInputStream(serialized)) {
        final GenericRecord avroRecord = readRecord(in, schema);
        // 14:20:33.789 expressed as milliseconds since midnight.
        final long secondsSinceMidnight = 33 + (20 * 60) + (14 * 60 * 60);
        final long millisSinceMidnight = (secondsSinceMidnight * 1000L) + 789;
        assertEquals((int) millisSinceMidnight, avroRecord.get("timeMillis"));
        assertEquals(millisSinceMidnight * 1000L, avroRecord.get("timeMicros"));
        assertEquals(timeLong, avroRecord.get("timestampMillis"));
        assertEquals(timeLong * 1000L, avroRecord.get("timestampMicros"));
        assertEquals(17260, avroRecord.get("date"));
        // Double value will be converted into logical decimal if Avro schema is defined as logical decimal.
        final Schema decimalSchema = schema.getField("decimal").schema();
        LogicalType decimalLogicalType = decimalSchema.getLogicalType();
        if (decimalLogicalType == null) {
            // Union type doesn't return logical type. Find the first logical type defined within the union.
            decimalLogicalType = decimalSchema.getTypes().stream().map(s -> s.getLogicalType()).filter(Objects::nonNull).findFirst().get();
        }
        final BigDecimal decimal = new Conversions.DecimalConversion().fromBytes((ByteBuffer) avroRecord.get("decimal"), decimalSchema, decimalLogicalType);
        assertEquals(expectedDecimal, decimal);
    }
}
Aggregations