Search in sources :

Example 66 with Record

use of org.apache.avro.generic.GenericData.Record in project parquet-mr by apache.

the class TestCircularReferences method test.

/**
 * Writes a parent record whose child record points back at the parent, reads it
 * back, and checks the values on both sides of the cycle.
 */
@Test
public void test() throws IOException {
    // Register the two logical-type conversions exposed by the reference manager.
    ReferenceManager refManager = new ReferenceManager();
    GenericData avroModel = new GenericData();
    avroModel.addLogicalTypeConversion(refManager.getTracker());
    avroModel.addLogicalTypeConversion(refManager.getHandler());

    // Placeholder record; at least one field is needed to be a valid schema.
    List<Schema.Field> placeholderFields = new ArrayList<Schema.Field>();
    placeholderFields.add(new Schema.Field("id", Schema.create(Schema.Type.LONG), null, null));
    Schema placeholderSchema = Schema.createRecord("Placeholder", null, null, false);
    placeholderSchema.setFields(placeholderFields);

    // The child's "parent" field is a union of null, long and the placeholder
    // record after it has been passed through the "id" Referenceable.
    Referenceable idRef = new Referenceable("id");
    Schema nullSchema = Schema.create(Schema.Type.NULL);
    Schema longSchema = Schema.create(Schema.Type.LONG);
    Schema parentRefSchema = Schema.createUnion(nullSchema, longSchema, idRef.addToSchema(placeholderSchema));

    // Child record schema, passed through the "parent" Reference.
    Reference parentRef = new Reference("parent");
    List<Schema.Field> childFields = new ArrayList<Schema.Field>();
    childFields.add(new Schema.Field("c", Schema.create(Schema.Type.STRING), null, null));
    childFields.add(new Schema.Field("parent", parentRefSchema, null, null));
    Schema childRecordSchema = Schema.createRecord("Child", null, null, false, childFields);
    Schema childSchema = parentRef.addToSchema(childRecordSchema);

    // Parent record schema, which embeds the child schema.
    Schema parentSchema = Schema.createRecord("Parent", null, null, false);
    List<Schema.Field> parentFields = new ArrayList<Schema.Field>();
    parentFields.add(new Schema.Field("id", Schema.create(Schema.Type.LONG), null, null));
    parentFields.add(new Schema.Field("p", Schema.create(Schema.Type.STRING), null, null));
    parentFields.add(new Schema.Field("child", childSchema, null, null));
    parentSchema.setFields(parentFields);
    Schema schema = idRef.addToSchema(parentSchema);
    System.out.println("Schema: " + schema.toString(true));

    // Build the two records so that each one points at the other.
    Record parent = new Record(schema);
    parent.put("id", 1L);
    parent.put("p", "parent data!");
    Record child = new Record(childSchema);
    child.put("c", "child data!");
    child.put("parent", parent);
    parent.put("child", child);

    // serialization round trip
    File data = AvroTestUtil.write(temp, avroModel, schema, parent);
    List<Record> readBack = AvroTestUtil.read(avroModel, schema, data);
    Record actualParent = readBack.get(0);

    // because the record is a recursive structure, equals won't work;
    // compare the individual values instead
    Utf8 expectedParentData = new Utf8("parent data!");
    Assert.assertEquals("Should correctly read back the parent id", 1L, actualParent.get("id"));
    Assert.assertEquals("Should correctly read back the parent data", expectedParentData, actualParent.get("p"));

    Record actualChild = (Record) actualParent.get("child");
    Assert.assertEquals("Should correctly read back the child data", new Utf8("child data!"), actualChild.get("c"));

    Object childParent = actualChild.get("parent");
    Assert.assertTrue("Should have a parent Record object", childParent instanceof Record);
    Record childParentRecord = (Record) childParent;
    Assert.assertEquals("Should have the right parent id", 1L, childParentRecord.get("id"));
    Assert.assertEquals("Should have the right parent data", new Utf8("parent data!"), childParentRecord.get("p"));
}
Also used : Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) GenericData(org.apache.avro.generic.GenericData) Utf8(org.apache.avro.util.Utf8) Record(org.apache.avro.generic.GenericData.Record) IndexedRecord(org.apache.avro.generic.IndexedRecord) File(java.io.File) Test(org.junit.Test)

Example 67 with Record

use of org.apache.avro.generic.GenericData.Record in project components by Talend.

the class KeyValueUtils method getField.

/**
 * Retrieve a field from an IndexedRecord and return it wrapped in a new single-field record.
 *
 * The path is split on '.'; every element except the last one must resolve to a nested
 * {@link Record}. The returned record uses a schema named "temp" whose only field copies the
 * name, doc and default value of the located field, with its schema passed through
 * {@code AvroUtils.wrapAsNullable}.
 *
 * @param fieldPath the field name. Can be a dot-separated path for a hierarchical element
 * @param record an Indexed record
 * @return a single-field record holding the located value, or null when any element of the
 * path (including the last one) cannot be resolved
 */
public static Record getField(String fieldPath, IndexedRecord record) {
    // TODO current implementation will only extract one element
    String[] path = fieldPath.split("\\.");
    if (path.length == 0) {
        // A path made only of separators (e.g. ".") splits to an empty array: nothing to look up.
        return null;
    }
    Schema schema = record.getSchema();
    // Walk every intermediate element; the last one is resolved after the loop.
    for (int i = 0; i < path.length - 1; i++) {
        Field pathField = schema.getField(path[i]);
        if (pathField == null) {
            return null;
        }
        Object inputValue = record.get(pathField.pos());
        if (!(inputValue instanceof Record)) {
            // An intermediate element must be a record; no need to go further.
            return null;
        }
        // We are on a record: descend into it and continue with its schema.
        record = (IndexedRecord) inputValue;
        Schema fieldSchema = pathField.schema();
        if (fieldSchema.getType().equals(Type.RECORD)) {
            schema = fieldSchema;
        } else if (fieldSchema.getType().equals(Type.UNION)) {
            // The declared schema is a union: use its first RECORD branch as the sub-schema.
            for (Schema childSchema : fieldSchema.getTypes()) {
                if (childSchema.getType().equals(Type.RECORD)) {
                    schema = childSchema;
                    break;
                }
            }
        }
    }
    Field field = schema.getField(path[path.length - 1]);
    if (field == null) {
        // The last element does not exist: honour the documented "null otherwise" contract
        // instead of failing with a NullPointerException.
        return null;
    }
    Field wrappedField = new Field(field.name(), AvroUtils.wrapAsNullable(field.schema()), field.doc(), field.defaultVal());
    Schema fieldRecordSchema = Schema.createRecord("temp", null, null, false, Arrays.asList(wrappedField));
    Record fieldRecord = new Record(fieldRecordSchema);
    fieldRecord.put(0, record.get(field.pos()));
    return fieldRecord;
}
Also used : Field(org.apache.avro.Schema.Field) Schema(org.apache.avro.Schema) Record(org.apache.avro.generic.GenericData.Record) IndexedRecord(org.apache.avro.generic.IndexedRecord)

Example 68 with Record

use of org.apache.avro.generic.GenericData.Record in project components by Talend.

the class KeyValueUtils method getFieldValue.

// TODO externalize the following method
/**
 * Retrieve the value of a field from an IndexedRecord.
 *
 * The path is split on '.'; every element except the last one must resolve to a nested
 * {@link Record}. Note that when the last element itself resolves to a nested Record, the
 * method descends into it and returns null rather than the record.
 *
 * @param fieldPath the field name. Can be a dot-separated path for a hierarchical element
 * @param record an Indexed record
 * @return the value located at the end of the path, null if an element of the path cannot be
 * resolved or if the last element is a nested Record
 */
public static Object getFieldValue(String fieldPath, IndexedRecord record) {
    // TODO current implementation will only extract one element
    String[] path = fieldPath.split("\\.");
    Schema schema = record.getSchema();
    final int lastIndex = path.length - 1;
    // A primitive counter avoids boxing an Integer on every iteration.
    for (int i = 0; i < path.length; i++) {
        Schema.Field pathField = schema.getField(path[i]);
        if (pathField == null) {
            return null;
        }
        Object inputValue = record.get(pathField.pos());
        if (!(inputValue instanceof Record)) {
            // A non-record value is the result when it sits at the end of the path;
            // anywhere else the path cannot be followed further.
            return i == lastIndex ? inputValue : null;
        }
        // We are on a record: descend into it and continue with its schema.
        record = (IndexedRecord) inputValue;
        Schema fieldSchema = pathField.schema();
        if (fieldSchema.getType().equals(Type.RECORD)) {
            schema = fieldSchema;
        } else if (fieldSchema.getType().equals(Type.UNION)) {
            // The declared schema is a union: use its first RECORD branch as the sub-schema.
            for (Schema childSchema : fieldSchema.getTypes()) {
                if (childSchema.getType().equals(Type.RECORD)) {
                    schema = childSchema;
                    break;
                }
            }
        }
    }
    // The whole path was consumed without reaching a non-record value.
    return null;
}
Also used : Schema(org.apache.avro.Schema) Record(org.apache.avro.generic.GenericData.Record) IndexedRecord(org.apache.avro.generic.IndexedRecord)

Example 69 with Record

use of org.apache.avro.generic.GenericData.Record in project components by Talend.

the class KeyValueUtils method extractIndexedRecord.

/**
 * Build a new IndexedRecord shaped by {@code outputSchema} from the content of {@code inputRecord}.
 *
 * For each field of the output schema:
 * - if the input has no field of that name, the value comes from
 *   {@code KeyValueUtils.generateEmptyRecord};
 * - if the input value is not a {@link Record}, it is copied as is;
 * - if the input value is a Record and both unwrapped child schemas are records, the child is
 *   converted recursively; otherwise nothing is set on the builder for that field.
 *
 * @param inputRecord the record to read values from
 * @param outputSchema the schema of the record to produce
 * @return the new record
 */
public static IndexedRecord extractIndexedRecord(IndexedRecord inputRecord, Schema outputSchema) {
    GenericRecordBuilder builder = new GenericRecordBuilder(outputSchema);
    Schema inputSchema = getUnwrappedSchema(inputRecord);
    for (Field outputField : outputSchema.getFields()) {
        String name = outputField.name();
        Field inputField = inputSchema.getField(name);

        if (inputField == null) {
            // element not found => set to the value and its hierarchy to null
            builder.set(name, KeyValueUtils.generateEmptyRecord(outputSchema, name));
            continue;
        }

        // The column exists on the input record: forward it to the output record.
        Object inputValue = inputRecord.get(inputField.pos());
        if (!(inputValue instanceof Record)) {
            builder.set(name, inputValue);
            continue;
        }

        // Nested record: resolve the unwrapped sub-schemas on both sides before recursing.
        Schema inputChildSchema = getUnwrappedSchema(inputField);
        Schema outputChildSchema = getUnwrappedSchema(outputSchema.getField(name));
        boolean inputIsRecord = inputChildSchema.getType().equals(Type.RECORD);
        if (inputIsRecord && outputChildSchema.getType().equals(Type.RECORD)) {
            Object childRecord = extractIndexedRecord((IndexedRecord) inputValue, outputChildSchema);
            builder.set(name, childRecord);
        }
    }
    return builder.build();
}
Also used : Field(org.apache.avro.Schema.Field) Schema(org.apache.avro.Schema) GenericRecordBuilder(org.apache.avro.generic.GenericRecordBuilder) Record(org.apache.avro.generic.GenericData.Record) IndexedRecord(org.apache.avro.generic.IndexedRecord)

Example 70 with Record

use of org.apache.avro.generic.GenericData.Record in project components by Talend.

the class AzureStorageQueueWriter method sendParallelMessages.

/**
 * Sends every buffered message to the queue using a parallel stream, then clears the buffer.
 *
 * For each message that is added to the queue, the success counter is incremented and a record
 * built on {@code writeSchema}, holding the message content at position 0, is appended to
 * {@code successfulWrites}. A {@code StorageException} increments the reject counter and is logged.
 *
 * The callback may run on several threads at once, so every update of the shared counters and
 * of {@code successfulWrites} is guarded by a lock local to this call. The monitor of
 * {@code this} is deliberately not used: the calling thread already holds it (the method is
 * synchronized) while it waits for the stream to finish, so worker threads could never acquire it.
 */
private synchronized void sendParallelMessages() {
    final Object lock = new Object();
    messagesBuffer.parallelStream().forEach(new Consumer<QueueMessage>() {

        @Override
        public void accept(QueueMessage queueMessage) {
            try {
                // The remote call stays outside the lock so that sends still overlap.
                queue.addMessage(queueMessage.getMsg(), queueMessage.getTimeToLiveInSeconds(), queueMessage.getInitialVisibilityDelayInSeconds(), null, null);
                synchronized (lock) {
                    // ++ on a shared field is a read-modify-write and is not atomic.
                    result.successCount++;
                    IndexedRecord record = new Record(writeSchema);
                    record.put(0, queueMessage.getMsg().getMessageContentAsString());
                    // NOTE(review): the type of successfulWrites is not visible here; it is
                    // guarded as well in case it is not a thread-safe collection.
                    successfulWrites.add(record);
                }
            } catch (StorageException e) {
                synchronized (lock) {
                    result.rejectCount++;
                }
                LOGGER.error(e.getLocalizedMessage());
            }
        }
    });
    messagesBuffer.clear();
}
Also used : CloudQueueMessage(com.microsoft.azure.storage.queue.CloudQueueMessage) IndexedRecord(org.apache.avro.generic.IndexedRecord) Record(org.apache.avro.generic.GenericData.Record) IndexedRecord(org.apache.avro.generic.IndexedRecord) StorageException(com.microsoft.azure.storage.StorageException)

Aggregations

Record (org.apache.avro.generic.GenericData.Record)96 Test (org.junit.Test)44 IndexedRecord (org.apache.avro.generic.IndexedRecord)43 Schema (org.apache.avro.Schema)33 ArrayList (java.util.ArrayList)24 GenericRecord (org.apache.avro.generic.GenericRecord)14 Field (org.apache.avro.Schema.Field)11 List (java.util.List)10 GenericData (org.apache.avro.generic.GenericData)10 TestRunner (org.apache.nifi.util.TestRunner)8 GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder)7 JsonObject (com.google.gson.JsonObject)6 DataFileStream (org.apache.avro.file.DataFileStream)6 DataFileWriter (org.apache.avro.file.DataFileWriter)6 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)6 Utf8 (org.apache.avro.util.Utf8)6 TMarketoOutputProperties (org.talend.components.marketo.tmarketooutput.TMarketoOutputProperties)6 ActivityRecord (com.marketo.mktows.ActivityRecord)5 ArrayOfLeadRecord (com.marketo.mktows.ArrayOfLeadRecord)5 LeadChangeRecord (com.marketo.mktows.LeadChangeRecord)5