Example 56 with GenericRecord

Use of org.apache.avro.generic.GenericRecord in project databus by linkedin.

From the class TestGoldenGateEventProducer, method testAddEventToBuffer.

@Test
public void testAddEventToBuffer() throws InvalidConfigException, UnsupportedKeyException, DatabusException {
    // No rate control
    long rate = 0;
    PhysicalSourceStaticConfig pssc = buildPssc(rate, 0L);
    long scn = 10;
    DbusEventBuffer mb = (DbusEventBuffer) createBufMult(pssc);
    GoldenGateEventProducer gg = new GoldenGateEventProducer(pssc, null, mb, null, null);
    List<TransactionState.PerSourceTransactionalUpdate> dbUpdates = new ArrayList<TransactionState.PerSourceTransactionalUpdate>(10);
    int sourceId = 505;
    HashSet<DBUpdateImage> db = new HashSet<DBUpdateImage>();
    Object key = "name";
    Schema.Type keyType = Schema.Type.RECORD;
    ColumnsState.KeyPair kp = new ColumnsState.KeyPair(key, keyType);
    ArrayList<ColumnsState.KeyPair> keyPairs = new ArrayList<ColumnsState.KeyPair>(1);
    keyPairs.add(kp);
    Schema s = Schema.parse(avroSchema);
    GenericRecord gr = new GenericData.Record(s);
    gr.put("name", "phani");
    DBUpdateImage dbi = new DBUpdateImage(keyPairs, scn, gr, s, DbUpdateState.DBUpdateImage.OpType.INSERT, false);
    db.add(dbi);
    TransactionState.PerSourceTransactionalUpdate dbUpdate = new TransactionState.PerSourceTransactionalUpdate(sourceId, db);
    dbUpdates.add(dbUpdate);
    long timestamp = System.nanoTime();
    gg.addEventToBuffer(dbUpdates, new TransactionInfo(0, 0, timestamp, scn));
    Assert.assertEquals(gg.getRateControl().getNumSleeps(), 0);
    DbusEventIterator iter = mb.acquireIterator("test");
    int count = 0;
    long eventTs = 0;
    while (iter.hasNext()) {
        DbusEvent e = iter.next();
        if (count == 1) {
            // event 0 is the preceding control event, so count == 1 is the
            // first data event; capture its timestamp
            eventTs = e.timestampInNanos();
        }
        count++;
    }
    Assert.assertEquals("Event timestamp in Ns", timestamp, eventTs);
    Assert.assertEquals("Got events ", 3, count);
}
Also used : PhysicalSourceStaticConfig(com.linkedin.databus2.relay.config.PhysicalSourceStaticConfig) TransactionState(com.linkedin.databus2.ggParser.XmlStateMachine.TransactionState) Schema(org.apache.avro.Schema) VersionedSchema(com.linkedin.databus2.schemas.VersionedSchema) ArrayList(java.util.ArrayList) TransactionInfo(com.linkedin.databus.monitoring.mbean.GGParserStatistics.TransactionInfo) GenericRecord(org.apache.avro.generic.GenericRecord) HashSet(java.util.HashSet) DbusEvent(com.linkedin.databus.core.DbusEvent) DBUpdateImage(com.linkedin.databus2.ggParser.XmlStateMachine.DbUpdateState.DBUpdateImage) ColumnsState(com.linkedin.databus2.ggParser.XmlStateMachine.ColumnsState) DbusEventBuffer(com.linkedin.databus.core.DbusEventBuffer) DbusEventIterator(com.linkedin.databus.core.DbusEventBuffer.DbusEventIterator) Test(org.testng.annotations.Test)
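The test above relies on an avroSchema string defined elsewhere in TestGoldenGateEventProducer, which this listing does not show. A minimal sketch of a schema that would satisfy the gr.put("name", "phani") call, under the assumption that the record only needs a name field:

// Hypothetical schema sketch; the real avroSchema in the test may declare
// more fields. Any record schema with a "name" string field would do here.
String avroSchema =
    "{\"type\":\"record\",\"name\":\"person\",\"fields\":["
  + "{\"name\":\"name\",\"type\":\"string\"}]}";
Schema s = Schema.parse(avroSchema); // deprecated in newer Avro; prefer new Schema.Parser().parse(...)
GenericRecord gr = new GenericData.Record(s);
gr.put("name", "phani"); // matches the field declared in the sketch above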

Example 57 with GenericRecord

Use of org.apache.avro.generic.GenericRecord in project databus by linkedin.

From the class TestParser, method checkNonEmptyFields.

public boolean checkNonEmptyFields(List<TransactionState.PerSourceTransactionalUpdate> dbUpdates) {
    for (TransactionState.PerSourceTransactionalUpdate dbUpdate : dbUpdates) {
        Set<DbUpdateState.DBUpdateImage> dbUpdateImages = dbUpdate.getDbUpdatesSet();
        Iterator<DbUpdateState.DBUpdateImage> it = dbUpdateImages.iterator();
        while (it.hasNext()) {
            DbUpdateState.DBUpdateImage image = it.next();
            GenericRecord record = image.getGenericRecord();
            Iterator<Schema.Field> fieldIt = record.getSchema().getFields().iterator();
            while (fieldIt.hasNext()) {
                String fieldName = fieldIt.next().name();
                if (record.get(fieldName) == null)
                    return false;
            }
        }
    }
    return true;
}
Also used : TransactionState(com.linkedin.databus2.ggParser.XmlStateMachine.TransactionState) DbUpdateState(com.linkedin.databus2.ggParser.XmlStateMachine.DbUpdateState) GenericRecord(org.apache.avro.generic.GenericRecord)
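The same field-walking pattern, reduced to a single record, reads more directly with a for-each loop. This is a sketch rather than databus code:

// Sketch: returns false if any field declared in the record's schema
// was never populated (GenericRecord.get(...) yields null for unset fields).
static boolean allFieldsSet(GenericRecord record) {
    for (Schema.Field field : record.getSchema().getFields()) {
        if (record.get(field.name()) == null) {
            return false;
        }
    }
    return true;
}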

Example 58 with GenericRecord

Use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.

From the class RealtimeQueriesSentinelTest, method getRealtimeSegment.

private IndexSegment getRealtimeSegment() throws IOException {
    RealtimeSegmentImpl realtimeSegmentImpl = RealtimeSegmentImplTest.createRealtimeSegmentImpl(PINOT_SCHEMA, 100000, "testTable", "testTable_testTable", AVRO_DATA, new ServerMetrics(new MetricsRegistry()));
    realtimeSegmentImpl.setSegmentMetadata(getRealtimeSegmentZKMetadata());
    try {
        DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA))));
        GenericRow genericRow = null;
        while (avroReader.hasNext()) {
            GenericRecord avroRecord = avroReader.next();
            genericRow = GenericRow.createOrReuseRow(genericRow);
            genericRow = AVRO_RECORD_TRANSFORMER.transform(avroRecord, genericRow);
            realtimeSegmentImpl.index(genericRow);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    realtimeSegmentImpl.setSegmentMetadata(getRealtimeSegmentZKMetadata());
    return realtimeSegmentImpl;
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) MetricsRegistry(com.yammer.metrics.core.MetricsRegistry) ServerMetrics(com.linkedin.pinot.common.metrics.ServerMetrics) RealtimeSegmentImpl(com.linkedin.pinot.core.realtime.impl.RealtimeSegmentImpl) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException)
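AvroUtils.getAvroReader is a Pinot helper whose internals are not shown in this listing; a plausible equivalent using only the standard Avro file API (the real helper may additionally handle gzipped files) would be:

// Sketch of an Avro file reader using the plain Avro API; the no-arg
// GenericDatumReader picks up the writer schema from the file header.
static DataFileStream<GenericRecord> openAvro(File avroFile) throws IOException {
    return new DataFileStream<GenericRecord>(
        new FileInputStream(avroFile),
        new GenericDatumReader<GenericRecord>());
}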

Example 59 with GenericRecord

Use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.

From the class BaseClusterIntegrationTest, method pushAvroIntoKafka.

public static void pushAvroIntoKafka(List<File> avroFiles, String kafkaBroker, String kafkaTopic, final byte[] header) {
    Properties properties = new Properties();
    properties.put("metadata.broker.list", kafkaBroker);
    properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
    properties.put("request.required.acks", "1");
    ProducerConfig producerConfig = new ProducerConfig(properties);
    Producer<byte[], byte[]> producer = new Producer<byte[], byte[]>(producerConfig);
    for (File avroFile : avroFiles) {
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
            DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
            BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
            GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(reader.getSchema());
            int recordCount = 0;
            List<KeyedMessage<byte[], byte[]>> messagesToWrite = new ArrayList<KeyedMessage<byte[], byte[]>>(10000);
            int messagesInThisBatch = 0;
            for (GenericRecord genericRecord : reader) {
                outputStream.reset();
                if (header != null && 0 < header.length) {
                    outputStream.write(header);
                }
                datumWriter.write(genericRecord, binaryEncoder);
                binaryEncoder.flush();
                byte[] bytes = outputStream.toByteArray();
                KeyedMessage<byte[], byte[]> data = new KeyedMessage<byte[], byte[]>(kafkaTopic, Longs.toByteArray(System.currentTimeMillis()), bytes);
                if (BATCH_KAFKA_MESSAGES) {
                    messagesToWrite.add(data);
                    messagesInThisBatch++;
                    if (MAX_MESSAGES_PER_BATCH <= messagesInThisBatch) {
                        LOGGER.debug("Sending a batch of {} records to Kafka", messagesInThisBatch);
                        messagesInThisBatch = 0;
                        producer.send(messagesToWrite);
                        messagesToWrite.clear();
                    }
                } else {
                    producer.send(data);
                }
                recordCount += 1;
            }
            if (BATCH_KAFKA_MESSAGES) {
                LOGGER.info("Sending last match of {} records to Kafka", messagesToWrite.size());
                producer.send(messagesToWrite);
            }
            outputStream.close();
            reader.close();
            LOGGER.info("Finished writing " + recordCount + " records from " + avroFile.getName() + " into Kafka topic " + kafkaTopic + " from file " + avroFile.getName());
            int totalRecordCount = totalAvroRecordWrittenCount.addAndGet(recordCount);
            LOGGER.info("Total records written so far " + totalRecordCount);
        } catch (Exception e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    }
}
Also used : EncoderFactory(org.apache.avro.io.EncoderFactory) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) GenericDatumWriter(org.apache.avro.generic.GenericDatumWriter) Properties(java.util.Properties) JSONException(org.json.JSONException) ArchiveException(org.apache.commons.compress.archivers.ArchiveException) SQLException(java.sql.SQLException) IOException(java.io.IOException) Producer(kafka.javaapi.producer.Producer) BinaryEncoder(org.apache.avro.io.BinaryEncoder) ProducerConfig(kafka.producer.ProducerConfig) GenericRecord(org.apache.avro.generic.GenericRecord) KeyedMessage(kafka.producer.KeyedMessage) File(java.io.File)
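The serialization at the heart of the loop above can be isolated into a small helper. This is a sketch, not Pinot code, and it uses the idiomatic EncoderFactory.get() singleton rather than the new EncoderFactory() seen in the test:

// Sketch: encode one GenericRecord to Avro binary, no Kafka involved.
static byte[] toAvroBytes(GenericRecord record, Schema schema) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);
    new GenericDatumWriter<GenericRecord>(schema).write(record, encoder);
    encoder.flush(); // near no-op for the direct encoder, but required for buffered ones
    return out.toByteArray();
}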

Example 60 with GenericRecord

Use of org.apache.avro.generic.GenericRecord in project pinot by linkedin.

From the class BaseClusterIntegrationTest, method createH2SchemaAndInsertAvroFiles.

public static void createH2SchemaAndInsertAvroFiles(List<File> avroFiles, Connection connection) {
    try {
        connection.prepareCall("DROP TABLE IF EXISTS mytable");
        File schemaAvroFile = avroFiles.get(0);
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
        DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(schemaAvroFile, datumReader);
        Schema schema = dataFileReader.getSchema();
        List<Schema.Field> fields = schema.getFields();
        List<String> columnNamesAndTypes = new ArrayList<String>(fields.size());
        int columnCount = 0;
        for (Schema.Field field : fields) {
            String fieldName = field.name();
            Schema.Type fieldType = field.schema().getType();
            switch(fieldType) {
                case UNION:
                    List<Schema> types = field.schema().getTypes();
                    String columnNameAndType;
                    String typeName = types.get(0).getName();
                    if (typeName.equalsIgnoreCase("int")) {
                        typeName = "bigint";
                    }
                    if (types.size() == 1) {
                        columnNameAndType = fieldName + " " + typeName + " not null";
                    } else {
                        columnNameAndType = fieldName + " " + typeName;
                    }
                    columnNamesAndTypes.add(columnNameAndType.replace("string", "varchar(128)"));
                    ++columnCount;
                    break;
                case ARRAY:
                    String elementTypeName = field.schema().getElementType().getName();
                    if (elementTypeName.equalsIgnoreCase("int")) {
                        elementTypeName = "bigint";
                    }
                    elementTypeName = elementTypeName.replace("string", "varchar(128)");
                    for (int i = 0; i < MAX_ELEMENTS_IN_MULTI_VALUE; i++) {
                        columnNamesAndTypes.add(fieldName + "__MV" + i + " " + elementTypeName);
                    }
                    ++columnCount;
                    break;
                case BOOLEAN:
                case INT:
                case LONG:
                case FLOAT:
                case DOUBLE:
                case STRING:
                    String fieldTypeName = fieldType.getName();
                    if (fieldTypeName.equalsIgnoreCase("int")) {
                        fieldTypeName = "bigint";
                    }
                    columnNameAndType = fieldName + " " + fieldTypeName + " not null";
                    columnNamesAndTypes.add(columnNameAndType.replace("string", "varchar(128)"));
                    ++columnCount;
                    break;
                case RECORD:
                    // Ignore records
                    continue;
                default:
                    // Ignore other avro types
                    LOGGER.warn("Ignoring field {} of type {}", fieldName, field.schema());
            }
        }
        connection.prepareCall("create table mytable(" + StringUtil.join(",", columnNamesAndTypes.toArray(new String[columnNamesAndTypes.size()])) + ")").execute();
        long start = System.currentTimeMillis();
        StringBuilder params = new StringBuilder("?");
        for (int i = 0; i < columnNamesAndTypes.size() - 1; i++) {
            params.append(",?");
        }
        PreparedStatement statement = connection.prepareStatement("INSERT INTO mytable VALUES (" + params.toString() + ")");
        dataFileReader.close();
        for (File avroFile : avroFiles) {
            datumReader = new GenericDatumReader<GenericRecord>();
            dataFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
            GenericRecord record = null;
            while (dataFileReader.hasNext()) {
                record = dataFileReader.next(record);
                int jdbcIndex = 1;
                for (int avroIndex = 0; avroIndex < columnCount; ++avroIndex) {
                    Object value = record.get(avroIndex);
                    if (value instanceof GenericData.Array) {
                        GenericData.Array array = (GenericData.Array) value;
                        for (int i = 0; i < MAX_ELEMENTS_IN_MULTI_VALUE; i++) {
                            if (i < array.size()) {
                                value = array.get(i);
                                if (value instanceof Utf8) {
                                    value = value.toString();
                                }
                            } else {
                                value = null;
                            }
                            statement.setObject(jdbcIndex, value);
                            ++jdbcIndex;
                        }
                    } else {
                        if (value instanceof Utf8) {
                            value = value.toString();
                        }
                        statement.setObject(jdbcIndex, value);
                        ++jdbcIndex;
                    }
                }
                statement.execute();
            }
            dataFileReader.close();
        }
        LOGGER.info("Insertion took " + (System.currentTimeMillis() - start));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) Schema(org.apache.avro.Schema) ArrayList(java.util.ArrayList) DataFileReader(org.apache.avro.file.DataFileReader) GenericRecord(org.apache.avro.generic.GenericRecord) PreparedStatement(java.sql.PreparedStatement) GenericData(org.apache.avro.generic.GenericData) JSONException(org.json.JSONException) ArchiveException(org.apache.commons.compress.archivers.ArchiveException) SQLException(java.sql.SQLException) IOException(java.io.IOException) JSONArray(org.json.JSONArray) Utf8(org.apache.avro.util.Utf8) JSONObject(org.json.JSONObject) File(java.io.File)
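The Utf8-to-String conversion repeated in the inner loop above exists because Avro decodes string fields as org.apache.avro.util.Utf8, which JDBC drivers do not accept directly. A sketch of that normalization as a standalone helper:

// Sketch: normalize an Avro field value before statement.setObject(...).
static Object toJdbcValue(Object avroValue) {
    if (avroValue instanceof Utf8) {
        return avroValue.toString(); // Utf8 -> java.lang.String
    }
    return avroValue; // numbers, booleans, and null pass through unchanged
}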

Aggregations

GenericRecord (org.apache.avro.generic.GenericRecord): 262
Schema (org.apache.avro.Schema): 101
Test (org.junit.Test): 80
GenericDatumWriter (org.apache.avro.generic.GenericDatumWriter): 46
File (java.io.File): 35
IOException (java.io.IOException): 34
GenericData (org.apache.avro.generic.GenericData): 30
GenericDatumReader (org.apache.avro.generic.GenericDatumReader): 30
ArrayList (java.util.ArrayList): 29
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 27
DataFileWriter (org.apache.avro.file.DataFileWriter): 20
HashMap (java.util.HashMap): 19
ByteBuffer (java.nio.ByteBuffer): 18
BinaryEncoder (org.apache.avro.io.BinaryEncoder): 17
Field (org.apache.avro.Schema.Field): 14
DataFileStream (org.apache.avro.file.DataFileStream): 14
GenericRecordBuilder (org.apache.avro.generic.GenericRecordBuilder): 14
Utf8 (org.apache.avro.util.Utf8): 14
Encoder (org.apache.avro.io.Encoder): 12
DatasetRepository (com.cloudera.cdk.data.DatasetRepository): 11