Use of org.apache.avro.generic.GenericRecord in project databus by LinkedIn.
Class TestGoldenGateEventProducer, method testAddEventToBuffer.
@Test
public void testAddEventToBuffer() throws InvalidConfigException, UnsupportedKeyException, DatabusException {
  // No rate control
  long rate = 0;
  PhysicalSourceStaticConfig pssc = buildPssc(rate, 0L);
  long scn = 10;
  DbusEventBuffer mb = (DbusEventBuffer) createBufMult(pssc);
  GoldenGateEventProducer gg = new GoldenGateEventProducer(pssc, null, mb, null, null);
  List<TransactionState.PerSourceTransactionalUpdate> dbUpdates = new ArrayList<TransactionState.PerSourceTransactionalUpdate>(10);
  int sourceId = 505;
  HashSet<DBUpdateImage> db = new HashSet<DBUpdateImage>();
  Object key = new String("name");
  Schema.Type keyType = Schema.Type.RECORD;
  ColumnsState.KeyPair kp = new ColumnsState.KeyPair(key, keyType);
  ArrayList<ColumnsState.KeyPair> keyPairs = new ArrayList<ColumnsState.KeyPair>(1);
  keyPairs.add(kp);
  Schema s = Schema.parse(avroSchema);
  GenericRecord gr = new GenericData.Record(s);
  gr.put("name", "phani");
  DBUpdateImage dbi = new DBUpdateImage(keyPairs, scn, gr, s, DbUpdateState.DBUpdateImage.OpType.INSERT, false);
  db.add(dbi);
  TransactionState.PerSourceTransactionalUpdate dbUpdate = new TransactionState.PerSourceTransactionalUpdate(sourceId, db);
  dbUpdates.add(dbUpdate);
  long timestamp = System.nanoTime();
  gg.addEventToBuffer(dbUpdates, new TransactionInfo(0, 0, timestamp, scn));
  Assert.assertEquals(gg.getRateControl().getNumSleeps(), 0);
  DbusEventIterator iter = mb.acquireIterator("test");
  int count = 0;
  long eventTs = 0;
  while (iter.hasNext()) {
    DbusEvent e = iter.next();
    if (count == 1) {
      // the first event is the preceding control event, so read the timestamp from the first data event
      eventTs = e.timestampInNanos();
    }
    count++;
  }
  Assert.assertEquals("Event timestamp in Ns", timestamp, eventTs);
  Assert.assertEquals("Got events ", 3, count);
}
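The only Avro-specific steps in this test are parsing a schema and populating a GenericRecord. A minimal, self-contained sketch of that pattern follows; the class name and inline schema are made up for illustration (the test parses its own avroSchema string), and Schema.Parser is the non-deprecated replacement for the Schema.parse call used above.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;

public class GenericRecordSketch {
  public static void main(String[] args) {
    // Hypothetical inline record schema with a single string field.
    Schema s = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Person\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}");
    GenericRecord gr = new GenericData.Record(s);
    gr.put("name", "phani");
    System.out.println(gr);  // prints {"name": "phani"}
  }
}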
Use of org.apache.avro.generic.GenericRecord in project databus by LinkedIn.
Class TestParser, method checkNonEmptyFields.
public boolean checkNonEmptyFields(List<TransactionState.PerSourceTransactionalUpdate> dbUpdates) {
  for (TransactionState.PerSourceTransactionalUpdate dbUpdate : dbUpdates) {
    Set<DbUpdateState.DBUpdateImage> dbUpdateImages = dbUpdate.getDbUpdatesSet();
    Iterator<DbUpdateState.DBUpdateImage> it = dbUpdateImages.iterator();
    while (it.hasNext()) {
      DbUpdateState.DBUpdateImage image = it.next();
      GenericRecord record = image.getGenericRecord();
      Iterator<Schema.Field> fieldIt = record.getSchema().getFields().iterator();
      while (fieldIt.hasNext()) {
        String fieldName = fieldIt.next().name();
        if (record.get(fieldName) == null) {
          return false;
        }
      }
    }
  }
  return true;
}
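For reference, the per-record part of this check (iterate the schema's fields and look each one up by name) can be written as a small standalone helper over a single GenericRecord. This is a sketch, not part of the original test class; the method name is illustrative.

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;

// Returns true only if every field declared in the record's schema has a non-null value.
static boolean hasNoNullFields(GenericRecord record) {
  for (Schema.Field field : record.getSchema().getFields()) {
    if (record.get(field.name()) == null) {
      return false;
    }
  }
  return true;
}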
Use of org.apache.avro.generic.GenericRecord in project pinot by LinkedIn.
Class RealtimeQueriesSentinelTest, method getRealtimeSegment.
private IndexSegment getRealtimeSegment() throws IOException {
  RealtimeSegmentImpl realtimeSegmentImpl = RealtimeSegmentImplTest.createRealtimeSegmentImpl(PINOT_SCHEMA, 100000, "testTable", "testTable_testTable", AVRO_DATA, new ServerMetrics(new MetricsRegistry()));
  realtimeSegmentImpl.setSegmentMetadata(getRealtimeSegmentZKMetadata());
  try {
    DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA))));
    GenericRow genericRow = null;
    while (avroReader.hasNext()) {
      GenericRecord avroRecord = avroReader.next();
      genericRow = GenericRow.createOrReuseRow(genericRow);
      genericRow = AVRO_RECORD_TRANSFORMER.transform(avroRecord, genericRow);
      // System.out.println(genericRow);
      realtimeSegmentImpl.index(genericRow);
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  // System.out.println("Current raw events indexed: " + realtimeSegmentImpl.getRawDocumentCount() + ", totalDocs = "
  //     + realtimeSegmentImpl.getSegmentMetadata().getTotalDocs());
  realtimeSegmentImpl.setSegmentMetadata(getRealtimeSegmentZKMetadata());
  return realtimeSegmentImpl;
}
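AvroUtils.getAvroReader is a Pinot-side convenience wrapper; with the Avro library alone, the same streaming read looks roughly like the sketch below. The file path is a placeholder, and the loop body is only a comment standing in for the row transformation done above.

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

// Stream GenericRecords out of an Avro container file ("data.avro" is a placeholder path).
try (DataFileStream<GenericRecord> avroReader = new DataFileStream<>(
    new FileInputStream(new File("data.avro")), new GenericDatumReader<GenericRecord>())) {
  while (avroReader.hasNext()) {
    GenericRecord avroRecord = avroReader.next();
    // hand avroRecord to a row transformer / indexer, as the method above does
  }
} catch (IOException e) {
  throw new RuntimeException(e);
}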
Use of org.apache.avro.generic.GenericRecord in project pinot by LinkedIn.
Class BaseClusterIntegrationTest, method pushAvroIntoKafka.
public static void pushAvroIntoKafka(List<File> avroFiles, String kafkaBroker, String kafkaTopic, final byte[] header) {
  Properties properties = new Properties();
  properties.put("metadata.broker.list", kafkaBroker);
  properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
  properties.put("request.required.acks", "1");
  ProducerConfig producerConfig = new ProducerConfig(properties);
  Producer<byte[], byte[]> producer = new Producer<byte[], byte[]>(producerConfig);
  for (File avroFile : avroFiles) {
    try {
      ByteArrayOutputStream outputStream = new ByteArrayOutputStream(65536);
      DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
      BinaryEncoder binaryEncoder = new EncoderFactory().directBinaryEncoder(outputStream, null);
      GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(reader.getSchema());
      int recordCount = 0;
      List<KeyedMessage<byte[], byte[]>> messagesToWrite = new ArrayList<KeyedMessage<byte[], byte[]>>(10000);
      int messagesInThisBatch = 0;
      for (GenericRecord genericRecord : reader) {
        outputStream.reset();
        if (header != null && 0 < header.length) {
          outputStream.write(header);
        }
        datumWriter.write(genericRecord, binaryEncoder);
        binaryEncoder.flush();
        byte[] bytes = outputStream.toByteArray();
        KeyedMessage<byte[], byte[]> data = new KeyedMessage<byte[], byte[]>(kafkaTopic, Longs.toByteArray(System.currentTimeMillis()), bytes);
        if (BATCH_KAFKA_MESSAGES) {
          messagesToWrite.add(data);
          messagesInThisBatch++;
          if (MAX_MESSAGES_PER_BATCH <= messagesInThisBatch) {
            LOGGER.debug("Sending a batch of {} records to Kafka", messagesInThisBatch);
            messagesInThisBatch = 0;
            producer.send(messagesToWrite);
            messagesToWrite.clear();
          }
        } else {
          producer.send(data);
        }
        recordCount += 1;
      }
      if (BATCH_KAFKA_MESSAGES) {
LOGGER.info("Sending last match of {} records to Kafka", messagesToWrite.size());
producer.send(messagesToWrite);
}
outputStream.close();
reader.close();
LOGGER.info("Finished writing " + recordCount + " records from " + avroFile.getName() + " into Kafka topic " + kafkaTopic + " from file " + avroFile.getName());
      int totalRecordCount = totalAvroRecordWrittenCount.addAndGet(recordCount);
      LOGGER.info("Total records written so far " + totalRecordCount);
    } catch (Exception e) {
      e.printStackTrace();
      throw new RuntimeException(e);
    }
  }
}
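The core Avro step in this method is encoding each GenericRecord into a byte[] before handing it to the Kafka producer. Isolated into a small helper, that step looks roughly like the sketch below; the method name is illustrative, and EncoderFactory.get() is used in place of the new EncoderFactory() instance created above (both are valid).

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

// Encode one GenericRecord to the raw Avro binary encoding (no container-file framing).
static byte[] encodeRecord(GenericRecord record, Schema schema) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
  BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(out, null);
  writer.write(record, encoder);
  encoder.flush();
  return out.toByteArray();
}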
Use of org.apache.avro.generic.GenericRecord in project pinot by LinkedIn.
Class BaseClusterIntegrationTest, method createH2SchemaAndInsertAvroFiles.
public static void createH2SchemaAndInsertAvroFiles(List<File> avroFiles, Connection connection) {
  try {
connection.prepareCall("DROP TABLE IF EXISTS mytable");
    File schemaAvroFile = avroFiles.get(0);
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(schemaAvroFile, datumReader);
    Schema schema = dataFileReader.getSchema();
    List<Schema.Field> fields = schema.getFields();
    List<String> columnNamesAndTypes = new ArrayList<String>(fields.size());
    int columnCount = 0;
    for (Schema.Field field : fields) {
      String fieldName = field.name();
      Schema.Type fieldType = field.schema().getType();
      switch (fieldType) {
        case UNION:
          List<Schema> types = field.schema().getTypes();
          String columnNameAndType;
          String typeName = types.get(0).getName();
          if (typeName.equalsIgnoreCase("int")) {
            typeName = "bigint";
          }
          if (types.size() == 1) {
            columnNameAndType = fieldName + " " + typeName + " not null";
          } else {
            columnNameAndType = fieldName + " " + typeName;
          }
          columnNamesAndTypes.add(columnNameAndType.replace("string", "varchar(128)"));
          ++columnCount;
          break;
        case ARRAY:
          String elementTypeName = field.schema().getElementType().getName();
          if (elementTypeName.equalsIgnoreCase("int")) {
            elementTypeName = "bigint";
          }
          elementTypeName = elementTypeName.replace("string", "varchar(128)");
          for (int i = 0; i < MAX_ELEMENTS_IN_MULTI_VALUE; i++) {
            columnNamesAndTypes.add(fieldName + "__MV" + i + " " + elementTypeName);
          }
          ++columnCount;
          break;
        case BOOLEAN:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case STRING:
          String fieldTypeName = fieldType.getName();
          if (fieldTypeName.equalsIgnoreCase("int")) {
            fieldTypeName = "bigint";
          }
          columnNameAndType = fieldName + " " + fieldTypeName + " not null";
          columnNamesAndTypes.add(columnNameAndType.replace("string", "varchar(128)"));
          ++columnCount;
          break;
        case RECORD:
          // Ignore records
          continue;
        default:
          // Ignore other avro types
          LOGGER.warn("Ignoring field {} of type {}", fieldName, field.schema());
      }
    }
    connection.prepareCall("create table mytable(" + StringUtil.join(",", columnNamesAndTypes.toArray(new String[columnNamesAndTypes.size()])) + ")").execute();
    long start = System.currentTimeMillis();
    StringBuilder params = new StringBuilder("?");
    for (int i = 0; i < columnNamesAndTypes.size() - 1; i++) {
      params.append(",?");
    }
    PreparedStatement statement = connection.prepareStatement("INSERT INTO mytable VALUES (" + params.toString() + ")");
    dataFileReader.close();
    for (File avroFile : avroFiles) {
      datumReader = new GenericDatumReader<GenericRecord>();
      dataFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
      GenericRecord record = null;
      while (dataFileReader.hasNext()) {
        record = dataFileReader.next(record);
        int jdbcIndex = 1;
        for (int avroIndex = 0; avroIndex < columnCount; ++avroIndex) {
          Object value = record.get(avroIndex);
          if (value instanceof GenericData.Array) {
            GenericData.Array array = (GenericData.Array) value;
            for (int i = 0; i < MAX_ELEMENTS_IN_MULTI_VALUE; i++) {
              if (i < array.size()) {
                value = array.get(i);
                if (value instanceof Utf8) {
                  value = value.toString();
                }
              } else {
                value = null;
              }
              statement.setObject(jdbcIndex, value);
              ++jdbcIndex;
            }
          } else {
            if (value instanceof Utf8) {
              value = value.toString();
            }
            statement.setObject(jdbcIndex, value);
            ++jdbcIndex;
          }
        }
        statement.execute();
      }
      dataFileReader.close();
    }
LOGGER.info("Insertion took " + (System.currentTimeMillis() - start));
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
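The read side of this method is standard Avro: a DataFileReader paired with a GenericDatumReader, reusing one record object across iterations to limit allocations. A minimal sketch of just that loop follows; the file path is a placeholder and the loop body is a comment standing in for the JDBC binding done above.

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

// Iterate an Avro container file, reusing the same GenericRecord for each row ("rows.avro" is a placeholder path).
try (DataFileReader<GenericRecord> dataFileReader =
    new DataFileReader<>(new File("rows.avro"), new GenericDatumReader<GenericRecord>())) {
  GenericRecord record = null;
  while (dataFileReader.hasNext()) {
    record = dataFileReader.next(record);
    // bind record fields to the prepared statement, as the method above does
  }
} catch (IOException e) {
  throw new RuntimeException(e);
}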