use of com.linkedin.databus2.schemas.VersionedSchema in project databus by linkedin.
the class ORListener method frameAvroRecord.
private void frameAvroRecord(long tableId, BinlogEventV4Header bh, List<Row> rl, final DbusOpcode doc) {
try {
final long timestampInNanos = bh.getTimestamp() * 1000000L;
final long scn = scn(_currFileNum, (int) bh.getPosition());
final boolean isReplicated = false;
final TableMapEvent tme = _tableMapEvents.get(tableId);
String tableName = tme.getDatabaseName().toString().toLowerCase() + "." + tme.getTableName().toString().toLowerCase();
VersionedSchema vs = _schemaRegistryService.fetchLatestVersionedSchemaBySourceName(_tableUriToSrcNameMap.get(tableName));
Schema schema = vs.getSchema();
if (_log.isDebugEnabled())
_log.debug("File Number :" + _currFileNum + ", Position :" + (int) bh.getPosition() + ", SCN =" + scn);
for (Row r : rl) {
List<Column> cl = r.getColumns();
GenericRecord gr = new GenericData.Record(schema);
generateAvroEvent(schema, cl, gr);
List<KeyPair> kps = generateKeyPair(cl, schema);
DbChangeEntry db = new DbChangeEntry(scn, timestampInNanos, gr, doc, isReplicated, schema, kps);
_transaction.getPerSourceTransaction(_tableUriToSrcIdMap.get(tableName)).mergeDbChangeEntrySet(db);
}
} catch (NoSuchSchemaException ne) {
throw new DatabusRuntimeException(ne);
} catch (DatabusException de) {
throw new DatabusRuntimeException(de);
}
}
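The method above fetches the latest VersionedSchema for the table and frames each row as an Avro GenericRecord against it. A minimal sketch of that fetch-and-frame pattern in isolation, assuming a SchemaRegistryService instance and a hypothetical source name (neither is taken from the databus code above):
// Sketch only: "com.example.person" is a hypothetical source name.
// Relevant types: org.apache.avro.Schema, org.apache.avro.generic.GenericData,
// org.apache.avro.generic.GenericRecord, com.linkedin.databus2.schemas.VersionedSchema.
VersionedSchema vs = schemaRegistryService.fetchLatestVersionedSchemaBySourceName("com.example.person");
Schema schema = vs.getSchema();
GenericRecord record = new GenericData.Record(schema);
// fields would be populated from the parsed binlog columns before the record is
// merged into the per-source transaction, as generateAvroEvent does above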
use of com.linkedin.databus2.schemas.VersionedSchema in project databus by linkedin.
the class TestGoldenGateEventProducer method testGGParserStats.
/**
* Tests collection of parser stats, especially the lag between parsed and added files.
* @throws Exception
*/
@Test
public void testGGParserStats() throws Exception {
short[] sourceIds = new short[] { 505, 506 };
String[] sourceNames = new String[] { "source1", "source2" };
// setup trail Files directory
File ggTrailDir = FileUtils.createTempDir("testGGParserStats");
// configure physical source
String uri = "gg://" + ggTrailDir.getAbsolutePath() + ":x3";
PhysicalSourceStaticConfig pssc = buildSimplePssc(sourceIds, sourceNames, uri);
LOG.info("Uri=" + uri);
// create schema
Schema s = Schema.parse(sourceAvroSchema);
VersionedSchema vs = new VersionedSchema(new VersionedSchemaId("source1", (short) 3), s, null);
// mock for schema registry
SchemaRegistryService srs = EasyMock.createMock(SchemaRegistryService.class);
EasyMock.expect(srs.fetchLatestVersionedSchemaBySourceName("source1")).andReturn(vs).anyTimes();
EasyMock.expect(srs.fetchLatestVersionedSchemaBySourceName("source2")).andReturn(vs).anyTimes();
EasyMock.expect(srs.fetchLatestVersionedSchemaBySourceName(null)).andReturn(vs);
// mock for MaxSCNReaderWriter
MaxSCNReaderWriter mscn = EasyMock.createMock(MaxSCNReaderWriter.class);
EasyMock.expect(mscn.getMaxScn()).andReturn((long) -2).atLeastOnce();
mscn.saveMaxScn(EasyMock.anyLong());
EasyMock.expectLastCall().anyTimes();
EasyMock.replay(mscn);
EasyMock.replay(srs);
int totalTransWritten = 0;
int totalFilesWritten = 0;
// buffer
DbusEventBufferAppendable mb = createBufMult(pssc);
// start GG producer
GoldenGateEventProducer gg = new GoldenGateEventProducer(pssc, srs, mb, null, mscn);
// create first 2 files
addToTrailFile(new File(ggTrailDir.getAbsolutePath() + "/x301"), 100, 4);
addToTrailFile(new File(ggTrailDir.getAbsolutePath() + "/x302"), 200, 4);
totalTransWritten = 8;
totalFilesWritten = 2;
// get hold of parser stats object
final GGParserStatistics ggParserStats = gg.getParserStats();
// all should be 0
Assert.assertEquals(0, ggParserStats.getNumFilesParsed());
Assert.assertEquals(0, ggParserStats.getNumFilesAdded());
Assert.assertEquals(0, ggParserStats.getFilesLag());
Assert.assertEquals(0, ggParserStats.getTimeLag());
Assert.assertEquals(0, ggParserStats.getBytesLag());
try {
LOG.info("starting event producer");
// -2 here does nothing; the actual setting happens through the mock of MaxSCNReaderWriter
gg.start(-2);
// let it parse the first two files
TestUtil.assertWithBackoff(new ConditionCheck() {
@Override
public boolean check() {
return ggParserStats.getNumFilesParsed() == 2 && (8 * _transactionPatternSize == ggParserStats.getNumBytesTotalParsed());
}
}, "First two files parsed", 2000, LOG);
// stats in the interim
Assert.assertEquals(2, ggParserStats.getNumFilesParsed());
Assert.assertEquals(2, ggParserStats.getNumFilesAdded());
Assert.assertEquals(0, ggParserStats.getFilesLag());
Assert.assertEquals(0, ggParserStats.getTimeLag());
Assert.assertEquals(0, ggParserStats.getBytesLag());
Assert.assertEquals(totalTransWritten * _transactionPatternSize, ggParserStats.getNumBytesTotalParsed());
gg.pause();
// the file will get parsed but not processed
addToTrailFile(new File(ggTrailDir.getAbsolutePath() + "/x303"), 300, 4);
totalTransWritten += 4;
totalFilesWritten++;
// sleep to get more than a millisecond of lag time
TestUtil.sleep(2000);
addToTrailFile(new File(ggTrailDir.getAbsolutePath() + "/x304"), 400, 4);
totalTransWritten += 4;
totalFilesWritten++;
// sleep to guarantee we picked up a stats update (stats are updated every 5 seconds)
TestUtil.sleep(6000);
// now we should be 2 files behind, but the parser thread gets paused AFTER it starts
// processing a file, so the actual value will be 1 file behind:
// x303 has already started being parsed; only x304 is behind
int lagFiles = 1;
// 1 file, 4 transactions each
long lagBytes = 1 * 4 * _transactionPatternSize;
/*
 * Assert.assertEquals(totalFilesWritten - 1, ggParserStats.getNumFilesParsed());
 * Assert.assertEquals(totalFilesWritten, ggParserStats.getNumFilesAdded());
 * Assert.assertEquals(lagFiles, ggParserStats.getFilesLag()); // because 303 got parsed
 *
 * // we added 4 files and parsed 3, so the diff should be 1 file size (4 transactions in 1 file)
 * Assert.assertEquals(lagBytes, ggParserStats.getBytesLag());
 * Assert.assertTrue(ggParserStats.getTimeLag() > 0);
 */
gg.unpause();
TestUtil.sleep(5000);
// now we should catch up
Assert.assertEquals(4, ggParserStats.getNumFilesParsed());
Assert.assertEquals(4, ggParserStats.getNumFilesAdded());
Assert.assertEquals(0, ggParserStats.getFilesLag());
Assert.assertEquals(0, ggParserStats.getTimeLag());
Assert.assertEquals(0, ggParserStats.getBytesLag());
// append to a file
LOG.info("pausing again");
gg.pause();
addToTrailFile(new File(ggTrailDir.getAbsolutePath() + "/x304"), 410, 4);
totalTransWritten += 4;
TestUtil.sleep(1000);
addToTrailFile(new File(ggTrailDir.getAbsolutePath() + "/x304"), 420, 4);
totalTransWritten += 4;
TestUtil.sleep(2000);
gg.unpause();
TestUtil.sleep(5500);
// should still be caught up
Assert.assertEquals(4, ggParserStats.getNumFilesParsed());
Assert.assertEquals(4, ggParserStats.getNumFilesAdded());
Assert.assertEquals(0, ggParserStats.getFilesLag());
Assert.assertEquals(0, ggParserStats.getTimeLag());
Assert.assertEquals(0, ggParserStats.getBytesLag());
// assert the stats
int totalFilesSize = totalTransWritten * _transactionPatternSize;
Assert.assertEquals((totalFilesSize / totalFilesWritten), ggParserStats.getAvgFileSize());
Assert.assertEquals(true, ggParserStats.getAvgParseTransactionTimeNs() > 0);
Assert.assertEquals("part1", ggParserStats.getPhysicalSourceName());
Assert.assertEquals(totalFilesSize / totalTransWritten, ggParserStats.getAvgTransactionSize());
Assert.assertEquals(423, ggParserStats.getMaxScn());
// 2 events per transaction
Assert.assertEquals(totalTransWritten * 2, ggParserStats.getNumTotalEvents());
Assert.assertEquals(totalTransWritten, ggParserStats.getNumTransactionsTotal());
Assert.assertEquals(totalTransWritten, ggParserStats.getNumTransactionsWithEvents());
Assert.assertEquals(0, ggParserStats.getNumTransactionsWithoutEvents());
Assert.assertEquals(true, ggParserStats.getTimeSinceLastAccessMs() > 0);
Assert.assertEquals(totalTransWritten * _transactionPatternSize, ggParserStats.getNumBytesTotalParsed());
Assert.assertEquals("NumSCNRegressions", 0, ggParserStats.getNumSCNRegressions());
Assert.assertEquals("LastSCNRegressed", -1, ggParserStats.getLastRegressedScn());
} finally {
gg.shutdown();
}
return;
}
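The test constructs a VersionedSchema directly from a parsed Avro schema. As a quick reference, a minimal sketch of the two constructor forms and the accessors that appear on this page (the inline schema string is an illustrative placeholder):
// Sketch only: the record definition below is a hypothetical placeholder.
Schema s = Schema.parse("{\"type\":\"record\",\"name\":\"source1\",\"fields\":[{\"name\":\"key\",\"type\":\"long\"}]}");
// constructor taking a VersionedSchemaId, as in the test above
VersionedSchema v1 = new VersionedSchema(new VersionedSchemaId("source1", (short) 3), s, null);
// constructor taking a base name and a version, as in BootstrapAuditMain below
VersionedSchema v2 = new VersionedSchema(s.getFullName(), (short) 3, s, null);
// accessors used elsewhere on this page
LOG.info("schema=" + v1.getSchema() + " version=" + v1.getVersion() + " name=" + v1.getSchemaBaseName());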
use of com.linkedin.databus2.schemas.VersionedSchema in project databus by linkedin.
the class BootstrapAuditMain method main.
/**
* @param args
*/
public static void main(String[] args) throws Exception {
BootstrapSeederMain.init(args);
BootstrapSeederMain.StaticConfig staticConfig = BootstrapSeederMain.getStaticConfig();
int interval = staticConfig.getController().getCommitInterval();
int sourceChunkSize = staticConfig.getController().getNumRowsPerQuery();
List<OracleTriggerMonitoredSourceInfo> sources = BootstrapSeederMain.getSources();
BootstrapDBSeeder seeder = BootstrapSeederMain.getSeeder();
BootstrapSrcDBEventReader seedController = BootstrapSeederMain.getReader();
Map<String, String> pKeyNameMap = seedController.getpKeyNameMap();
Map<String, DbusEventKey.KeyType> pKeyTypeMap = seedController.getpKeyTypeMap();
for (OracleTriggerMonitoredSourceInfo source : sources) {
short srcId = source.getSourceId();
new ConcurrentHashMap<Long, ResultSetEntry>();
OracleTableReader oracleReader = null;
MySQLTableReader mySQLReader = null;
try {
SchemaRegistryService schemaRegistry = FileSystemSchemaRegistryService.build(staticConfig.getSchemaRegistry().getFileSystem());
Map<Short, String> schemaSet = schemaRegistry.fetchAllSchemaVersionsBySourceName(source.getSourceName());
VersionedSchemaSet vSchemaSet = new VersionedSchemaSet();
Iterator<Map.Entry<Short, String>> it = schemaSet.entrySet().iterator();
while (it.hasNext()) {
Map.Entry<Short, String> pairs = it.next();
Schema s = Schema.parse(pairs.getValue());
VersionedSchema vs = new VersionedSchema(s.getFullName(), pairs.getKey(), s, null);
vSchemaSet.add(vs);
}
/* Try and identify the schema key */
VersionedSchema vschema = schemaRegistry.fetchLatestVersionedSchemaBySourceName(source.getSourceName());
Schema schema = Schema.parse(vschema.getSchema().toString());
LOG.info("Schema =" + vschema.getSchema() + "version=" + vschema.getVersion() + " name=" + vschema.getSchemaBaseName());
/* Determine type of field txn */
Field txnFieldType = schema.getField("txn");
if (txnFieldType == null) {
throw new Exception("Unable to find field called 'txn'. Cannot proceeed\n");
}
Type txnType = SchemaHelper.getAnyType(txnFieldType);
/*
* Determine primary key of schema. This is assumed to be invariant
* across versions
*/
String keyOverrideName = SchemaHelper.getMetaField(schema, "pk");
String keyColumnName = "key";
if (null != keyOverrideName) {
keyColumnName = keyOverrideName;
}
Field pkeyField = schema.getField(keyColumnName);
if (null == pkeyField) {
keyColumnName = "id";
pkeyField = schema.getField("id");
}
if (null == pkeyField) {
throw new Exception("Unable to get the primary key for schema. Schema is :" + schema);
}
DbusEventAvroDecoder decoder = new DbusEventAvroDecoder(vSchemaSet);
BootstrapAuditTester auditor = new BootstrapAuditTester(schema, BootstrapSrcDBEventReader.getTableName(source));
List<BootstrapAuditTester> auditors = new ArrayList<BootstrapAuditTester>();
auditors.add(auditor);
oracleReader = new OracleTableReader(BootstrapSeederMain.getDataStore().getConnection(), BootstrapSrcDBEventReader.getTableName(source), pkeyField, SchemaHelper.getMetaField(pkeyField, "dbFieldName"), SchemaHelper.getAnyType(pkeyField), sourceChunkSize, seedController.getPKIndex(source), seedController.getQueryHint(source));
mySQLReader = new MySQLTableReader(seeder.getConnection(),
seeder.getTableName(srcId),
pkeyField,
"id", // the primary key column in the bootstrapDB
SchemaHelper.getAnyType(pkeyField), interval);
double samplePct = BootstrapSeederMain.getValidationSamplePct();
TableComparator comparator = new TableComparator(oracleReader, mySQLReader, auditor, decoder, interval, pKeyNameMap.get(source.getEventView()), pKeyTypeMap.get(source.getEventView()), txnType, samplePct);
boolean success = false;
if (BootstrapSeederMain.getValidationType().equals("point")) {
success = comparator.compareRecordsPoint();
} else if (BootstrapSeederMain.getValidationType().equals("pointBs")) {
success = comparator.compareRecordsPointBs();
} else {
success = comparator.compareRecordsNew();
}
if (success)
LOG.info("Audit completed successfully");
else
LOG.error("Audit FAILED !!! ");
} catch (Exception ex) {
LOG.error("Caught an exception ex", ex);
throw ex;
} finally {
if (null != oracleReader)
oracleReader.close();
}
}
DBHelper.close(seeder.getConnection());
}
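The audit flow registers every schema version of a source in a VersionedSchemaSet before constructing the event decoder. A minimal sketch of that pattern on its own, assuming the version-to-definition map was already fetched from the registry as shown above:
// Sketch only: schemaSet is the Map<Short, String> returned by
// schemaRegistry.fetchAllSchemaVersionsBySourceName(sourceName) above.
VersionedSchemaSet vSchemaSet = new VersionedSchemaSet();
for (Map.Entry<Short, String> entry : schemaSet.entrySet()) {
    Schema s = Schema.parse(entry.getValue());
    vSchemaSet.add(new VersionedSchema(s.getFullName(), entry.getKey(), s, null));
}
DbusEventAvroDecoder decoder = new DbusEventAvroDecoder(vSchemaSet);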
use of com.linkedin.databus2.schemas.VersionedSchema in project databus by linkedin.
the class StateMachineHelper method tableToSchema.
/**
* Given a table name, extracts the avro schema corresponding to the table. If the table is not found (i.e., the relay is not configured
* to host this table), returns null.
* @param currentTable The table for which you want the avro schema (e.g. PERSON.ADDRESSBOOKTABLE)
* @param tableToSourceName The map which has the table to the namespace of the schema (e.g. PERSON.ADDRESSBOOKTABLE => com.linkedin.com.person.addressBook)
* @param schemaRegistryService The schema registry which holds all the schemas from where the schemas are looked up based on namespace
* @return the Avro schema for the table, or null if it is not found or cannot be fetched
* @throws DatabusException
*/
public static Schema tableToSchema(String currentTable, HashMap<String, String> tableToSourceName, SchemaRegistryService schemaRegistryService) {
String sourceName = tableToSourceName.get(currentTable);
Schema schema = null;
try {
VersionedSchema vSchema = schemaRegistryService.fetchLatestVersionedSchemaBySourceName(sourceName);
if (vSchema != null)
schema = vSchema.getSchema();
} catch (RuntimeException re) {
throw re;
} catch (Exception e) {
LOG.error("Unable to fetch the schema for the table: " + currentTable + " with source name: " + sourceName + " because of the following error : " + e, e);
return null;
}
if (schema == null)
return null;
return schema;
}
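A minimal usage sketch for tableToSchema, assuming a SchemaRegistryService instance is already available (for example the EasyMock registry built in the test above); the table and source names below are hypothetical placeholders:
// Sketch only: the table-to-source mapping is hypothetical.
HashMap<String, String> tableToSourceName = new HashMap<String, String>();
tableToSourceName.put("person.addressbooktable", "com.linkedin.events.example.person");
Schema schema = StateMachineHelper.tableToSchema("person.addressbooktable", tableToSourceName, schemaRegistryService);
if (schema == null) {
    LOG.warn("relay is not configured to host table person.addressbooktable, or the schema lookup failed");
}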
use of com.linkedin.databus2.schemas.VersionedSchema in project databus by linkedin.
the class LoggingConsumer method doDataEvent.
private ConsumerCallbackResult doDataEvent(DbusEvent e, DbusEventDecoder eventDecoder, ConsumerCallbackResult result, boolean bootstrapOn) {
RuntimeConfig rtConfig = getRuntimeConfig();
if (!rtConfig.isEnabled())
return result;
// check for event validity as long as the option is NOT disabled
if (rtConfig.isValidityCheckEnabled()) {
if (!e.isValid()) {
_log.error("invalid event received:");
_log.error(e.toString());
}
}
//for backwards compatibility
if (bootstrapOn) {
++_bstEventsNum;
if (_bstEventsNum % BOOTSTRAP_EVENT_LOG_FREQUENCY == 1) {
final VersionedSchema payloadSchema = eventDecoder.getPayloadSchema(e);
String schemaName = (null == payloadSchema) ? "unknown source: " + e.getSourceId() : payloadSchema.getSchema().getName();
String keyStr = null;
try {
if (e.isKeyString()) {
keyStr = new String(e.keyBytes(), "UTF-8");
} else if (e.isKeyNumber()) {
keyStr = Long.toString(e.key());
} else if (e.isKeySchema()) {
// TODO Fix to use a decoder (DDSDBUS-2076)
DbusEventPart keyPart = e.getKeyPart();
keyStr = keyPart.toString();
}
} catch (UnsupportedEncodingException e1) {
keyStr = "unsupported key encoding";
} catch (RuntimeException ex) {
keyStr = "key decoding error: " + ex;
}
String msg = String.format(BOOTSTRAP_EVENT_LOG_FORMAT, _bstEventsNum, e.sequence(), keyStr, schemaName);
_log.log(rtConfig.getLogLevel(), msg);
}
}
_currentWindowScn = e.sequence();
if (_fileBasedCallback != null) {
_fileBasedCallback.onEvent(e);
}
if (_staticConfig.isLogTypedValue()) {
logTypedValue(e, eventDecoder, rtConfig, bootstrapOn ? "b:" : "s:");
}
updateEventStats(e);
return result;
}