Use of com.linkedin.databus.core.util.RateMonitor in the databus project by LinkedIn.
From class BootstrapAvroFileEventReader, method readEventsFromHadoopFiles.
/**
 * Reads every regular file found directly under {@code avroSeedDir} (in sorted order) as an Avro
 * data file and appends one bootstrap event per record to {@code _bootstrapEventBuffer}.
 *
 * <p>Records are counted across all files; records whose running count is below the last known
 * row id stored in {@code _lastRows} for this source's event view are skipped. The buffer is
 * committed ({@code endEvents} followed by {@code startEvents}) whenever the running count is a
 * multiple of the configured commit interval, and once more at the end of a file if the last
 * record read from it was not followed by a commit.
 *
 * @param sourceInfo  source whose event factory creates the events
 * @param avroSeedDir directory containing the Avro seed files
 * @param windowSCN   SCN handed to the event factory for every created event
 * @return summary carrying the number of records read, total event size and elapsed wall-clock time
 * @throws RuntimeException if the directory cannot be listed, a file cannot be opened, or
 *                          processing a record fails
 */
private EventReaderSummary readEventsFromHadoopFiles(OracleTriggerMonitoredSourceInfo sourceInfo, File avroSeedDir, Long windowSCN) {
    File[] files = avroSeedDir.listFiles();
    if (files == null) {
        // File.listFiles() returns null (rather than an empty array) when the path is not a
        // directory or an I/O error occurs; fail with a clear message instead of an NPE.
        String msg = "Failed to list avro seed files under " + avroSeedDir.getAbsolutePath();
        LOG.fatal(msg);
        throw new RuntimeException(msg);
    }
    // Natural File ordering, so files are consumed in a deterministic order.
    Arrays.sort(files);
    long numRead = 0;
    long prevNumRead = 0;
    long numBytes = 0;
    long timestamp = System.currentTimeMillis();
    long timeStart = timestamp;
    long lastTime = timestamp;
    long commitInterval = _config.getCommitInterval();
    long totLatency = 0;
    GenericRecord record = null;
    // The monitor is created suspended and only resumed once the first non-skipped record is seen.
    RateMonitor seedingRate = new RateMonitor("Seeding Rate");
    seedingRate.start();
    seedingRate.suspend();
    long startRowId = _lastRows.get(sourceInfo.getEventView());
    LOG.info("Last Known Row Id is :" + startRowId);
    boolean resumeSeedingRate = true;
    for (File avroSeedFile : files) {
        if (!avroSeedFile.isFile()) {
            continue;
        }
        LOG.info("Seeding from File : " + avroSeedFile);
        DataFileReader<GenericRecord> reader;
        try {
            reader = new DataFileReader<GenericRecord>(avroSeedFile, new GenericDatumReader<GenericRecord>());
        } catch (IOException e) {
            LOG.fatal("Failed to bootstrap from file " + avroSeedFile.getAbsolutePath(), e);
            throw new RuntimeException("Failed to bootstrap from file " + avroSeedFile.getAbsolutePath(), e);
        }
        try {
            boolean committed = false;
            for (GenericRecord hdfsRecord : reader) {
                record = hdfsRecord;
                committed = false;
                numRead++;
                // Skip rows below the last known row id.
                if (numRead < startRowId) {
                    continue;
                }
                if (resumeSeedingRate) {
                    seedingRate.resume();
                    resumeSeedingRate = false;
                }
                seedingRate.tick();
                long start = System.nanoTime();
                long eventSize = sourceInfo.getFactory().createAndAppendEvent(windowSCN, timestamp, hdfsRecord, _bootstrapEventBuffer, false, null);
                numBytes += eventSize;
                long latency = System.nanoTime() - start;
                totLatency += latency;
                if (numRead % commitInterval == 0) {
                    // Commit this batch and open the next one.
                    _bootstrapEventBuffer.endEvents(numRead, timestamp, null);
                    _bootstrapEventBuffer.startEvents();
                    long procTime = totLatency / 1000000000;
                    long currTime = System.currentTimeMillis();
                    long diff = (currTime - lastTime) / 1000;
                    long timeSinceStart = (currTime - timeStart) / 1000;
                    LOG.info("Processed " + commitInterval + " rows in " + diff + " seconds, Avro Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Overall Row Rate:" + seedingRate.getRate() + ", NumRows Fetched so far:" + numRead + ". TotalEventSize :" + numBytes);
                    lastTime = currTime;
                    seedingRate.resume();
                    committed = true;
                }
            }
            if (!committed) {
                // Commit whatever is pending at the end of this file.
                _bootstrapEventBuffer.endEvents(numRead, timestamp, null);
                _bootstrapEventBuffer.startEvents();
                long procTime = totLatency / 1000000000;
                long currTime = System.currentTimeMillis();
                long diff = (currTime - lastTime) / 1000;
                long timeSinceStart = (currTime - timeStart) / 1000;
                LOG.info("Completed Seeding from : " + avroSeedFile + ", Processed " + commitInterval + " rows in " + diff + " seconds, Avro Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Overall Row Rate:" + seedingRate.getRate() + ", NumRows Fetched so far:" + numRead + ". TotalEventSize :" + numBytes);
                lastTime = currTime;
                seedingRate.resume();
            }
        } catch (Exception e) {
            LOG.fatal("NumRead :" + numRead + ", Got Exception while processing generic record :" + record, e);
            throw new RuntimeException(e);
        } finally {
            // Release the file handle on both the success and the failure path.
            try {
                reader.close();
            } catch (IOException closeEx) {
                LOG.warn("Failed to close avro file reader for " + avroSeedFile.getAbsolutePath(), closeEx);
            }
        }
        LOG.info("Processed " + (numRead - prevNumRead) + " rows of Source: " + sourceInfo.getSourceName() + " from file " + avroSeedFile);
        prevNumRead = numRead;
    }
    long timeEnd = System.currentTimeMillis();
    long elapsedMillis = timeEnd - timeStart;
    long elapsedMin = elapsedMillis / (MILLISEC_TO_MIN);
    LOG.info("Processed " + numRead + " rows of Source: " + sourceInfo.getSourceName() + " in " + elapsedMin + " minutes");
    // Guard the per-row average: an empty directory (or only empty files) leaves numRead at 0.
    long millisPerRow = (numRead > 0) ? (elapsedMillis / numRead) : 0;
    return new EventReaderSummary(sourceInfo.getSourceId(), sourceInfo.getSourceName(), -1, (int) numRead, numBytes, elapsedMillis, millisPerRow, 0, 0, 0);
}
Use of com.linkedin.databus.core.util.RateMonitor in the databus project by LinkedIn.
From class BootstrapSrcDBEventReader, method readEventsForSource.
/**
 * Seeds one source by running its chunked event query round after round, each round starting at
 * the last key seen, until a termination condition is met.
 *
 * <p>For every row returned, the key/txn pair is written to the source's key-txn file and an
 * event is created through the source's event factory into {@code _bootstrapSeedWriter}. Events
 * are committed every {@code _commitInterval} rows and at the end of each round. A
 * {@link SQLException} inside a round rolls the writer back and the round is retried, up to
 * {@code _numRetries} failures in total. When seeding finishes without an error, the key-txn file
 * is de-duplicated.
 *
 * @param sourceInfo source to seed; its event view selects the query, key type/name and saved state
 * @param maxScn     SCN handed to the event factory for every created event
 * @return summary carrying the number of rows fetched, total event size, elapsed wall-clock time
 *         and accumulated event-creation time
 * @throws DatabusException if the retry limit is reached, or a full round returns only one
 *                          distinct key (no progress possible)
 * @throws EventCreationException if the LOB prefetch size cannot be set on the statement
 * @throws SQLException if obtaining the connection or preparing the statement fails
 * @throws IOException if the key-txn file cannot be opened, written or closed
 */
private EventReaderSummary readEventsForSource(OracleTriggerMonitoredSourceInfo sourceInfo, long maxScn) throws DatabusException, EventCreationException, UnsupportedKeyException, SQLException, IOException {
    int retryMax = _numRetries;
    int numRetry = 0;
    Connection conn = null;
    PreparedStatement pstmt = null;
    ResultSet rs = null;
    KeyType keyType = _pKeyTypeMap.get(sourceInfo.getEventView());
    String keyName = _pKeyNameMap.get(sourceInfo.getEventView());
    String sql = _eventQueryMap.get(sourceInfo.getEventView());
    String endSrcKey = _endSrcKeyMap.get(sourceInfo.getEventView());
    if (sql == null) {
        sql = generateEventQuery2(sourceInfo, keyName, keyType, getPKIndex(sourceInfo), getQueryHint(sourceInfo));
    }
    LOG.info("Chunked Query for Source (" + sourceInfo + ") is :" + sql);
    LOG.info("EndSrcKey for source (" + sourceInfo + ") is :" + endSrcKey);
    // Optional upper bound: seeding stops once a key greater than this one has been seen.
    PrimaryKeyTxn endKeyTxn = null;
    if ((null != endSrcKey) && (!endSrcKey.trim().isEmpty())) {
        if (KeyType.LONG == keyType) {
            endKeyTxn = new PrimaryKeyTxn(Long.valueOf(endSrcKey));
        } else {
            endKeyTxn = new PrimaryKeyTxn(endSrcKey);
        }
    }
    long timestamp = System.currentTimeMillis();
    int numRowsFetched = 0;
    long totalEventSize = 0;
    long timeStart = System.currentTimeMillis();
    long checkpointInterval = _commitInterval;
    boolean done = false;
    long lastTime = timeStart;
    long numRows = 0;
    PrimaryKeyTxn pKey = null;
    String minKeySQL = generateMinKeyQuery(sourceInfo, keyName);
    String srcName = sourceInfo.getEventView();
    LOG.info("Bootstrapping for Source :" + srcName);
    String lastKey = _lastKeys.get(sourceInfo.getEventView());
    File f = _keyTxnFilesMap.get(srcName);
    // Append to the key-txn file if it already exists.
    FileWriter oStream = new FileWriter(f, f.exists());
    BufferedWriter keyTxnWriter = new BufferedWriter(oStream, _keyTxnBufferSizeMap.get(srcName));
    _bootstrapSeedWriter.startEvents();
    // Both monitors are created suspended and resumed only around the work they measure.
    RateMonitor seedingRate = new RateMonitor("Seeding Rate");
    RateMonitor queryRate = new RateMonitor("Query Rate");
    seedingRate.start();
    seedingRate.suspend();
    queryRate.start();
    queryRate.suspend();
    boolean isException = false;
    // Set only when the seeding loop exits normally, so that ANY exception type (not just the
    // ones that flip isException) keeps the key-txn file from being de-duplicated.
    boolean seedingCompleted = false;
    // Event-creation time summed over all rows, in nanoseconds.
    long totProcessNanos = 0;
    try {
        conn = _dataSource.getConnection();
        pstmt = conn.prepareStatement(sql);
        if (_enableNumRowsQuery) {
            numRows = getNumRows(conn, getTableName(sourceInfo));
        } else {
            numRows = -1;
        }
        long currRowId = _lastRows.get(sourceInfo.getEventView());
        /**
         * First Key to be seeded will be decided in the following order:
         * 1. Use bootstrap_seeder_state's last srcKey as the key for the first chunk.
         * 2. If (1) is empty, use passed-in begin srcKey.
         * 3. If (2) is also empty, use Oracle's minKey as the first Chunk Key.
         */
        if (null == lastKey) {
            lastKey = _beginSrcKeyMap.get(sourceInfo.getEventView());
            LOG.info("No last Src Key available in bootstrap_seeder_state for source (" + sourceInfo + ". Trying beginSrc Key from config :" + lastKey);
        }
        if ((null == lastKey) || (lastKey.trim().isEmpty())) {
            if (KeyType.LONG == keyType) {
                pKey = new PrimaryKeyTxn(executeAndGetLong(minKeySQL));
            } else {
                pKey = new PrimaryKeyTxn(executeAndGetString(minKeySQL));
            }
        } else {
            if (KeyType.LONG == keyType) {
                pKey = new PrimaryKeyTxn(Long.parseLong(lastKey));
            } else {
                pKey = new PrimaryKeyTxn(lastKey);
            }
        }
        PrimaryKeyTxn lastRoundKeyTxn = new PrimaryKeyTxn(pKey);
        PrimaryKeyTxn lastKeyTxn = new PrimaryKeyTxn(pKey);
        long numUniqueKeysThisRound = 0;
        boolean first = true;
        _rate.resume();
        while (!done) {
            LOG.info("MinKey being used for this round:" + pKey);
            numUniqueKeysThisRound = 0;
            try {
                lastRoundKeyTxn.copyFrom(pKey);
                if (KeyType.LONG == keyType) {
                    pstmt.setLong(1, pKey.getKey());
                } else {
                    String key = pKey.getKeyStr();
                    pstmt.setString(1, key);
                }
                pstmt.setLong(2, _numRowsPerQuery);
                pstmt.setFetchSize(_numRowsPrefetch);
                if (_oraclePreparedStatementClass.isInstance(pstmt)) {
                    try {
                        _setLobPrefetchSizeMethod.invoke(pstmt, _LOBPrefetchSize);
                    } catch (Exception e) {
                        throw new EventCreationException("Unable to set Lob Prefetch size" + e.getMessage());
                    }
                }
                LOG.info("Executing Oracle Query :" + sql + ". Key: " + pKey + ",NumRows: " + _numRowsPerQuery);
                queryRate.resume();
                rs = pstmt.executeQuery();
                queryRate.suspend();
                LOG.info("Total Query Latency :" + queryRate.getDuration() / 1000000000L);
                // Event-creation time for this round only, in nanoseconds.
                long totLatency = 0;
                long txnId = 0;
                int numRowsThisRound = 0;
                seedingRate.resume();
                while (rs.next()) {
                    _rate.tick();
                    seedingRate.tick();
                    currRowId++;
                    txnId = rs.getLong(2);
                    if (KeyType.LONG == keyType) {
                        pKey.setKeyTxn(rs.getLong(1), txnId);
                    } else {
                        String key = rs.getString(1);
                        pKey.setKeyStrTxn(key, txnId);
                    }
                    //Write TXN to file
                    pKey.writeTo(keyTxnWriter);
                    long start = System.nanoTime();
                    long eventSize = sourceInfo.getFactory().createAndAppendEvent(maxScn, timestamp, rs, _bootstrapSeedWriter, false, null);
                    long latency = System.nanoTime() - start;
                    totLatency += latency;
                    totalEventSize += eventSize;
                    // Add this row's latency only; adding the running round total on every row
                    // would count earlier rows again and again.
                    totProcessNanos += latency;
                    numRowsFetched++;
                    numRowsThisRound++;
                    if (lastKeyTxn.compareKey(pKey) != 0) {
                        numUniqueKeysThisRound++;
                        lastKeyTxn.copyFrom(pKey);
                    }
                    if (numRowsFetched % checkpointInterval == 0) {
                        // Commit this batch and reinit
                        _bootstrapSeedWriter.endEvents(currRowId, timestamp, null);
                        keyTxnWriter.flush();
                        _bootstrapSeedWriter.startEvents();
                        long procTime = totLatency / 1000000000;
                        long currTime = System.currentTimeMillis();
                        long diff = (currTime - lastTime) / 1000;
                        long timeSinceStart = (currTime - timeStart) / 1000;
                        double currRate = _rate.getRate();
                        // Avoid dividing by a zero/negative rate in the remaining-time estimate.
                        currRate = (currRate <= 0) ? 1 : currRate;
                        if (_enableNumRowsQuery) {
                            double remTime = (numRows - currRowId) / (currRate);
                            LOG.info("Processed " + checkpointInterval + " rows in " + diff + " seconds, Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Approx Seconds remaining :" + remTime + ",Overall Row Rate:" + _rate.getRate() + "(" + seedingRate.getRate() + ")" + ",NumRows Fetched so far:" + numRowsFetched + ". TotalEventSize :" + totalEventSize);
                        } else {
                            LOG.info("Processed " + checkpointInterval + " rows in " + diff + " seconds, Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Overall Row Rate:" + _rate.getRate() + "(" + seedingRate.getRate() + ")" + ",NumRows Fetched so far:" + numRowsFetched + ". TotalEventSize :" + totalEventSize);
                        }
                        lastTime = currTime;
                    }
                    if ((null != endKeyTxn) && (endKeyTxn.compareKey(lastKeyTxn) < 0)) {
                        LOG.info("Seeding to be stopped for current source as it has completed seeding upto endSrckey :" + endKeyTxn + ", Current SrcKey :" + lastKeyTxn);
                        break;
                    }
                }
                seedingRate.suspend();
                if ((numRowsThisRound <= 1) || ((numRowsThisRound < _numRowsPerQuery) && (numUniqueKeysThisRound <= 1))) {
                    // Nothing new came back (each round re-reads the last seen row): finished.
                    LOG.info("Seeding Done for source :" + sourceInfo.getEventView() + ", numRowsThisRound :" + numRowsThisRound + ", _numRowsPerQuery :" + _numRowsPerQuery + ", numUniqueKeys :" + numUniqueKeysThisRound);
                    done = true;
                } else if ((numRowsThisRound == _numRowsPerQuery) && (numUniqueKeysThisRound <= 1)) {
                    // A full chunk with a single key: the next round would start at the same key.
                    String msg = "Seeding stuck at infinte loop for source : " + sourceInfo.getEventView() + ", numRowsThisRound :" + numRowsThisRound + ", _numRowsPerQuery :" + _numRowsPerQuery + ", numUniqueKeys :" + numUniqueKeysThisRound + ", lastChunkKey :" + lastRoundKeyTxn;
                    LOG.error(msg);
                    throw new DatabusException(msg);
                } else if (null != endKeyTxn) {
                    if (endKeyTxn.compareKey(lastKeyTxn) < 0) {
                        LOG.info("Seeding stopped for source :" + sourceInfo.getEventView() + ", as it has completed seeding upto the endSrckey :" + endKeyTxn + ", numRowsThisRound :" + numRowsThisRound + ", _numRowsPerQuery :" + _numRowsPerQuery + ", numUniqueKeys :" + numUniqueKeysThisRound + " , Current SrcKey :" + lastKeyTxn);
                        done = true;
                    }
                }
                if (currRowId > 0 && (!first || done)) {
                    //Since next time, we will read the last seen record again
                    currRowId--;
                }
                LOG.info("about to call end events with currRowId = " + currRowId);
                first = false;
                _bootstrapSeedWriter.endEvents(currRowId, timestamp, null);
                isException = false;
            } catch (SQLException ex) {
                LOG.error("Got SQLException for source (" + sourceInfo + ")", ex);
                _bootstrapSeedWriter.rollbackEvents();
                numRetry++;
                isException = true;
                if (numRetry >= retryMax) {
                    throw new DatabusException("Error: Reached max retries for reading/processing bootstrap", ex);
                }
            } finally {
                DBHelper.close(rs);
                rs = null;
            }
        }
        seedingCompleted = true;
    } catch (DatabusException ex) {
        isException = true;
        throw ex;
    } finally {
        DBHelper.close(rs, pstmt, conn);
        rs = null;
        try {
            keyTxnWriter.close();
        } finally {
            // Suspend the shared rate monitor even if closing the key-txn file fails.
            _rate.suspend();
        }
        if (seedingCompleted && !isException) {
            dedupeKeyTxnFile(_keyTxnFilesMap.get(srcName), keyType);
        }
    }
    long timeEnd = System.currentTimeMillis();
    long elapsedMin = (timeEnd - timeStart) / (MILLISEC_TO_MIN);
    LOG.info("Processed " + numRowsFetched + " rows of Source: " + sourceInfo.getSourceName() + " in " + elapsedMin + " minutes");
    // NOTE(review): the process-time slot is assumed to be in milliseconds, like the elapsed
    // wall-clock value passed just before it - confirm against EventReaderSummary.
    long totProcessTime = totProcessNanos / (1000L * 1000L);
    return new EventReaderSummary(sourceInfo.getSourceId(), sourceInfo.getSourceName(), -1, numRowsFetched, totalEventSize, (timeEnd - timeStart), totProcessTime, 0, 0, 0);
}
Use of com.linkedin.databus.core.util.RateMonitor in the databus project by LinkedIn.
From class GGXMLTrailTransactionFinder, method reset.
/**
 * Puts this finder back into its initial state: fresh transaction positions, no current file,
 * zeroed offsets and counters, cleared flags, an empty transaction buffer, and two new rate
 * monitors that have been started and immediately suspended.
 */
@Override
public void reset() {
    // Transaction positions
    _txnPos = new ScnTxnPos();
    _prevTxnPos = new ScnTxnPos();

    // Location within the trail file
    _currFile = null;
    _currFileByteOffset = 0;
    _currLineNumber = 0;

    // Counters
    _numTxnsSeen = 0;
    _numInvalidTxnsSeen = 0;

    // Parse-state flags
    _txnEndSeen = _beginTxnSeen = _firstTxnSeen = false;

    // Drop any partially accumulated transaction text
    _currTxnStr.setLength(0);

    _queryRateMonitor = newSuspendedRateMonitor("XPath_GGTransactionFinder");
    _rateMonitor = newSuspendedRateMonitor("GGTransactionFinder");
}

/** Creates a rate monitor with the given name, starts it and leaves it suspended. */
private RateMonitor newSuspendedRateMonitor(String monitorName) {
    RateMonitor monitor = new RateMonitor(monitorName);
    monitor.start();
    monitor.suspend();
    return monitor;
}
Aggregations