Search in sources :

Example 1 with RateMonitor

use of com.linkedin.databus.core.util.RateMonitor in project databus by linkedin.

In the class BootstrapAvroFileEventReader, the method readEventsFromHadoopFiles:

/**
 * Seeds the bootstrap event buffer from the Avro files found directly under {@code avroSeedDir}.
 *
 * <p>Regular files are processed in sorted path order. Rows are counted across all files; rows whose
 * running count is below the last known row id for the source's event view are skipped. For every
 * remaining row an event is created and appended to {@code _bootstrapEventBuffer}, and the buffer is
 * committed (endEvents/startEvents) every {@code commitInterval} rows and once more at the end of a
 * file if the last row read did not already trigger a commit.
 *
 * @param sourceInfo  source whose event factory is used to create events
 * @param avroSeedDir directory containing the Avro seed files
 * @param windowSCN   SCN passed to the event factory for every created event
 * @return summary with the number of rows read, total event size and elapsed time
 * @throws RuntimeException if the directory cannot be listed, a file cannot be opened, or a record
 *                          fails to be processed
 */
private EventReaderSummary readEventsFromHadoopFiles(OracleTriggerMonitoredSourceInfo sourceInfo, File avroSeedDir, Long windowSCN) {
    File[] files = avroSeedDir.listFiles();
    if (files == null) {
        // listFiles() returns null when the path is not a directory or an I/O error occurs;
        // fail with a descriptive message instead of a bare NullPointerException.
        String msg = "Unable to list avro seed files in directory " + avroSeedDir;
        LOG.fatal(msg);
        throw new RuntimeException(msg);
    }
    // Process the seed files in a deterministic (sorted) order.
    Arrays.sort(files);
    long numRead = 0;
    long prevNumRead = 0;
    long numBytes = 0;
    long timestamp = System.currentTimeMillis();
    long timeStart = timestamp;
    long lastTime = timestamp;
    long commitInterval = _config.getCommitInterval();
    long totLatency = 0;
    GenericRecord record = null;
    RateMonitor seedingRate = new RateMonitor("Seeding Rate");
    seedingRate.start();
    seedingRate.suspend();
    long startRowId = _lastRows.get(sourceInfo.getEventView());
    LOG.info("Last Known Row Id is :" + startRowId);
    // The rate monitor is resumed only once the first non-skipped row is reached.
    boolean resumeSeedingRate = true;
    for (File avroSeedFile : files) {
        if (!avroSeedFile.isFile())
            continue;
        LOG.info("Seeding from File : " + avroSeedFile);
        DataFileReader<GenericRecord> reader;
        try {
            reader = new DataFileReader<GenericRecord>(avroSeedFile, new GenericDatumReader<GenericRecord>());
        } catch (IOException e) {
            LOG.fatal("Failed to bootstrap from file " + avroSeedFile.getAbsolutePath(), e);
            throw new RuntimeException("Failed to bootstrap from file " + avroSeedFile.getAbsolutePath(), e);
        }
        try {
            boolean committed = false;
            for (GenericRecord hdfsRecord : reader) {
                // Remember the current record so it can be reported if processing fails.
                record = hdfsRecord;
                committed = false;
                numRead++;
                // Skip rows that precede the last known row id.
                if (numRead < startRowId)
                    continue;
                if (resumeSeedingRate) {
                    seedingRate.resume();
                    resumeSeedingRate = false;
                }
                seedingRate.tick();
                long start = System.nanoTime();
                long eventSize = sourceInfo.getFactory().createAndAppendEvent(windowSCN, timestamp, hdfsRecord, _bootstrapEventBuffer, false, null);
                numBytes += eventSize;
                long latency = System.nanoTime() - start;
                totLatency += latency;
                if (numRead % commitInterval == 0) {
                    // Commit this batch and open the next one.
                    _bootstrapEventBuffer.endEvents(numRead, timestamp, null);
                    _bootstrapEventBuffer.startEvents();
                    long procTime = totLatency / 1000000000;
                    long currTime = System.currentTimeMillis();
                    long diff = (currTime - lastTime) / 1000;
                    long timeSinceStart = (currTime - timeStart) / 1000;
                    LOG.info("Processed " + commitInterval + " rows in " + diff + " seconds, Avro Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Overall Row Rate:" + seedingRate.getRate() + ", NumRows Fetched so far:" + numRead + ". TotalEventSize :" + numBytes);
                    lastTime = currTime;
                    seedingRate.resume();
                    committed = true;
                }
            }
            if (!committed) {
                // Commit whatever was appended since the last interval commit for this file.
                _bootstrapEventBuffer.endEvents(numRead, timestamp, null);
                _bootstrapEventBuffer.startEvents();
                long procTime = totLatency / 1000000000;
                long currTime = System.currentTimeMillis();
                long diff = (currTime - lastTime) / 1000;
                long timeSinceStart = (currTime - timeStart) / 1000;
                LOG.info("Completed Seeding from : " + avroSeedFile + ", Processed " + commitInterval + " rows in " + diff + " seconds, Avro Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Overall Row Rate:" + seedingRate.getRate() + ", NumRows Fetched so far:" + numRead + ". TotalEventSize :" + numBytes);
                lastTime = currTime;
                seedingRate.resume();
            }
        } catch (Exception e) {
            LOG.fatal("NumRead :" + numRead + ", Got Exception while processing generic record :" + record, e);
            throw new RuntimeException(e);
        } finally {
            // Release the file handle held by the Avro reader; a close failure must not mask
            // an exception that is already propagating, so it is only logged.
            try {
                reader.close();
            } catch (IOException e) {
                LOG.warn("Failed to close avro file reader for " + avroSeedFile.getAbsolutePath(), e);
            }
        }
        LOG.info("Processed " + (numRead - prevNumRead) + " rows of Source: " + sourceInfo.getSourceName() + " from file " + avroSeedFile);
        prevNumRead = numRead;
    }
    long timeEnd = System.currentTimeMillis();
    long elapsedMs = timeEnd - timeStart;
    long elapsedMin = elapsedMs / (MILLISEC_TO_MIN);
    LOG.info("Processed " + numRead + " rows of Source: " + sourceInfo.getSourceName() + " in " + elapsedMin + " minutes");
    // Guard against division by zero when the directory held no rows at all.
    long elapsedMsPerRow = (numRead > 0) ? (elapsedMs / numRead) : 0;
    return new EventReaderSummary(sourceInfo.getSourceId(), sourceInfo.getSourceName(), -1, (int) numRead, numBytes, elapsedMs, elapsedMsPerRow, 0, 0, 0);
}
Also used : GenericDatumReader(org.apache.avro.generic.GenericDatumReader) IOException(java.io.IOException) RateMonitor(com.linkedin.databus.core.util.RateMonitor) EventCreationException(com.linkedin.databus2.producers.EventCreationException) DatabusException(com.linkedin.databus2.core.DatabusException) InvalidConfigException(com.linkedin.databus.core.util.InvalidConfigException) IOException(java.io.IOException) UnsupportedKeyException(com.linkedin.databus.core.UnsupportedKeyException) EventReaderSummary(com.linkedin.databus2.producers.db.EventReaderSummary) GenericRecord(org.apache.avro.generic.GenericRecord) File(java.io.File)

Example 2 with RateMonitor

use of com.linkedin.databus.core.util.RateMonitor in project databus by linkedin.

In the class BootstrapSrcDBEventReader, the method readEventsForSource:

/**
 * Seeds bootstrap events for one source by reading it from the database in key-ordered chunks.
 *
 * <p>Each round binds the current chunk key and {@code _numRowsPerQuery} to the event query, executes
 * it, and for every returned row writes the (key, txn) pair to the source's key/txn file and appends
 * an event through {@code _bootstrapSeedWriter}. Batches are committed every {@code _commitInterval}
 * rows and at the end of each round. A round that hits a {@link SQLException} is rolled back and
 * retried until {@code _numRetries} failures have accumulated. When the method finishes without a
 * recorded exception, the key/txn file is de-duplicated.
 *
 * @param sourceInfo source to seed
 * @param maxScn     SCN passed to the event factory for every created event
 * @return summary with the number of rows fetched, total event size, elapsed wall-clock time and
 *         accumulated event-creation time
 * @throws DatabusException        if the retry limit is reached or seeding makes no progress
 * @throws EventCreationException  if the LOB prefetch size cannot be set
 * @throws UnsupportedKeyException declared by the signature; presumably raised by event creation
 * @throws SQLException            declared by the signature for database access outside the retry loop
 * @throws IOException             if the key/txn file cannot be opened, written or closed
 */
private EventReaderSummary readEventsForSource(OracleTriggerMonitoredSourceInfo sourceInfo, long maxScn) throws DatabusException, EventCreationException, UnsupportedKeyException, SQLException, IOException {
    int retryMax = _numRetries;
    int numRetry = 0;
    Connection conn = null;
    PreparedStatement pstmt = null;
    ResultSet rs = null;
    KeyType keyType = _pKeyTypeMap.get(sourceInfo.getEventView());
    String keyName = _pKeyNameMap.get(sourceInfo.getEventView());
    String sql = _eventQueryMap.get(sourceInfo.getEventView());
    String endSrcKey = _endSrcKeyMap.get(sourceInfo.getEventView());
    if (sql == null) {
        sql = generateEventQuery2(sourceInfo, keyName, keyType, getPKIndex(sourceInfo), getQueryHint(sourceInfo));
    }
    LOG.info("Chunked  Query for Source (" + sourceInfo + ") is :" + sql);
    LOG.info("EndSrcKey for source (" + sourceInfo + ") is :" + endSrcKey);
    PrimaryKeyTxn endKeyTxn = null;
    if ((null != endSrcKey) && (!endSrcKey.trim().isEmpty())) {
        if (KeyType.LONG == keyType)
            // Parse the same way the begin/last key is parsed below, avoiding the deprecated Long(String).
            endKeyTxn = new PrimaryKeyTxn(Long.parseLong(endSrcKey));
        else
            endKeyTxn = new PrimaryKeyTxn(endSrcKey);
    }
    long timestamp = System.currentTimeMillis();
    int numRowsFetched = 0;
    long totalEventSize = 0;
    long timeStart = System.currentTimeMillis();
    long checkpointInterval = _commitInterval;
    boolean done = false;
    long lastTime = timeStart;
    long numRows = 0;
    PrimaryKeyTxn pKey = null;
    String minKeySQL = generateMinKeyQuery(sourceInfo, keyName);
    String srcName = sourceInfo.getEventView();
    LOG.info("Bootstrapping for Source :" + srcName);
    String lastKey = _lastKeys.get(sourceInfo.getEventView());
    File f = _keyTxnFilesMap.get(srcName);
    // Append to the key/txn file when it already exists, otherwise create it.
    FileWriter oStream = new FileWriter(f, f.exists());
    BufferedWriter keyTxnWriter = new BufferedWriter(oStream, _keyTxnBufferSizeMap.get(srcName));
    _bootstrapSeedWriter.startEvents();
    RateMonitor seedingRate = new RateMonitor("Seeding Rate");
    RateMonitor queryRate = new RateMonitor("Query Rate");
    seedingRate.start();
    seedingRate.suspend();
    queryRate.start();
    queryRate.suspend();
    boolean isException = false;
    // Total time spent creating/appending events across all rounds, in nanoseconds.
    long totProcessNanos = 0;
    try {
        conn = _dataSource.getConnection();
        pstmt = conn.prepareStatement(sql);
        if (_enableNumRowsQuery)
            numRows = getNumRows(conn, getTableName(sourceInfo));
        else
            numRows = -1;
        long currRowId = _lastRows.get(sourceInfo.getEventView());
        /*
         * First Key to be seeded will be decided in the following order:
         * 1. Use bootstrap_seeder_state's last srcKey as the key for the first chunk.
         * 2. If (1) is empty, use passed-in begin srcKey.
         * 3. If (2) is also empty, use Oracle's minKey as the first Chunk Key.
         */
        if (null == lastKey) {
            lastKey = _beginSrcKeyMap.get(sourceInfo.getEventView());
            LOG.info("No last Src Key available in bootstrap_seeder_state for source (" + sourceInfo + ". Trying beginSrc Key from config :" + lastKey);
        }
        if ((null == lastKey) || (lastKey.trim().isEmpty())) {
            if (KeyType.LONG == keyType)
                pKey = new PrimaryKeyTxn(executeAndGetLong(minKeySQL));
            else
                pKey = new PrimaryKeyTxn(executeAndGetString(minKeySQL));
        } else {
            if (KeyType.LONG == keyType)
                pKey = new PrimaryKeyTxn(Long.parseLong(lastKey));
            else
                pKey = new PrimaryKeyTxn(lastKey);
        }
        PrimaryKeyTxn lastRoundKeyTxn = new PrimaryKeyTxn(pKey);
        PrimaryKeyTxn lastKeyTxn = new PrimaryKeyTxn(pKey);
        long numUniqueKeysThisRound = 0;
        boolean first = true;
        _rate.resume();
        while (!done) {
            LOG.info("MinKey being used for this round:" + pKey);
            numUniqueKeysThisRound = 0;
            try {
                // Remember the chunk key this round started from (reported if seeding gets stuck).
                lastRoundKeyTxn.copyFrom(pKey);
                if (KeyType.LONG == keyType) {
                    pstmt.setLong(1, pKey.getKey());
                } else {
                    String key = pKey.getKeyStr();
                    pstmt.setString(1, key);
                }
                pstmt.setLong(2, _numRowsPerQuery);
                pstmt.setFetchSize(_numRowsPrefetch);
                if (_oraclePreparedStatementClass.isInstance(pstmt)) {
                    try {
                        _setLobPrefetchSizeMethod.invoke(pstmt, _LOBPrefetchSize);
                    } catch (Exception e) {
                        throw new EventCreationException("Unable to set Lob Prefetch size" + e.getMessage());
                    }
                }
                LOG.info("Executing Oracle Query :" + sql + ". Key: " + pKey + ",NumRows: " + _numRowsPerQuery);
                queryRate.resume();
                rs = pstmt.executeQuery();
                queryRate.suspend();
                LOG.info("Total Query Latency :" + queryRate.getDuration() / 1000000000L);
                // Event-creation time for this round only, in nanoseconds (used for progress logging).
                long totLatency = 0;
                long txnId = 0;
                int numRowsThisRound = 0;
                seedingRate.resume();
                while (rs.next()) {
                    _rate.tick();
                    seedingRate.tick();
                    currRowId++;
                    txnId = rs.getLong(2);
                    if (KeyType.LONG == keyType) {
                        pKey.setKeyTxn(rs.getLong(1), txnId);
                    } else {
                        String key = rs.getString(1);
                        pKey.setKeyStrTxn(key, txnId);
                    }
                    //Write TXN to file
                    pKey.writeTo(keyTxnWriter);
                    long start = System.nanoTime();
                    long eventSize = sourceInfo.getFactory().createAndAppendEvent(maxScn, timestamp, rs, _bootstrapSeedWriter, false, null);
                    long latency = System.nanoTime() - start;
                    totLatency += latency;
                    totalEventSize += eventSize;
                    // Accumulate only this row's latency; adding the running round total on every
                    // row would count earlier rows repeatedly.
                    totProcessNanos += latency;
                    numRowsFetched++;
                    numRowsThisRound++;
                    if (lastKeyTxn.compareKey(pKey) != 0) {
                        numUniqueKeysThisRound++;
                        lastKeyTxn.copyFrom(pKey);
                    }
                    if (numRowsFetched % checkpointInterval == 0) {
                        // Commit this batch and reinit
                        _bootstrapSeedWriter.endEvents(currRowId, timestamp, null);
                        keyTxnWriter.flush();
                        _bootstrapSeedWriter.startEvents();
                        long procTime = totLatency / 1000000000;
                        long currTime = System.currentTimeMillis();
                        long diff = (currTime - lastTime) / 1000;
                        long timeSinceStart = (currTime - timeStart) / 1000;
                        double currRate = _rate.getRate();
                        // Avoid dividing by a zero/negative rate when estimating the remaining time.
                        currRate = (currRate <= 0) ? 1 : currRate;
                        if (_enableNumRowsQuery) {
                            double remTime = (numRows - currRowId) / (currRate);
                            LOG.info("Processed " + checkpointInterval + " rows in " + diff + " seconds, Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Approx Seconds remaining :" + remTime + ",Overall Row Rate:" + _rate.getRate() + "(" + seedingRate.getRate() + ")" + ",NumRows Fetched so far:" + numRowsFetched + ". TotalEventSize :" + totalEventSize);
                        } else {
                            LOG.info("Processed " + checkpointInterval + " rows in " + diff + " seconds, Processing Time (seconds) so far :" + (procTime) + ",Seconds elapsed since start :" + (timeSinceStart) + ",Overall Row Rate:" + _rate.getRate() + "(" + seedingRate.getRate() + ")" + ",NumRows Fetched so far:" + numRowsFetched + ". TotalEventSize :" + totalEventSize);
                        }
                        lastTime = currTime;
                    }
                    if ((null != endKeyTxn) && (endKeyTxn.compareKey(lastKeyTxn) < 0)) {
                        LOG.info("Seeding to be stopped for current source as it has completed seeding upto endSrckey :" + endKeyTxn + ", Current SrcKey :" + lastKeyTxn);
                        break;
                    }
                }
                seedingRate.suspend();
                if ((numRowsThisRound <= 1) || ((numRowsThisRound < _numRowsPerQuery) && (numUniqueKeysThisRound <= 1))) {
                    LOG.info("Seeding Done for source :" + sourceInfo.getEventView() + ", numRowsThisRound :" + numRowsThisRound + ", _numRowsPerQuery :" + _numRowsPerQuery + ", numUniqueKeys :" + numUniqueKeysThisRound);
                    done = true;
                } else if ((numRowsThisRound == _numRowsPerQuery) && (numUniqueKeysThisRound <= 1)) {
                    // A full chunk without a new key means the next round would start from the same key again.
                    String msg = "Seeding stuck at infinte loop for source : " + sourceInfo.getEventView() + ", numRowsThisRound :" + numRowsThisRound + ", _numRowsPerQuery :" + _numRowsPerQuery + ", numUniqueKeys :" + numUniqueKeysThisRound + ", lastChunkKey :" + lastRoundKeyTxn;
                    LOG.error(msg);
                    throw new DatabusException(msg);
                } else if (null != endKeyTxn) {
                    if (endKeyTxn.compareKey(lastKeyTxn) < 0) {
                        LOG.info("Seeding stopped for source :" + sourceInfo.getEventView() + ", as it has completed seeding upto the endSrckey :" + endKeyTxn + ", numRowsThisRound :" + numRowsThisRound + ", _numRowsPerQuery :" + _numRowsPerQuery + ", numUniqueKeys :" + numUniqueKeysThisRound + " , Current SrcKey :" + lastKeyTxn);
                        done = true;
                    }
                }
                if (currRowId > 0 && (!first || done)) {
                    //Since next time, we will read the last seen record again
                    currRowId--;
                }
                LOG.info("about to call end events with currRowId = " + currRowId);
                first = false;
                _bootstrapSeedWriter.endEvents(currRowId, timestamp, null);
                isException = false;
            } catch (SQLException ex) {
                LOG.error("Got SQLException for source (" + sourceInfo + ")", ex);
                _bootstrapSeedWriter.rollbackEvents();
                numRetry++;
                isException = true;
                if (numRetry >= retryMax) {
                    throw new DatabusException("Error: Reached max retries for reading/processing bootstrap", ex);
                }
            } finally {
                DBHelper.close(rs);
                rs = null;
            }
        }
    } catch (DatabusException ex) {
        isException = true;
        throw ex;
    } finally {
        DBHelper.close(rs, pstmt, conn);
        rs = null;
        try {
            keyTxnWriter.close();
        } finally {
            // Suspend the instance-level rate monitor even when closing the key/txn file fails.
            _rate.suspend();
        }
        if (!isException) {
            dedupeKeyTxnFile(_keyTxnFilesMap.get(srcName), keyType);
        }
    }
    long timeEnd = System.currentTimeMillis();
    long elapsedMin = (timeEnd - timeStart) / (MILLISEC_TO_MIN);
    LOG.info("Processed " + numRowsFetched + " rows of Source: " + sourceInfo.getSourceName() + " in " + elapsedMin + " minutes");
    // NOTE(review): reported in milliseconds to match the elapsed-time argument next to it; confirm
    // against the unit EventReaderSummary expects for this field.
    long totProcessTime = totProcessNanos / 1000000L;
    return new EventReaderSummary(sourceInfo.getSourceId(), sourceInfo.getSourceName(), -1, numRowsFetched, totalEventSize, (timeEnd - timeStart), totProcessTime, 0, 0, 0);
}
Also used : KeyType(com.linkedin.databus.core.DbusEventKey.KeyType) SQLException(java.sql.SQLException) EventCreationException(com.linkedin.databus2.producers.EventCreationException) FileWriter(java.io.FileWriter) Connection(java.sql.Connection) PreparedStatement(java.sql.PreparedStatement) RateMonitor(com.linkedin.databus.core.util.RateMonitor) UnsupportedKeyException(com.linkedin.databus.core.UnsupportedKeyException) SQLException(java.sql.SQLException) EventCreationException(com.linkedin.databus2.producers.EventCreationException) DatabusException(com.linkedin.databus2.core.DatabusException) InvalidConfigException(com.linkedin.databus.core.util.InvalidConfigException) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) DatabusException(com.linkedin.databus2.core.DatabusException) EventReaderSummary(com.linkedin.databus2.producers.db.EventReaderSummary) ResultSet(java.sql.ResultSet) File(java.io.File)

Example 3 with RateMonitor

use of com.linkedin.databus.core.util.RateMonitor in project databus by linkedin.

In the class GGXMLTrailTransactionFinder, the method reset:

/**
 * Returns this finder to its initial state: fresh transaction positions, no current file,
 * zeroed counters and flags, an empty transaction buffer, and two newly created rate
 * monitors that are started and immediately suspended.
 */
@Override
public void reset() {
    // Transaction positions.
    this._txnPos = new ScnTxnPos();
    this._prevTxnPos = new ScnTxnPos();

    // Current file cursor.
    this._currFile = null;
    this._currFileByteOffset = 0;
    this._currLineNumber = 0;

    // Counters and state flags.
    this._numTxnsSeen = 0;
    this._numInvalidTxnsSeen = 0;
    this._txnEndSeen = false;
    this._beginTxnSeen = false;
    this._firstTxnSeen = false;

    // Drop any partially accumulated transaction text.
    this._currTxnStr.setLength(0);

    // Each monitor is started and then suspended right away.
    this._queryRateMonitor = new RateMonitor("XPath_GGTransactionFinder");
    this._queryRateMonitor.start();
    this._queryRateMonitor.suspend();

    this._rateMonitor = new RateMonitor("GGTransactionFinder");
    this._rateMonitor.start();
    this._rateMonitor.suspend();
}
Also used : RateMonitor(com.linkedin.databus.core.util.RateMonitor) ScnTxnPos(com.linkedin.databus.core.ScnTxnPos)

Aggregations

RateMonitor (com.linkedin.databus.core.util.RateMonitor)3 UnsupportedKeyException (com.linkedin.databus.core.UnsupportedKeyException)2 InvalidConfigException (com.linkedin.databus.core.util.InvalidConfigException)2 DatabusException (com.linkedin.databus2.core.DatabusException)2 EventCreationException (com.linkedin.databus2.producers.EventCreationException)2 EventReaderSummary (com.linkedin.databus2.producers.db.EventReaderSummary)2 File (java.io.File)2 IOException (java.io.IOException)2 KeyType (com.linkedin.databus.core.DbusEventKey.KeyType)1 ScnTxnPos (com.linkedin.databus.core.ScnTxnPos)1 BufferedWriter (java.io.BufferedWriter)1 FileWriter (java.io.FileWriter)1 Connection (java.sql.Connection)1 PreparedStatement (java.sql.PreparedStatement)1 ResultSet (java.sql.ResultSet)1 SQLException (java.sql.SQLException)1 GenericDatumReader (org.apache.avro.generic.GenericDatumReader)1 GenericRecord (org.apache.avro.generic.GenericRecord)1