use of co.cask.cdap.logging.meta.Checkpoint in project cdap by caskdata.
the class KafkaOffsetResolverTest method assertOffsetResolverResult.
private void assertOffsetResolverResult(KafkaOffsetResolver offsetResolver, List<ILoggingEvent> events, long targetTime, long baseTime) throws IOException {
  long expectedOffset = findExpectedOffsetByTime(events, targetTime);
  long offset = offsetResolver.getStartOffset(new Checkpoint(Long.MAX_VALUE, targetTime, 0), 0);
  Assert.assertEquals(String.format("Failed to find the expected event with target time %d when base time is %d", targetTime, baseTime), expectedOffset, offset);
}
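The helper above delegates to findExpectedOffsetByTime, which is not part of this excerpt. Below is a minimal sketch of what such a helper could look like, assuming events are published one per offset starting at offset 0 and inferring the expected semantics from testInOrderEvents further down; the name comes from the call above, but the body is a reconstruction, not the actual CDAP test code.

// Hypothetical reconstruction: the expected start offset is the position just past the
// last published event whose timestamp is <= targetTime. The real helper in
// KafkaOffsetResolverTest may handle out-of-order or unmatched timestamps differently.
private long findExpectedOffsetByTime(List<ILoggingEvent> events, long targetTime) {
  long offset = 0;
  for (ILoggingEvent event : events) {
    if (event.getTimeStamp() > targetTime) {
      break;
    }
    offset++;
  }
  return offset;
}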
use of co.cask.cdap.logging.meta.Checkpoint in project cdap by caskdata.
the class KafkaOffsetResolverTest method testInOrderEvents.
@Test
public void testInOrderEvents() throws InterruptedException, IOException {
  String topic = "testInOrderEvents";
  KafkaPipelineConfig config = new KafkaPipelineConfig(topic, Collections.singleton(0), 1024L, EVENT_DELAY_MILLIS, 1048576, 200L);
  KAFKA_TESTER.createTopic(topic, 1);
  // Publish log messages to Kafka and wait for all messages to be published
  long baseTime = System.currentTimeMillis() - EVENT_DELAY_MILLIS;
  List<ILoggingEvent> inOrderEvents = new ArrayList<>();
  for (int i = 0; i < 20; i++) {
    inOrderEvents.add(createLoggingEvent("test.logger", Level.INFO, Integer.toString(i), baseTime + i));
  }
  publishLog(topic, inOrderEvents);
  waitForAllLogsPublished(topic, inOrderEvents.size());
  KafkaOffsetResolver offsetResolver = new KafkaOffsetResolver(KAFKA_TESTER.getBrokerService(), config);
  // Use each event's timestamp as the target time and assert that the resolved start offset
  // is the offset immediately after that event
  for (int i = 0; i < inOrderEvents.size(); i++) {
    long targetTime = inOrderEvents.get(i).getTimeStamp();
    long offset = offsetResolver.getStartOffset(new Checkpoint(Long.MAX_VALUE, targetTime, 0), 0);
    Assert.assertEquals("Failed to find the expected event with the target time: " + targetTime, i + 1, offset);
  }
}
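The test builds its input with a createLoggingEvent helper that is also not shown in this excerpt. A minimal sketch of such a helper using plain Logback APIs follows; the signature matches the call above, but the body is illustrative rather than the actual CDAP test code.

// Illustrative only: builds a Logback event carrying the logger name, level, message
// and timestamp, which is all the offset resolver needs to match events by time.
private ILoggingEvent createLoggingEvent(String loggerName, Level level, String message, long timestamp) {
  ch.qos.logback.classic.spi.LoggingEvent event = new ch.qos.logback.classic.spi.LoggingEvent();
  event.setLoggerName(loggerName);
  event.setLevel(level);
  event.setMessage(message);
  event.setTimeStamp(timestamp);
  return event;
}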
use of co.cask.cdap.logging.meta.Checkpoint in project cdap by caskdata.
the class KafkaLogProcessorPipeline method initializeOffsets.
/**
 * Initialize offsets for all partitions consumed by this pipeline.
 *
 * @throws InterruptedException if there is an interruption
 */
private void initializeOffsets() throws InterruptedException {
  // Setup initial offsets
  Set<Integer> partitions = new HashSet<>();
  partitions.addAll(config.getPartitions());
  while (!partitions.isEmpty() && !stopped) {
    Iterator<Integer> iterator = partitions.iterator();
    boolean failed = false;
    while (iterator.hasNext()) {
      int partition = iterator.next();
      Checkpoint checkpoint = checkpoints.get(partition);
      try {
        if (checkpoint == null || checkpoint.getNextOffset() <= 0) {
          // If there is no checkpoint, fetch from the beginning.
          offsets.put(partition, getLastOffset(partition, kafka.api.OffsetRequest.EarliestTime()));
        } else {
          // Otherwise, validate the checkpointed offset and resolve a new one if it is not valid
          offsets.put(partition, offsetResolver.getStartOffset(checkpoint, partition));
        }
        // Remove the partition once its offset is stored, to avoid unnecessary retries for it
        iterator.remove();
      } catch (Exception e) {
        OUTAGE_LOG.warn("Failed to get a valid offset from Kafka to start consumption for {}:{}", config.getTopic(), partition);
        failed = true;
      }
    }
    // Keep retrying until valid offsets have been found for all partitions
    if (failed && !stopped) {
      TimeUnit.SECONDS.sleep(2L);
    }
  }
}
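The getLastOffset method called above is not included in this excerpt. Since the code passes kafka.api.OffsetRequest.EarliestTime(), it presumably issues an offset request through the legacy SimpleConsumer API; the following is a rough sketch under that assumption. Note that the real method takes only the partition and timestamp and obtains the consumer and topic internally, so this signature is illustrative.

// Hypothetical sketch: asks the broker for the offset closest to the given timestamp
// (EarliestTime() or LatestTime()) for one topic partition using the legacy API.
private long getLastOffset(SimpleConsumer consumer, String topic, int partition, long timestamp) {
  TopicAndPartition topicAndPartition = new TopicAndPartition(topic, partition);
  Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo =
    Collections.singletonMap(topicAndPartition, new PartitionOffsetRequestInfo(timestamp, 1));
  kafka.javaapi.OffsetRequest request =
    new kafka.javaapi.OffsetRequest(requestInfo, kafka.api.OffsetRequest.CurrentVersion(), "offset-lookup");
  kafka.javaapi.OffsetResponse response = consumer.getOffsetsBefore(request);
  long[] found = response.offsets(topic, partition);
  return found.length > 0 ? found[0] : 0L;
}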
use of co.cask.cdap.logging.meta.Checkpoint in project cdap by caskdata.
the class KafkaLogProcessorPipeline method startUp.
@Override
protected void startUp() throws Exception {
  LOG.debug("Starting log processor pipeline for {} with configurations {}", name, config);
  // Reads the existing checkpoints
  Set<Integer> partitions = config.getPartitions();
  for (Map.Entry<Integer, Checkpoint> entry : checkpointManager.getCheckpoint(partitions).entrySet()) {
    Checkpoint checkpoint = entry.getValue();
    // Skip partitions that don't have a previous checkpoint.
    if (checkpoint.getNextOffset() >= 0 && checkpoint.getNextEventTime() >= 0 && checkpoint.getMaxEventTime() >= 0) {
      checkpoints.put(entry.getKey(), new MutableCheckpoint(checkpoint));
    }
  }
  context.start();
  fetchExecutor = Executors.newFixedThreadPool(partitions.size(), Threads.createDaemonThreadFactory("fetcher-" + name + "-%d"));
  // Emit pipeline related config as metrics
  emitConfigMetrics();
  LOG.info("Log processor pipeline for {} with config {} started with checkpoint {}", name, config, checkpoints);
}
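startUp() wraps every valid checkpoint in a MutableCheckpoint so that appendEvents() (shown below) can advance it in place. That inner class is not part of this excerpt; the following is a minimal sketch of the shape implied by the constructors and getters used in these snippets, assuming Checkpoint exposes the three fields read above. The real class in KafkaLogProcessorPipeline may differ in detail.

// Hypothetical sketch of the mutable wrapper: it copies the checkpointed offset and
// event times and lets the pipeline update them with fluent setters as events are appended.
private static final class MutableCheckpoint {
  private long nextOffset;
  private long nextEventTime;
  private long maxEventTime;

  MutableCheckpoint(Checkpoint checkpoint) {
    this(checkpoint.getNextOffset(), checkpoint.getNextEventTime(), checkpoint.getMaxEventTime());
  }

  MutableCheckpoint(long nextOffset, long nextEventTime, long maxEventTime) {
    this.nextOffset = nextOffset;
    this.nextEventTime = nextEventTime;
    this.maxEventTime = maxEventTime;
  }

  long getNextOffset() {
    return nextOffset;
  }

  MutableCheckpoint setNextOffset(long nextOffset) {
    this.nextOffset = nextOffset;
    return this;
  }
  // The remaining setters used in appendEvents() follow the same fluent pattern.
}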
use of co.cask.cdap.logging.meta.Checkpoint in project cdap by caskdata.
the class KafkaLogProcessorPipeline method appendEvents.
/**
 * Appends buffered events to the appender. If the {@code forced} parameter is {@code false}, only buffered
 * events that are older than the configured event delay are appended and removed from the buffer.
 * If {@code forced} is {@code true}, events are appended and removed until the buffer size drops to at most
 * {@code maxBufferSize * MIN_FREE_FACTOR} bytes, regardless of the event time.
 *
 * @return number of events appended to the appender
 */
private int appendEvents(long currentTimeMillis, boolean forced) {
  long minEventTime = currentTimeMillis - config.getEventDelayMillis();
  long maxRetainSize = forced ? (long) (config.getMaxBufferSize() * MIN_FREE_FACTOR) : Long.MAX_VALUE;
  TimeEventQueue.EventIterator<ILoggingEvent, OffsetTime> iterator = eventQueue.iterator();
  int eventsAppended = 0;
  long minDelay = Long.MAX_VALUE;
  long maxDelay = -1;
  while (iterator.hasNext()) {
    ILoggingEvent event = iterator.next();
    // The buffer is within the retain limit and the event is still within the buffering time;
    // no need to iterate any further
    if (eventQueue.getEventSize() <= maxRetainSize && event.getTimeStamp() >= minEventTime) {
      break;
    }
    // Update the delay bounds for the metrics emitted below
    long delay = System.currentTimeMillis() - event.getTimeStamp();
    minDelay = delay < minDelay ? delay : minDelay;
    maxDelay = delay > maxDelay ? delay : maxDelay;
    try {
      // Otherwise, append the event if its level is enabled for the effective logger
      ch.qos.logback.classic.Logger effectiveLogger = context.getEffectiveLogger(event.getLoggerName());
      if (event.getLevel().isGreaterOrEqual(effectiveLogger.getEffectiveLevel())) {
        effectiveLogger.callAppenders(event);
      }
    } catch (Exception e) {
      OUTAGE_LOG.warn("Failed to append log event in pipeline {}. Will be retried.", name, e);
      break;
    }
    // Updates the Kafka offset before removing the current event
    int partition = iterator.getPartition();
    MutableCheckpoint checkpoint = checkpoints.get(partition);
    // Get the smallest offset and corresponding timestamp from the event queue
    OffsetTime offsetTime = eventQueue.getSmallestOffset(partition);
    if (checkpoint == null) {
      checkpoint = new MutableCheckpoint(offsetTime.getOffset(), offsetTime.getEventTime(), event.getTimeStamp());
      checkpoints.put(partition, checkpoint);
    } else {
      checkpoint.setNextOffset(offsetTime.getOffset()).setNextEvenTime(offsetTime.getEventTime()).setMaxEventTime(event.getTimeStamp());
    }
    iterator.remove();
    eventsAppended++;
  }
  // For each partition, if there are no more events in the event queue, update the checkpoint nextOffset
  for (Int2LongMap.Entry entry : offsets.int2LongEntrySet()) {
    int partition = entry.getIntKey();
    if (eventQueue.isEmpty(partition)) {
      MutableCheckpoint checkpoint = checkpoints.get(partition);
      long offset = entry.getLongValue();
      // This means everything before the processed offset must have been written to the appender.
      if (checkpoint != null && offset > checkpoint.getNextOffset()) {
        checkpoint.setNextOffset(offset);
      }
    }
  }
  if (eventsAppended > 0) {
    // Events were appended in this call; update the delay and count metrics
    metricsContext.gauge(Constants.Metrics.Name.Log.PROCESS_MIN_DELAY, minDelay);
    metricsContext.gauge(Constants.Metrics.Name.Log.PROCESS_MAX_DELAY, maxDelay);
    metricsContext.increment(Constants.Metrics.Name.Log.PROCESS_MESSAGES_COUNT, eventsAppended);
  }
  // Failure to flush is OK; it will be retried by the wrapped appender
  try {
    metricsContext.gauge("event.queue.size.bytes", eventQueue.getEventSize());
    context.flush();
  } catch (IOException e) {
    OUTAGE_LOG.warn("Failed to flush in pipeline {}. Will be retried.", name, e);
  }
  return eventsAppended;
}
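The per-event bookkeeping above relies on an OffsetTime value carried by the event queue. That class is not shown in this excerpt; below is a minimal sketch of the shape implied by the calls to getOffset() and getEventTime(). This is hypothetical, and the real class may carry more state.

// Hypothetical sketch: pairs an event's Kafka offset with its log event time so the
// pipeline can roll a partition's checkpoint forward from the smallest remaining offset.
private static final class OffsetTime {
  private final long offset;
  private final long eventTime;

  OffsetTime(long offset, long eventTime) {
    this.offset = offset;
    this.eventTime = eventTime;
  }

  long getOffset() {
    return offset;
  }

  long getEventTime() {
    return eventTime;
  }
}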