Example 6 with ProvenanceEventRecordDTO

Use of com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO in project kylo by Teradata.

In the class FeedStatistics, the method addEvent:

public void addEvent(ProvenanceEventRecord event, Long eventId) {
    FeedEventStatistics.getInstance().calculateTimes(event, eventId);
    ProvenanceEventRecordDTO eventRecordDTO = null;
    String feedFlowFileId = FeedEventStatistics.getInstance().getFeedFlowFileId(event);
    boolean isStartingFeedFlow = ProvenanceEventUtil.isStartingFeedFlow(event);
    String batchKey = batchKey(event, feedFlowFileId, isStartingFeedFlow);
    // always track drop events if it's on a tracked feed
    boolean isDropEvent = ProvenanceEventUtil.isEndingFlowFileEvent(event);
    if (isDropEvent && FeedEventStatistics.getInstance().beforeProcessingIsLastEventForTrackedFeed(event, eventId)) {
        batchKey += UUID.randomUUID().toString();
    }
    if (((!isStartingFeedFlow && FeedEventStatistics.getInstance().isTrackingDetails(event.getFlowFileUuid()))
         || (isStartingFeedFlow && lastRecords.size() <= limit))
        && !lastRecords.containsKey(batchKey)) {
        // if we are tracking details, send the event off to JMS
        if (isStartingFeedFlow) {
            FeedEventStatistics.getInstance().setTrackingDetails(event);
        }
        eventRecordDTO = ProvenanceEventRecordConverter.convert(event);
        eventRecordDTO.setEventId(eventId);
        eventRecordDTO.setIsStartOfJob(ProvenanceEventUtil.isStartingFeedFlow(event));
        eventRecordDTO.setJobFlowFileId(feedFlowFileId);
        eventRecordDTO.setFirstEventProcessorId(feedProcessorId);
        eventRecordDTO.setStartTime(FeedEventStatistics.getInstance().getEventStartTime(eventId));
        eventRecordDTO.setEventDuration(FeedEventStatistics.getInstance().getEventDuration(eventId));
        if (ProvenanceEventUtil.isFlowFileQueueEmptied(event)) {
            // a DROP event's component id is the connection id, not the processor id, so set the component name explicitly
            eventRecordDTO.setComponentName("FlowFile Queue emptied");
            eventRecordDTO.setIsFailure(true);
        }
        if (ProvenanceEventUtil.isTerminatedByFailureRelationship(event)) {
            eventRecordDTO.setIsFailure(true);
        }
        lastRecords.put(batchKey, eventRecordDTO);
    } else {
        FeedEventStatistics.getInstance().skip(event, eventId);
    }
    FeedEventStatistics.getInstance().finishedEvent(event, eventId);
    boolean isEndingEvent = FeedEventStatistics.getInstance().isEndingFeedFlow(eventId);
    if (eventRecordDTO != null && isEndingEvent) {
        eventRecordDTO.setIsFinalJobEvent(isEndingEvent);
    }
    FeedProcessorStatisticsAggregator.getInstance().add(getStats(event), event, eventId);
    FeedEventStatistics.getInstance().cleanup(event, eventId);
}
Also used : ProvenanceEventRecordDTO(com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO)
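
The private batchKey(...) helper and the lastRecords map are not shown in this excerpt. The sketch below is hypothetical (the class name BatchKeySketch, the shouldTrack method, and the ":start"/":child" key suffixes are illustrative, not Kylo code); it only demonstrates why addEvent appends a random UUID to the key for ending drop events: a unique key can never be suppressed by the containsKey check, so the final event of a tracked flow is always recorded.

import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class BatchKeySketch {

    // stand-in for the lastRecords map that FeedStatistics uses for batch deduplication
    private final ConcurrentMap<String, Boolean> lastRecords = new ConcurrentHashMap<>();

    // assumed key shape: one bucket per feed flow file, with the starting event kept separate
    private String batchKey(String feedFlowFileId, boolean isStartingFeedFlow) {
        return feedFlowFileId + (isStartingFeedFlow ? ":start" : ":child");
    }

    public boolean shouldTrack(String feedFlowFileId, boolean isStartingFeedFlow, boolean isLastDropEvent) {
        String key = batchKey(feedFlowFileId, isStartingFeedFlow);
        if (isLastDropEvent) {
            // mirrors the UUID suffix in addEvent: ending events always get a unique key and are always kept
            key += UUID.randomUUID().toString();
        }
        // putIfAbsent returns null only for a previously unseen key, i.e. the event should be tracked
        return lastRecords.putIfAbsent(key, Boolean.TRUE) == null;
    }
}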

Example 7 with ProvenanceEventRecordDTO

Use of com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO in project kylo by Teradata.

In the class FeedStatisticsManager, the method gatherStatistics:

public void gatherStatistics() {
    lock.lock();
    List<ProvenanceEventRecordDTO> eventsToSend = null;
    Map<String, AggregatedFeedProcessorStatistics> statsToSend = null;
    try {
        // Gather events and stats to send to Ops Manager
        // filter out the streaming feeds
        ensureStreamingFeedMetadata();
        eventsToSend = feedStatisticsMap.values().stream()
            .flatMap(stats -> stats.getEventsToSend().stream()
                .filter(event -> !FeedEventStatistics.getInstance().streamingFeedProcessorIdsList.contains(event.getFirstEventProcessorId())))
            .sorted(Comparator.comparing(ProvenanceEventRecordDTO::getEventTime).thenComparing(ProvenanceEventRecordDTO::getEventId))
            .collect(Collectors.toList());
        final String collectionId = UUID.randomUUID().toString();
        Map<String, Long> runningFlowsCount = new HashMap<>();
        for (FeedStatistics feedStatistics : feedStatisticsMap.values()) {
            if (feedStatistics.hasStats()) {
                if (statsToSend == null) {
                    statsToSend = new ConcurrentHashMap<>();
                }
                AggregatedFeedProcessorStatistics feedProcessorStatistics = statsToSend.computeIfAbsent(
                    feedStatistics.getFeedProcessorId(),
                    feedProcessorId -> new AggregatedFeedProcessorStatistics(feedStatistics.getFeedProcessorId(), collectionId, sendJmsTimeMillis));
                AggregatedProcessorStatistics processorStatistics = feedProcessorStatistics.getProcessorStats().computeIfAbsent(
                    feedStatistics.getProcessorId(),
                    processorId -> new AggregatedProcessorStatisticsV2(feedStatistics.getProcessorId(), null, collectionId));
                // accumulate the stats together into the processorStatistics object grouped by source connection id
                feedStatistics.getStats().stream().forEach(stats -> {
                    FeedProcessorStatisticsAggregator.getInstance().addStats1(processorStatistics.getStats(stats.getSourceConnectionIdentifier()), stats);
                });
            }
        }
        if ((eventsToSend != null && !eventsToSend.isEmpty()) || (statsToSend != null && !statsToSend.isEmpty())) {
            if (statsToSend == null) {
                // guard: events can be present without stats; avoids a NullPointerException on statsToSend below
                statsToSend = new ConcurrentHashMap<>();
            }
            // send it off to JMS on a different thread
            JmsSender jmsSender = new JmsSender(eventsToSend, statsToSend.values(), FeedEventStatistics.getInstance().getRunningFeedFlowsForFeed(statsToSend.keySet()));
            this.jmsService.submit(new JmsSenderConsumer(jmsSender));
        } else {
            // if we are empty but the runningFlows have changed, then send off as well
            if (FeedEventStatistics.getInstance().isFeedProcessorRunningFeedFlowsChanged()) {
                JmsSender jmsSender = new JmsSender(null, null, FeedEventStatistics.getInstance().getRunningFeedFlowsChanged());
                this.jmsService.submit(new JmsSenderConsumer(jmsSender));
            }
        }
    } finally {
        FeedEventStatistics.getInstance().markFeedProcessorRunningFeedFlowsUnchanged();
        feedStatisticsMap.values().stream().forEach(stats -> stats.clear());
        lock.unlock();
    }
}
Also used : ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) KyloFeedBatchStreamTypeJmsListener(com.thinkbiganalytics.nifi.provenance.jms.KyloFeedBatchStreamTypeJmsListener) ScheduledFuture(java.util.concurrent.ScheduledFuture) LoggerFactory(org.slf4j.LoggerFactory) AggregatedFeedProcessorStatistics(com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedFeedProcessorStatistics) HashMap(java.util.HashMap) ProvenanceEventRecord(org.apache.nifi.provenance.ProvenanceEventRecord) Map(java.util.Map) AggregatedProcessorStatistics(com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedProcessorStatistics) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) AggregatedProcessorStatisticsV2(com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedProcessorStatisticsV2) ThreadFactory(java.util.concurrent.ThreadFactory) ProvenanceEventRecordDTO(com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO) ExecutorService(java.util.concurrent.ExecutorService) Logger(org.slf4j.Logger) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) UUID(java.util.UUID) SpringApplicationContext(com.thinkbiganalytics.nifi.provenance.util.SpringApplicationContext) Collectors(java.util.stream.Collectors) Executors(java.util.concurrent.Executors) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Lock(java.util.concurrent.locks.Lock) ProvenanceEventUtil(com.thinkbiganalytics.nifi.provenance.util.ProvenanceEventUtil) Comparator(java.util.Comparator)
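
The pipeline above leans on two standard-library patterns: a two-level Comparator to order events by time and then by id, and Map.computeIfAbsent to build the per-processor statistics maps. A minimal, self-contained sketch of the same patterns, using a hypothetical Event class instead of the Kylo DTOs:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;

public class GroupAndSortSketch {

    // hypothetical stand-in carrying only the fields the gatherStatistics pipeline reads
    static class Event {
        final String processorId;
        final long eventTime;
        final long eventId;

        Event(String processorId, long eventTime, long eventId) {
            this.processorId = processorId;
            this.eventTime = eventTime;
            this.eventId = eventId;
        }

        String getProcessorId() { return processorId; }
        Long getEventTime() { return eventTime; }
        Long getEventId() { return eventId; }

        @Override
        public String toString() { return processorId + "/" + eventId; }
    }

    public static void main(String[] args) {
        List<Event> events = Arrays.asList(
            new Event("p2", 200L, 5L),
            new Event("p1", 100L, 2L),
            new Event("p1", 100L, 1L));

        // order by event time, then by event id - the same comparator shape used for eventsToSend
        List<Event> ordered = events.stream()
            .sorted(Comparator.comparing(Event::getEventTime).thenComparing(Event::getEventId))
            .collect(Collectors.toList());

        // group per processor id with computeIfAbsent, mirroring how statsToSend is accumulated
        Map<String, List<Event>> byProcessor = new ConcurrentHashMap<>();
        ordered.forEach(e -> byProcessor.computeIfAbsent(e.getProcessorId(), id -> new ArrayList<>()).add(e));

        System.out.println(ordered);              // [p1/1, p1/2, p2/5]
        System.out.println(byProcessor.keySet()); // [p1, p2] (iteration order not guaranteed)
    }
}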

Example 8 with ProvenanceEventRecordDTO

Use of com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO in project kylo by Teradata.

In the class KyloJmsProvenanceEventService, the method sendEvents:

private void sendEvents(String jmsUrl, List<ProvenanceEventRecordDTO> events) throws Exception {
    ProvenanceEventRecordDTOHolder eventRecordDTOHolder = new ProvenanceEventRecordDTOHolder();
    List<ProvenanceEventRecordDTO> batchEvents = new ArrayList<>();
    for (ProvenanceEventRecordDTO event : events) {
        if (!event.isStream()) {
            batchEvents.add(event);
        }
    }
    eventRecordDTOHolder.setEvents(batchEvents);
    AggregatedFeedProcessorStatisticsHolder stats = GroupedStatsUtil.gatherStats(events);
    log.info("Sending {} events to JMS ", eventRecordDTOHolder);
    sendKyloBatchEventMessage(jmsUrl, eventRecordDTOHolder);
    sendKyloEventStatisticsMessage(jmsUrl, stats);
    log.info("Events successfully sent to JMS");
}
Also used : ProvenanceEventRecordDTO(com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO) ProvenanceEventRecordDTOHolder(com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTOHolder) ArrayList(java.util.ArrayList) AggregatedFeedProcessorStatisticsHolder(com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedFeedProcessorStatisticsHolder)
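
The loop above keeps only the events whose isStream() is false for the batch holder. Collectors.partitioningBy gives both groups in one pass; the sketch below is an alternative shape, not the Kylo implementation:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO;

public class PartitionSketch {

    // true -> streaming events, false -> batch events destined for the ProvenanceEventRecordDTOHolder
    public static Map<Boolean, List<ProvenanceEventRecordDTO>> splitByStream(List<ProvenanceEventRecordDTO> events) {
        return events.stream().collect(Collectors.partitioningBy(ProvenanceEventRecordDTO::isStream));
    }
}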

Example 9 with ProvenanceEventRecordDTO

Use of com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO in project kylo by Teradata.

In the class TestProvenanceRest, the method testProvenanceRest:

// @Test
public void testProvenanceRest() {
    ProvenanceEventService restProvenanceEventService = new KyloRestProvenanceEventService();
    Map<String, String> params = new HashMap<>();
    params.put(KyloRestProvenanceEventService.USERNAME_CONFIG, "dladmin");
    params.put(KyloRestProvenanceEventService.PASSWORD_CONFIG, "thinkbig");
    params.put(KyloRestProvenanceEventService.HOST_CONFIG, "localhost");
    params.put(KyloRestProvenanceEventService.PORT_CONFIG, "8400");
    restProvenanceEventService.configure(params);
    String feedName = "provenance.provenance_test";
    String flowfileId = UUID.randomUUID().toString();
    DateTime startTime = DateTime.now().minusMinutes(1);
    Long start = startTime.getMillis();
    ProvenanceEventRecordDTO event1 = new ProvenanceEventDtoBuilder(feedName, flowfileId, "First Step").startingEvent(true).startTime(start).build();
    ProvenanceEventRecordDTO event2 = new ProvenanceEventDtoBuilder(feedName, flowfileId, "Second Step").startTime(startTime.plusSeconds(30).getMillis()).build();
    ProvenanceEventRecordDTO event3 = new ProvenanceEventDtoBuilder(feedName, flowfileId, "Final Step").endingEvent(true).build();
    List<ProvenanceEventRecordDTO> events = new ArrayList<>();
    events.add(event1);
    events.add(event2);
    events.add(event3);
    try {
        restProvenanceEventService.sendEvents(events);
    } catch (ProvenanceException e) {
        e.printStackTrace();
    }
}
Also used : ProvenanceEventService(com.thinkbiganalytics.provenance.api.ProvenanceEventService) ProvenanceEventDtoBuilder(com.thinkbiganalytics.nifi.provenance.model.util.ProvenanceEventDtoBuilder) ProvenanceEventRecordDTO(com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO) ProvenanceException(com.thinkbiganalytics.provenance.api.ProvenanceException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime)

Example 10 with ProvenanceEventRecordDTO

Use of com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO in project kylo by Teradata.

In the class GroupedStatsUtil, the method gatherStats:

/**
 * Gather feed stats for a list of events
 */
public static AggregatedFeedProcessorStatisticsHolder gatherStats(final List<ProvenanceEventRecordDTO> events) {
    Map<String, Map<GroupedStatsIdentity, List<GroupedStats>>> feedStatsByProcessor = new ConcurrentHashMap<>();
    // events.stream().forEach(e -> {
    for (ProvenanceEventRecordDTO e : events) {
        if (!feedStatsByProcessor.containsKey(e.getFeedName())) {
            feedStatsByProcessor.put(e.getFeedName(), new ConcurrentHashMap<GroupedStatsIdentity, List<GroupedStats>>());
        }
        // feedStatsByProcessor.putIfAbsent(e.getFeedName(), );
        Map<GroupedStatsIdentity, List<GroupedStats>> feedStats = feedStatsByProcessor.get(e.getFeedName());
        GroupedStatsIdentity identity = new GroupedStatsIdentity(e.getComponentId(), e.getComponentName());
        if (!feedStats.containsKey(identity)) {
            feedStats.put(identity, new ArrayList<GroupedStats>());
        }
        // feedStats.putIfAbsent(identity, new ArrayList<>());
        List<GroupedStats> feedProcessorStats = feedStats.get(identity);
        // Add the new stats
        GroupedStats statsV2 = GroupedStatsUtil.add(new GroupedStatsV2(), e);
        feedProcessorStats.add(statsV2);
    }
    // );
    List<AggregatedFeedProcessorStatistics> statsList = new ArrayList<>();
    for (Map.Entry<String, Map<GroupedStatsIdentity, List<GroupedStats>>> feedStats : feedStatsByProcessor.entrySet()) {
        AggregatedFeedProcessorStatistics feedProcessorStatistics = GroupedStatsUtil.groupStatsByProcessor(feedStats.getKey(), feedStats.getValue());
        statsList.add(feedProcessorStatistics);
    }
    /* feedStatsByProcessor.entrySet().stream().forEach(feedStats -> {
            AggregatedFeedProcessorStatistics feedProcessorStatistics = GroupedStatsUtil.groupStatsByProcessor(feedStats.getKey(), feedStats.getValue());
            statsList.add(feedProcessorStatistics);
        });
        */
    AggregatedFeedProcessorStatisticsHolderV3 feedProcessorStatisticsHolderV3 = new AggregatedFeedProcessorStatisticsHolderV3();
    feedProcessorStatisticsHolderV3.setFeedStatistics(statsList);
    return feedProcessorStatisticsHolderV3;
}
Also used : GroupedStats(com.thinkbiganalytics.nifi.provenance.model.stats.GroupedStats) ProvenanceEventRecordDTO(com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO) ArrayList(java.util.ArrayList) AggregatedFeedProcessorStatistics(com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedFeedProcessorStatistics) GroupedStatsIdentity(com.thinkbiganalytics.nifi.provenance.model.stats.GroupedStatsIdentity) List(java.util.List) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Map(java.util.Map) GroupedStatsV2(com.thinkbiganalytics.nifi.provenance.model.stats.GroupedStatsV2) AggregatedFeedProcessorStatisticsHolderV3(com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedFeedProcessorStatisticsHolderV3)
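
A short usage sketch for gatherStats, building the input events with the ProvenanceEventDtoBuilder shown in Example 9. The feed and step names are illustrative, the sketch assumes the builder populates the feed name and component fields that the grouping above relies on, and the import for GroupedStatsUtil is omitted because its package is not shown in these excerpts.

import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

import com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO;
import com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedFeedProcessorStatisticsHolder;
import com.thinkbiganalytics.nifi.provenance.model.util.ProvenanceEventDtoBuilder;

public class GatherStatsSketch {

    public static void main(String[] args) {
        String feedName = "category.example_feed";   // illustrative feed name
        String flowFileId = UUID.randomUUID().toString();

        List<ProvenanceEventRecordDTO> events = new ArrayList<>();
        events.add(new ProvenanceEventDtoBuilder(feedName, flowFileId, "First Step").startingEvent(true).build());
        events.add(new ProvenanceEventDtoBuilder(feedName, flowFileId, "Final Step").endingEvent(true).build());

        // groups the events by feed and component, then wraps them in a V3 statistics holder
        AggregatedFeedProcessorStatisticsHolder stats = GroupedStatsUtil.gatherStats(events);
        System.out.println(stats);
    }
}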

Aggregations

ProvenanceEventRecordDTO (com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO): 13
ArrayList (java.util.ArrayList): 8
ProvenanceEventRecordDTOHolder (com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTOHolder): 5
HashMap (java.util.HashMap): 4
AggregatedFeedProcessorStatisticsHolder (com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedFeedProcessorStatisticsHolder): 3
List (java.util.List): 3
Map (java.util.Map): 3
OpsManagerFeed (com.thinkbiganalytics.metadata.api.feed.OpsManagerFeed): 2
BatchJobExecution (com.thinkbiganalytics.metadata.api.jobrepo.job.BatchJobExecution): 2
AggregatedFeedProcessorStatistics (com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedFeedProcessorStatistics): 2
ProvenanceEventService (com.thinkbiganalytics.provenance.api.ProvenanceEventService): 2
ProvenanceException (com.thinkbiganalytics.provenance.api.ProvenanceException): 2
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2
TimeUnit (java.util.concurrent.TimeUnit): 2
Lock (java.util.concurrent.locks.Lock): 2
Collectors (java.util.stream.Collectors): 2
Cache (com.google.common.cache.Cache): 1
CacheBuilder (com.google.common.cache.CacheBuilder): 1
ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder): 1
ClusterService (com.thinkbiganalytics.cluster.ClusterService): 1