Search in sources :

Example 11 with ProvenanceEventRecordDTO

use of com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO in project kylo by Teradata.

the class ProvenanceEventDtoBuilder method build.

/**
 * Creates a {@link ProvenanceEventRecordDTO} from the values collected by this builder.
 * <p>
 * Values that were not supplied are defaulted first, and the defaults are stored back on the builder:
 * a generated event id, the {@code DROP} event type, the "CustomComponent" component type, the current
 * time for both start and event time, the flow file id as the job flow file id, and empty attribute maps.
 * A starting event is always reported with the "CREATE" event type, and every record receives a freshly
 * generated random component id.
 * <p>
 * When there are no previous attributes, the record's attribute map is the same map instance as its
 * updated attributes; otherwise it is a new map holding the previous attributes overlaid with the
 * updated ones.
 *
 * @return the populated event record
 */
public ProvenanceEventRecordDTO build() {
    // ---- default anything the caller left unset ----
    if (this.eventId == null) {
        this.eventId = LongIdGenerator.nextId();
    }
    this.eventType = (this.eventType == null) ? EventType.DROP : this.eventType;
    if (StringUtils.isBlank(componentType)) {
        componentType = "CustomComponent";
    }

    // Start and event time share one timestamp when both are missing.
    final Long currentMillis = DateTime.now().getMillis();
    if (startTime == null) {
        startTime = currentMillis;
    }
    if (eventTime == null) {
        eventTime = currentMillis;
    }

    if (jobFlowFileId == null) {
        jobFlowFileId = flowFileId;
    }

    // ---- populate the record ----
    final ProvenanceEventRecordDTO record = new ProvenanceEventRecordDTO();

    // identity of the feed / flow file / job
    record.setFeedName(feedName);
    record.setFirstEventProcessorId(firstEventProcessorId);
    record.setJobFlowFileId(jobFlowFileId);
    record.setFlowFileUuid(flowFileId);

    // component that produced the event
    record.setComponentName(componentName);
    record.setComponentId(UUID.randomUUID().toString());
    record.setComponentType(componentType);

    // timing and job boundaries
    record.setStartTime(startTime);
    record.setEventTime(eventTime);
    record.setStream(stream);
    record.setIsStartOfJob(startingEvent);
    record.setIsFinalJobEvent(endingEvent);

    // event identity; a starting event overrides whatever type was configured
    record.setEventId(eventId);
    final String eventTypeName = startingEvent ? "CREATE" : eventType.name();
    record.setEventType(eventTypeName);

    // ---- attributes ----
    if (updatedAttributes == null) {
        updatedAttributes = new HashMap<>();
    }
    if (previousAttributes == null) {
        previousAttributes = new HashMap<>();
    }
    record.setUpdatedAttributes(updatedAttributes);
    record.setPreviousAttributes(previousAttributes);

    final Map<String, String> mergedAttributes;
    if (previousAttributes.isEmpty()) {
        // No history to merge: the record shares the updated-attributes map itself.
        mergedAttributes = updatedAttributes;
    } else {
        // Previous values first so that updated values win on key collisions.
        mergedAttributes = new HashMap<>(previousAttributes);
        mergedAttributes.putAll(updatedAttributes);
    }
    record.setAttributeMap(mergedAttributes);

    return record;
}
Also used : ProvenanceEventRecordDTO(com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO) HashMap(java.util.HashMap)

Example 12 with ProvenanceEventRecordDTO

use of com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO in project kylo by Teradata.

the class JpaBatchJobExecutionProvider method getOrCreateStreamJobExecution.

/**
 * Returns the streaming job execution for the feed named by the event, creating one when needed.
 * <p>
 * The latest job is taken from the {@code latestStreamingJobByFeedName} cache (re-read from the repository
 * when running clustered) or, if not cached, looked up via {@code findLatestJobForFeed}. When no job exists,
 * or the latest job is not a stream, a new job execution is created and flagged as a stream; a non-stream
 * job that is still running is failed first. The resulting job is cached under the feed name.
 * <p>
 * The job is saved when it was newly created here or when its stream flag had to be switched on.
 *
 * @param event the provenance event that triggered the lookup; supplies the feed name
 * @param feed  the feed passed through to {@code createNewJobExecution}
 * @return the streaming job execution for the feed
 */
private JpaBatchJobExecution getOrCreateStreamJobExecution(ProvenanceEventRecordDTO event, OpsManagerFeed feed) {
    JpaBatchJobExecution jobExecution = null;
    boolean isNew = false;
    try {
        BatchJobExecution latestJobExecution = latestStreamingJobByFeedName.get(event.getFeedName());
        if (latestJobExecution == null) {
            latestJobExecution = findLatestJobForFeed(event.getFeedName());
        } else if (clusterService.isClustered()) {
            // When clustered, re-read the cached job from the repository instead of trusting the cached copy.
            latestJobExecution = jobExecutionRepository.findOne(latestJobExecution.getJobExecutionId());
        }
        if (latestJobExecution == null || !latestJobExecution.isStream()) {
            // If the latest Job is not set to be a Stream and its still running we need to fail it and create the new streaming job.
            if (latestJobExecution != null && !latestJobExecution.isFinished()) {
                final String failureMessage = "Failed Running Batch event as this Feed has now become a Stream";
                ProvenanceEventRecordDTO tempFailedEvent = new ProvenanceEventRecordDTO();
                tempFailedEvent.setFeedName(event.getFeedName());
                tempFailedEvent.setAttributeMap(new HashMap<>());
                tempFailedEvent.setIsFailure(true);
                tempFailedEvent.setDetails(failureMessage);
                finishJob(tempFailedEvent, (JpaBatchJobExecution) latestJobExecution);
                latestJobExecution.setExitMessage(failureMessage);
                save(latestJobExecution);
            }
            jobExecution = createNewJobExecution(event, feed);
            jobExecution.setStream(true);
            // The stream flag was set after creation, so the new job must be saved below.
            // Previously this flag was never raised and the save was skipped for new jobs.
            isNew = true;
            log.info("Created new Streaming Job Execution with id of {} and starting event {} ", jobExecution.getJobExecutionId(), event);
        } else {
            jobExecution = (JpaBatchJobExecution) latestJobExecution;
        }
        latestStreamingJobByFeedName.put(event.getFeedName(), jobExecution);
    } catch (OptimisticLockException e) {
        // Fall back to whatever the latest job for the feed is now; it was not created here.
        jobExecution = (JpaBatchJobExecution) findLatestJobForFeed(event.getFeedName());
        isNew = false;
    }
    // NOTE(review): if the fallback lookup above returns null this dereference still throws, as before.
    boolean shouldSave = isNew;
    if (!jobExecution.isStream()) {
        jobExecution.setStream(true);
        shouldSave = true;
    }
    if (shouldSave) {
        save(jobExecution);
    }
    return jobExecution;
}
Also used : BatchJobExecution(com.thinkbiganalytics.metadata.api.jobrepo.job.BatchJobExecution) ProvenanceEventRecordDTO(com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO) OptimisticLockException(javax.persistence.OptimisticLockException)

Example 13 with ProvenanceEventRecordDTO

use of com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO in project kylo by Teradata.

the class SparkProvenance method run.

/**
 * Runs the sample Spark job: builds two custom provenance events and sends them through the
 * {@link ProvenanceEventService} selected by the supplied arguments.
 * <p>
 * The first event records the result of {@code show databases} from Hive; the second records a random
 * UUID and a timestamp. Any failure while running is logged rather than rethrown. The method always ends
 * by closing the provenance connection (if one was obtained), stopping the Spark context and exiting
 * the JVM.
 *
 * @param out  stream the usage message is printed to when too few arguments are given
 * @param args flowfile id, job flowfile id, feed name, connection url and connection type, in that order
 * @throws IllegalArgumentException if fewer than five arguments are supplied
 */
private void run(@Nonnull final PrintStream out, @Nonnull final String... args) {
    // Check how many arguments were passed in
    if (args.length < 5) {
        String msg = "Proper Usage is: <flowfile-id> <job-flowfile-id> <feed-name (category.feed)> <connection-url (url to connect to JMS or KAFKA)> <type (JMS, KAFKA)>. "
                     + "You provided " + args.length + " args which are (comma separated): " + StringUtils.join(args, ",");
        out.println(msg);
        throw new IllegalArgumentException(msg);
    }
    ProvenanceEventService provenanceEventService = null;
    final SparkContext sparkContext = SparkContext.getOrCreate();
    try {
        final SparkProvenanceConfiguration params = new SparkProvenanceConfiguration(args);
        // Get the proper ProvenanceService
        provenanceEventService = ProvenanceServiceFactory.getProvenanceEventService(params);
        // Collection of custom Provenance Events we will be sending to Kylo
        List<ProvenanceEventRecordDTO> events = new ArrayList<>();
        final HiveContext hiveContext = new HiveContext(sparkContext);
        // Do some work... i.e. look up the Databases in Hive
        ProvenanceEventRecordDTO event = newEvent("Databases", params);
        Dataset<?> df = hiveContext.sql("show databases");
        event.getAttributeMap().put("databases", df.toJSON().collectAsList().toString());
        event.setEventTime(System.currentTimeMillis());
        events.add(event);
        event = newEvent("Another Step", params);
        event.getAttributeMap().put("UUID 1", UUID.randomUUID().toString());
        event.setEventTime(System.currentTimeMillis());
        event.getAttributeMap().put("timestamp", String.valueOf(System.currentTimeMillis()));
        events.add(event);
        // Send the events off
        provenanceEventService.sendEvents(events);
        log.info("Spark app finished");
    } catch (Exception e) {
        log.error("Failed to run Spark Provenance Job: {}", e.toString(), e);
    } finally {
        try {
            // The service is still null when configuration or service lookup failed above.
            if (provenanceEventService != null) {
                provenanceEventService.closeConnection();
            }
        } finally {
            // Always shut Spark down and exit, even if closing the connection throws.
            sparkContext.stop();
            log.info("Exiting!!!!");
            // NOTE(review): the exit status is 0 even after a logged failure — confirm this is intended.
            System.exit(0);
        }
    }
}
Also used : ProvenanceEventService(com.thinkbiganalytics.provenance.api.ProvenanceEventService) SparkContext(org.apache.spark.SparkContext) ProvenanceEventRecordDTO(com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO) Dataset(org.apache.spark.sql.Dataset) ArrayList(java.util.ArrayList) HiveContext(org.apache.spark.sql.hive.HiveContext)

Aggregations

ProvenanceEventRecordDTO (com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTO)13 ArrayList (java.util.ArrayList)8 ProvenanceEventRecordDTOHolder (com.thinkbiganalytics.nifi.provenance.model.ProvenanceEventRecordDTOHolder)5 HashMap (java.util.HashMap)4 AggregatedFeedProcessorStatisticsHolder (com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedFeedProcessorStatisticsHolder)3 List (java.util.List)3 Map (java.util.Map)3 OpsManagerFeed (com.thinkbiganalytics.metadata.api.feed.OpsManagerFeed)2 BatchJobExecution (com.thinkbiganalytics.metadata.api.jobrepo.job.BatchJobExecution)2 AggregatedFeedProcessorStatistics (com.thinkbiganalytics.nifi.provenance.model.stats.AggregatedFeedProcessorStatistics)2 ProvenanceEventService (com.thinkbiganalytics.provenance.api.ProvenanceEventService)2 ProvenanceException (com.thinkbiganalytics.provenance.api.ProvenanceException)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2 TimeUnit (java.util.concurrent.TimeUnit)2 Lock (java.util.concurrent.locks.Lock)2 Collectors (java.util.stream.Collectors)2 Cache (com.google.common.cache.Cache)1 CacheBuilder (com.google.common.cache.CacheBuilder)1 ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder)1 ClusterService (com.thinkbiganalytics.cluster.ClusterService)1