Search in sources :

Example 1 with IterationStatistics

use of edu.usf.cutr.gtfsrtvalidator.lib.validation.IterationStatistics in project gtfs-realtime-validator by CUTR-at-USF.

The method processFeeds of the class BatchProcessor.

/**
 * Process the GTFS and GTFS-realtime feeds provided in the constructor. If setReturnStatistics() is set to true,
 * the method will return a list of IterationStatistics (one per processed GTFS-rt file) for performance in the batch
 * validation.  By default this method will return null to avoid memory issues with extremely large batch processes.
 * <p>
 * GTFS-rt files are processed in the order given by the configured sort (date modified or file name). A file is
 * skipped (and contributes no statistics) when it cannot be read, when it cannot be decoded as a protocol buffer,
 * or when its MD5 hash equals the hash of the most recently processed file.
 *
 * @return If setReturnStatistics() is set to true, it will return a list of IterationStatistics (one per processed
 * GTFS-rt file) for performance in the batch validation.  By default this method will return null to avoid memory
 * issues when processing an extremely large number of files.
 * @throws NoSuchAlgorithmException If the MD5 hash algorithm (used to determine feed uniqueness) is not available on the machine executing the code
 * @throws IOException              If the GTFS or GTFS-realtime files cannot be read or the results cannot be written to disk
 */
public List<IterationStatistics> processFeeds() throws NoSuchAlgorithmException, IOException {
    // Read GTFS data into a GtfsDaoImpl
    _log.info("Starting batch processor...");
    if (mReturnStatistics) {
        mIterationStatistics = new ArrayList<>();
    }
    String timeZoneText = null;
    double gtfsReadTime = readGtfsData();
    // Use the time zone of the first agency returned by the GTFS data
    Collection<Agency> agencies = mGtfsData.getAllAgencies();
    for (Agency agency : agencies) {
        timeZoneText = agency.getTimezone();
        break;
    }
    // NOTE(review): timeZoneText is still null here if the GTFS data contains no agencies (or the first agency has
    // no time zone) - confirm how TimeZone.getTimeZone() and GtfsMetadata are expected to behave in that case
    mGtfsMetadata = new GtfsMetadata(mPathToGtfsFile.getAbsolutePath(), TimeZone.getTimeZone(timeZoneText), mGtfsData, mIgnoreShapes);
    // Initialize validation rules (only once - the rule list is shared, so guard it while populating)
    synchronized (mValidationRules) {
        if (mValidationRules.isEmpty()) {
            mValidationRules.add(new CrossFeedDescriptorValidator());
            mValidationRules.add(new VehicleValidator());
            mValidationRules.add(new TimestampValidator());
            mValidationRules.add(new StopTimeUpdateValidator());
            mValidationRules.add(new TripDescriptorValidator());
            mValidationRules.add(new StopValidator());
            mValidationRules.add(new FrequencyTypeZeroValidator());
            mValidationRules.add(new FrequencyTypeOneValidator());
            mValidationRules.add(new HeaderValidator());
        }
    }
    // Configure output
    ObjectMapper mapper = new ObjectMapper();
    mapper.enable(SerializationFeature.INDENT_OUTPUT);
    _log.info("Sorting GTFS-rt files by " + mSortBy.name() + "...");
    // Read GTFS-rt protobuf files from provided directory.  The stream returned by Files.walk() holds open
    // directory handles, so it must be closed - try-with-resources does that even if sorting/collecting fails.
    // (Stream is fully qualified so that no additional import is required.)
    List<Path> paths;
    try (java.util.stream.Stream<Path> walk = Files.walk(Paths.get(mPathToGtfsRealtime))) {
        paths = walk.filter(Files::isRegularFile).sorted((o1, o2) -> {
            if (mSortBy.equals(SortBy.DATE_MODIFIED)) {
                try {
                    // Sort by date modified (ascending) (it seems more consistent cross-platform than "date created")
                    return SortUtils.compareByDateModified(o1, o2);
                } catch (IOException e) {
                    _log.error("Can't sort GTFS-rt files by date - assuming dates are equal: " + e);
                }
                // Assume file dates are equal if we get an exception
                return 0;
            } else {
                // Sort by name (ascending)
                return SortUtils.compareByFileName(o1, o2);
            }
        }).collect(Collectors.toList());
    }
    MessageDigest md = MessageDigest.getInstance("MD5");
    // State carried from the last successfully processed file to the next iteration
    GtfsRealtime.FeedMessage prevMessage = null;
    byte[] prevHash = null;
    for (Path path : paths) {
        IterationStatistics stats = null;
        if (mReturnStatistics) {
            stats = new IterationStatistics();
            stats.setGtfsReadTime(gtfsReadTime);
        }
        long startTimeNanos = System.nanoTime();
        long startToByteArray = System.nanoTime();
        byte[] protobuf;
        try {
            // Files.readAllBytes() opens, fully reads, and closes the file, so no file handle is leaked per feed
            protobuf = Files.readAllBytes(path);
        } catch (IOException e) {
            _log.error("Error reading GTFS-rt file to byte array, skipping to next file: " + e);
            continue;
        }
        double toByteArray = getElapsedTime(startToByteArray, System.nanoTime());
        _log.info("Read " + path.getFileName() + " to byte array in " + getElapsedTimeString(toByteArray));
        if (mReturnStatistics) {
            stats.setToByteArrayTime(toByteArray);
        }
        byte[] currentHash = md.digest(protobuf);
        if (MessageDigest.isEqual(currentHash, prevHash)) {
            // This feed file is a duplicate of the last one - skip to next file
            continue;
        }
        long timestamp;
        if (mSortBy.equals(SortBy.DATE_MODIFIED)) {
            // Use file last modified date as "current" timestamp
            timestamp = Files.getLastModifiedTime(path).toMillis();
        } else {
            // Use time parsed from file name as "current" timestamp
            try {
                timestamp = TimestampUtils.getTimestampFromFileName(path.toFile().getName());
            } catch (DateTimeParseException | StringIndexOutOfBoundsException e) {
                _log.error("Couldn't parse timestamp from file name '" + path.toFile().getName() + "' - using date modified instead: " + e);
                timestamp = Files.getLastModifiedTime(path).toMillis();
            }
        }
        long startProtobufDecode = System.nanoTime();
        GtfsRealtime.FeedMessage message;
        try {
            message = GtfsRealtime.FeedMessage.parseFrom(protobuf);
        } catch (InvalidProtocolBufferException e) {
            _log.error("Error reading GTFS-rt message from byte array, skipping to next file: " + e);
            continue;
        }
        double pbDecode = getElapsedTime(startProtobufDecode, System.nanoTime());
        _log.info("Decoded " + path.getFileName() + " protobuf in " + getElapsedTimeString(pbDecode));
        if (mReturnStatistics) {
            stats.setDecodeProtobufTime(pbDecode);
        }
        GtfsRealtime.FeedMessage combinedMessage = null;
        // See if more than one entity type exists in this feed
        if (GtfsUtils.isCombinedFeed(message)) {
            // Run CrossFeedDescriptorValidator on this message
            combinedMessage = message;
        }
        List<ErrorListHelperModel> allErrorLists = new ArrayList<>();
        StringBuilder consoleOutput = new StringBuilder();
        List<RuleStatistics> ruleStatistics = null;
        if (mReturnStatistics) {
            ruleStatistics = new ArrayList<>();
        }
        // Run every rule against this message, timing each rule individually
        for (FeedEntityValidator rule : mValidationRules) {
            long startRuleNanos = System.nanoTime();
            List<ErrorListHelperModel> errorLists = rule.validate(timestamp, mGtfsData, mGtfsMetadata, message, prevMessage, combinedMessage);
            allErrorLists.addAll(errorLists);
            double ruleExecutionTime = getElapsedTime(startRuleNanos, System.nanoTime());
            consoleOutput.append("\n" + rule.getClass().getSimpleName() + " - rule = " + getElapsedTimeString(ruleExecutionTime));
            if (mReturnStatistics) {
                RuleStatistics ruleStat = new RuleStatistics();
                ruleStat.setRuleExecutionTime(ruleExecutionTime);
                ruleStat.setValidator(rule.getClass().getSimpleName());
                ruleStatistics.add(ruleStat);
            }
        }
        double totalIterationTime = getElapsedTime(startTimeNanos, System.nanoTime());
        consoleOutput.append("\nProcessed " + path.getFileName() + " in " + getElapsedTimeString(totalIterationTime));
        consoleOutput.append("\n---------------------");
        _log.info(consoleOutput.toString());
        if (mReturnStatistics) {
            stats.setRuleStatistics(ruleStatistics);
            stats.setTotalIterationTime(totalIterationTime);
        }
        // Write validation results for this file to JSON
        writeResults(mapper, path, allErrorLists);
        if (mPlainTextExtension != null) {
            // Write plain text version of protocol buffer
            writePlainText(message, mapper, path);
        }
        if (mReturnStatistics) {
            mIterationStatistics.add(stats);
        }
        // Only files that were fully processed become the "previous" file for duplicate detection and validation
        prevHash = currentHash;
        prevMessage = message;
    }
    return mIterationStatistics;
}
Also used : GtfsUtils(edu.usf.cutr.gtfsrtvalidator.lib.util.GtfsUtils) edu.usf.cutr.gtfsrtvalidator.lib.validation.rules(edu.usf.cutr.gtfsrtvalidator.lib.validation.rules) ErrorListHelperModel(edu.usf.cutr.gtfsrtvalidator.lib.model.helper.ErrorListHelperModel) RuleStatistics(edu.usf.cutr.gtfsrtvalidator.lib.validation.RuleStatistics) MessageDigest(java.security.MessageDigest) TimestampUtils(edu.usf.cutr.gtfsrtvalidator.lib.util.TimestampUtils) LoggerFactory(org.slf4j.LoggerFactory) TimestampUtils.getElapsedTime(edu.usf.cutr.gtfsrtvalidator.lib.util.TimestampUtils.getElapsedTime) ArrayList(java.util.ArrayList) IterationStatistics(edu.usf.cutr.gtfsrtvalidator.lib.validation.IterationStatistics) TextFormat(com.google.protobuf.TextFormat) Path(java.nio.file.Path) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) GtfsRealtime(com.google.transit.realtime.GtfsRealtime) FeedEntityValidator(edu.usf.cutr.gtfsrtvalidator.lib.validation.interfaces.FeedEntityValidator) Files(java.nio.file.Files) TimeZone(java.util.TimeZone) Collection(java.util.Collection) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) GtfsDaoImpl(org.onebusaway.gtfs.impl.GtfsDaoImpl) Collectors(java.util.stream.Collectors) GtfsMetadata(edu.usf.cutr.gtfsrtvalidator.lib.validation.GtfsMetadata) GtfsReader(org.onebusaway.gtfs.serialization.GtfsReader) IOUtils(org.apache.commons.io.IOUtils) TimestampUtils.getElapsedTimeString(edu.usf.cutr.gtfsrtvalidator.lib.util.TimestampUtils.getElapsedTimeString) DateTimeParseException(java.time.format.DateTimeParseException) List(java.util.List) java.io(java.io) SortUtils(edu.usf.cutr.gtfsrtvalidator.lib.util.SortUtils) Paths(java.nio.file.Paths) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) Agency(org.onebusaway.gtfs.model.Agency) SerializationFeature(com.fasterxml.jackson.databind.SerializationFeature) IterationStatistics(edu.usf.cutr.gtfsrtvalidator.lib.validation.IterationStatistics) 
GtfsMetadata(edu.usf.cutr.gtfsrtvalidator.lib.validation.GtfsMetadata) ArrayList(java.util.ArrayList) TimestampUtils.getElapsedTimeString(edu.usf.cutr.gtfsrtvalidator.lib.util.TimestampUtils.getElapsedTimeString) ErrorListHelperModel(edu.usf.cutr.gtfsrtvalidator.lib.model.helper.ErrorListHelperModel) RuleStatistics(edu.usf.cutr.gtfsrtvalidator.lib.validation.RuleStatistics) Files(java.nio.file.Files) MessageDigest(java.security.MessageDigest) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Path(java.nio.file.Path) FeedEntityValidator(edu.usf.cutr.gtfsrtvalidator.lib.validation.interfaces.FeedEntityValidator) Agency(org.onebusaway.gtfs.model.Agency) InvalidProtocolBufferException(com.google.protobuf.InvalidProtocolBufferException) GtfsRealtime(com.google.transit.realtime.GtfsRealtime) DateTimeParseException(java.time.format.DateTimeParseException)

Example 2 with IterationStatistics

use of edu.usf.cutr.gtfsrtvalidator.lib.validation.IterationStatistics in project gtfs-realtime-validator by CUTR-at-USF.

The method main of the class Main.

/**
 * Command-line entry point for batch mode: validates a directory of archived GTFS-rt files against a GTFS file
 * and, when requested via the command-line arguments, logs the per-file validation statistics.
 *
 * @param args command line arguments
 * @throws IllegalArgumentException if the GTFS file or the GTFS-rt directory was not provided
 * @throws InterruptedException     declared by the signature
 * @throws ParseException           declared by the signature
 */
public static void main(String[] args) throws InterruptedException, ParseException {
    Options cliOptions = setupCommandLineOptions();

    // Both inputs are mandatory for batch mode
    String gtfsFile = getGtfsPathAndFileFromArgs(cliOptions, args);
    String gtfsRealtimeDir = getGtfsRealtimePath(cliOptions, args);
    if (gtfsFile == null || gtfsRealtimeDir == null) {
        throw new IllegalArgumentException("For batch mode you must provide a path and file name to GTFS data (e.g., -gtfs /dir/gtfs.zip) and path to directory of all archived GTFS-rt files (e.g., -gtfs-realtime-path /dir/gtfsarchive)");
    }

    // Optional settings
    BatchProcessor.SortBy sortOrder = getSortBy(cliOptions, args);
    String plainTextExtension = getPlainTextFileExtensionfromArgs(cliOptions, args);
    boolean wantStatistics = getReturnStatsFromArgs(cliOptions, args);
    boolean skipShapes = getIgnoreShapesFromArgs(cliOptions, args);

    BatchProcessor batchProcessor = new BatchProcessor.Builder(gtfsFile, gtfsRealtimeDir)
            .sortBy(sortOrder)
            .setPlainTextExtension(plainTextExtension)
            .setReturnStatistics(wantStatistics)
            .setIgnoreShapes(skipShapes)
            .build();

    // Process archived files and then terminate
    try {
        List<IterationStatistics> iterationStats = batchProcessor.processFeeds();
        if (wantStatistics) {
            _log.info("-------------------------");
            _log.info("  Validation Statistics");
            _log.info("-------------------------");
            iterationStats.forEach(iteration -> _log.info(iteration.toString()));
        }
    } catch (IOException | NoSuchAlgorithmException e) {
        _log.error("Error running batch processor: " + e);
    }
}
Also used : IterationStatistics(edu.usf.cutr.gtfsrtvalidator.lib.validation.IterationStatistics) IOException(java.io.IOException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) BatchProcessor(edu.usf.cutr.gtfsrtvalidator.lib.batch.BatchProcessor)

Aggregations

IterationStatistics (edu.usf.cutr.gtfsrtvalidator.lib.validation.IterationStatistics)2 NoSuchAlgorithmException (java.security.NoSuchAlgorithmException)2 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 SerializationFeature (com.fasterxml.jackson.databind.SerializationFeature)1 InvalidProtocolBufferException (com.google.protobuf.InvalidProtocolBufferException)1 TextFormat (com.google.protobuf.TextFormat)1 GtfsRealtime (com.google.transit.realtime.GtfsRealtime)1 BatchProcessor (edu.usf.cutr.gtfsrtvalidator.lib.batch.BatchProcessor)1 ErrorListHelperModel (edu.usf.cutr.gtfsrtvalidator.lib.model.helper.ErrorListHelperModel)1 GtfsUtils (edu.usf.cutr.gtfsrtvalidator.lib.util.GtfsUtils)1 SortUtils (edu.usf.cutr.gtfsrtvalidator.lib.util.SortUtils)1 TimestampUtils (edu.usf.cutr.gtfsrtvalidator.lib.util.TimestampUtils)1 TimestampUtils.getElapsedTime (edu.usf.cutr.gtfsrtvalidator.lib.util.TimestampUtils.getElapsedTime)1 TimestampUtils.getElapsedTimeString (edu.usf.cutr.gtfsrtvalidator.lib.util.TimestampUtils.getElapsedTimeString)1 GtfsMetadata (edu.usf.cutr.gtfsrtvalidator.lib.validation.GtfsMetadata)1 RuleStatistics (edu.usf.cutr.gtfsrtvalidator.lib.validation.RuleStatistics)1 FeedEntityValidator (edu.usf.cutr.gtfsrtvalidator.lib.validation.interfaces.FeedEntityValidator)1 edu.usf.cutr.gtfsrtvalidator.lib.validation.rules (edu.usf.cutr.gtfsrtvalidator.lib.validation.rules)1 java.io (java.io)1 IOException (java.io.IOException)1