Use of edu.usf.cutr.gtfsrtvalidator.lib.validation.IterationStatistics in the project gtfs-realtime-validator by CUTR-at-USF.
In the class BatchProcessor, method processFeeds.
/**
 * Process the GTFS and GTFS-realtime feeds provided in the constructor. If setReturnStatistics() is set to true,
 * the method will return a list of IterationStatistics (one per GTFS-rt file) for performance in the batch
 * validation. By default this method will return null to avoid memory issues with extremely large batch processes.
 * <p>
 * For every regular file found under the GTFS-realtime path (sorted by date modified or by file name, depending
 * on the configured sort order) this method reads the file, skips it if its MD5 hash matches the previously
 * processed file, decodes it as a GTFS-realtime FeedMessage, runs every validation rule against it, and writes the
 * results (and optionally a plain text version of the message) via writeResults() / writePlainText(). Files that
 * cannot be read or decoded are logged and skipped.
 *
 * @return the mIterationStatistics field, which this method only (re)initializes when statistics are enabled via
 * setReturnStatistics(). When enabled it holds one IterationStatistics per GTFS-rt file that was actually
 * validated (unreadable, undecodable, and duplicate files are not included).
 * @throws NoSuchAlgorithmException If the MD5 hash algorithm (used to determine feed uniqueness) is not available on the machine executing the code
 * @throws IOException If the GTFS or GTFS-realtime files cannot be read or the results cannot be written to disk
 */
public List<IterationStatistics> processFeeds() throws NoSuchAlgorithmException, IOException {
    _log.info("Starting batch processor...");
    if (mReturnStatistics) {
        mIterationStatistics = new ArrayList<>();
    }

    // Read GTFS data and take the time zone of the first agency (if any) as the feed time zone
    String timeZoneText = null;
    double gtfsReadTime = readGtfsData();
    Collection<Agency> agencies = mGtfsData.getAllAgencies();
    for (Agency agency : agencies) {
        timeZoneText = agency.getTimezone();
        break;
    }
    // NOTE(review): timeZoneText is still null here when the GTFS data contains no agencies - confirm that
    // TimeZone.getTimeZone()/GtfsMetadata tolerate that case
    mGtfsMetadata = new GtfsMetadata(mPathToGtfsFile.getAbsolutePath(), TimeZone.getTimeZone(timeZoneText), mGtfsData, mIgnoreShapes);

    // Initialize validation rules (only once - the list is left untouched if it already has entries)
    synchronized (mValidationRules) {
        if (mValidationRules.isEmpty()) {
            mValidationRules.add(new CrossFeedDescriptorValidator());
            mValidationRules.add(new VehicleValidator());
            mValidationRules.add(new TimestampValidator());
            mValidationRules.add(new StopTimeUpdateValidator());
            mValidationRules.add(new TripDescriptorValidator());
            mValidationRules.add(new StopValidator());
            mValidationRules.add(new FrequencyTypeZeroValidator());
            mValidationRules.add(new FrequencyTypeOneValidator());
            mValidationRules.add(new HeaderValidator());
        }
    }

    // Configure output
    ObjectMapper mapper = new ObjectMapper();
    mapper.enable(SerializationFeature.INDENT_OUTPUT);

    _log.info("Sorting GTFS-rt files by " + mSortBy.name() + "...");

    // Read GTFS-rt protobuf files from provided directory. The stream returned by Files.walk() keeps directory
    // handles open until it is closed, so it is scoped to a try-with-resources block.
    List<Path> paths;
    try (java.util.stream.Stream<Path> walkedFiles = Files.walk(Paths.get(mPathToGtfsRealtime))) {
        paths = walkedFiles.filter(Files::isRegularFile).sorted((o1, o2) -> {
            if (mSortBy.equals(SortBy.DATE_MODIFIED)) {
                try {
                    // Sort by date modified (ascending) (it seems more consistent cross-platform than "date created")
                    return SortUtils.compareByDateModified(o1, o2);
                } catch (IOException e) {
                    _log.error("Can't sort GTFS-rt files by date - assuming dates are equal: " + e);
                }
                // Assume file dates are equal if we get an exception
                return 0;
            } else {
                // Sort by name (ascending)
                return SortUtils.compareByFileName(o1, o2);
            }
        }).collect(Collectors.toList());
    }

    MessageDigest md = MessageDigest.getInstance("MD5");
    // State carried from one validated file to the next
    GtfsRealtime.FeedMessage prevMessage = null;
    byte[] prevHash = null;

    for (Path path : paths) {
        IterationStatistics stats = null;
        if (mReturnStatistics) {
            stats = new IterationStatistics();
            stats.setGtfsReadTime(gtfsReadTime);
        }

        long startTimeNanos = System.nanoTime();
        long startToByteArray = System.nanoTime();
        byte[] protobuf;
        try {
            // Files.readAllBytes() opens, fully reads, and closes the file, so no stream is leaked per file
            protobuf = Files.readAllBytes(path);
        } catch (IOException e) {
            _log.error("Error reading GTFS-rt file to byte array, skipping to next file: " + e);
            continue;
        }
        double toByteArray = getElapsedTime(startToByteArray, System.nanoTime());
        _log.info("Read " + path.getFileName() + " to byte array in " + getElapsedTimeString(toByteArray));
        if (mReturnStatistics) {
            stats.setToByteArrayTime(toByteArray);
        }

        // prevHash is null for the first file; MessageDigest.isEqual() is used for the comparison
        byte[] currentHash = md.digest(protobuf);
        if (MessageDigest.isEqual(currentHash, prevHash)) {
            // This feed file is a duplicate of the last one - skip to next file
            continue;
        }

        long timestamp;
        if (mSortBy.equals(SortBy.DATE_MODIFIED)) {
            // Use file last modified date as "current" timestamp
            timestamp = Files.getLastModifiedTime(path).toMillis();
        } else {
            // Use time parsed from file name as "current" timestamp
            try {
                timestamp = TimestampUtils.getTimestampFromFileName(path.toFile().getName());
            } catch (DateTimeParseException | StringIndexOutOfBoundsException e) {
                _log.error("Couldn't parse timestamp from file name '" + path.toFile().getName() + "' - using date modified instead: " + e);
                timestamp = Files.getLastModifiedTime(path).toMillis();
            }
        }

        long startProtobufDecode = System.nanoTime();
        GtfsRealtime.FeedMessage message;
        try {
            message = GtfsRealtime.FeedMessage.parseFrom(protobuf);
        } catch (InvalidProtocolBufferException e) {
            _log.error("Error reading GTFS-rt message from byte array, skipping to next file: " + e);
            continue;
        }
        double pbDecode = getElapsedTime(startProtobufDecode, System.nanoTime());
        _log.info("Decoded " + path.getFileName() + " protobuf in " + getElapsedTimeString(pbDecode));
        if (mReturnStatistics) {
            stats.setDecodeProtobufTime(pbDecode);
        }

        GtfsRealtime.FeedMessage combinedMessage = null;
        // See if more than one entity type exists in this feed
        if (GtfsUtils.isCombinedFeed(message)) {
            // Run CrossFeedDescriptorValidator on this message
            combinedMessage = message;
        }

        List<ErrorListHelperModel> allErrorLists = new ArrayList<>();
        StringBuilder consoleOutput = new StringBuilder();
        List<RuleStatistics> ruleStatistics = null;
        if (mReturnStatistics) {
            ruleStatistics = new ArrayList<>();
        }

        // Run every rule against this message, timing each one individually
        for (FeedEntityValidator rule : mValidationRules) {
            long startRuleNanos = System.nanoTime();
            List<ErrorListHelperModel> errorLists = rule.validate(timestamp, mGtfsData, mGtfsMetadata, message, prevMessage, combinedMessage);
            allErrorLists.addAll(errorLists);
            double ruleExecutionTime = getElapsedTime(startRuleNanos, System.nanoTime());
            consoleOutput.append("\n" + rule.getClass().getSimpleName() + " - rule = " + getElapsedTimeString(ruleExecutionTime));
            if (mReturnStatistics) {
                RuleStatistics ruleStat = new RuleStatistics();
                ruleStat.setRuleExecutionTime(ruleExecutionTime);
                ruleStat.setValidator(rule.getClass().getSimpleName());
                ruleStatistics.add(ruleStat);
            }
        }

        double totalIterationTime = getElapsedTime(startTimeNanos, System.nanoTime());
        consoleOutput.append("\nProcessed " + path.getFileName() + " in " + getElapsedTimeString(totalIterationTime));
        consoleOutput.append("\n---------------------");
        _log.info(consoleOutput.toString());
        if (mReturnStatistics) {
            stats.setRuleStatistics(ruleStatistics);
            stats.setTotalIterationTime(totalIterationTime);
        }

        // Write validation results for this file to JSON
        writeResults(mapper, path, allErrorLists);
        if (mPlainTextExtension != null) {
            // Write plain text version of protocol buffer
            writePlainText(message, mapper, path);
        }

        if (mReturnStatistics) {
            mIterationStatistics.add(stats);
        }

        // Only successfully validated files become the "previous" file for the next iteration
        prevHash = currentHash;
        prevMessage = message;
    }
    return mIterationStatistics;
}
Use of edu.usf.cutr.gtfsrtvalidator.lib.validation.IterationStatistics in the project gtfs-realtime-validator by CUTR-at-USF.
In the class Main, method main.
/**
 * Command-line entry point for batch validation: reads the GTFS file path, the GTFS-realtime archive directory
 * and the remaining settings from the arguments, builds a BatchProcessor from them, and runs it. When
 * statistics were requested, each returned IterationStatistics is logged once processing finishes.
 * IOException and NoSuchAlgorithmException raised by the processor are logged rather than rethrown.
 *
 * @param args command-line arguments, interpreted by the option helpers of this class
 * @throws IllegalArgumentException if the GTFS path or the GTFS-realtime path is missing from the arguments
 * @throws InterruptedException declared by the signature; nothing in this method visibly raises it
 * @throws ParseException declared by the signature; presumably raised by the argument helpers - verify
 */
public static void main(String[] args) throws InterruptedException, ParseException {
    // Parse command line parameters
    Options cliOptions = setupCommandLineOptions();

    // Both inputs are mandatory for batch mode
    String gtfsFile = getGtfsPathAndFileFromArgs(cliOptions, args);
    String realtimeDir = getGtfsRealtimePath(cliOptions, args);
    if (gtfsFile == null || realtimeDir == null) {
        throw new IllegalArgumentException("For batch mode you must provide a path and file name to GTFS data (e.g., -gtfs /dir/gtfs.zip) and path to directory of all archived GTFS-rt files (e.g., -gtfs-realtime-path /dir/gtfsarchive)");
    }

    // Optional settings
    BatchProcessor.SortBy sortOrder = getSortBy(cliOptions, args);
    String plainTextExtension = getPlainTextFileExtensionfromArgs(cliOptions, args);
    boolean wantStatistics = getReturnStatsFromArgs(cliOptions, args);
    boolean skipShapes = getIgnoreShapesFromArgs(cliOptions, args);

    BatchProcessor batchProcessor = new BatchProcessor.Builder(gtfsFile, realtimeDir)
            .sortBy(sortOrder)
            .setPlainTextExtension(plainTextExtension)
            .setReturnStatistics(wantStatistics)
            .setIgnoreShapes(skipShapes)
            .build();

    // Process archived files and then terminate
    try {
        List<IterationStatistics> iterationStats = batchProcessor.processFeeds();
        if (!wantStatistics) {
            return;
        }
        _log.info("-------------------------");
        _log.info(" Validation Statistics");
        _log.info("-------------------------");
        iterationStats.forEach(entry -> _log.info(entry.toString()));
    } catch (IOException | NoSuchAlgorithmException e) {
        _log.error("Error running batch processor: " + e);
    }
}
Aggregations