Use of edu.usf.cutr.gtfsrtvalidator.lib.validation.interfaces.FeedEntityValidator in project gtfs-realtime-validator by CUTR-at-USF.
Class BackgroundTask, method run.
/**
 * Performs one polling iteration for the current GTFS-realtime feed: downloads the feed, records the
 * iteration (including the feed hash) in the database, and - if the feed content differs from the
 * previously stored iteration - runs every validation rule against it.
 * <p>
 * The method returns early (after logging) when the feed URL is malformed, when the feed cannot be
 * fetched/decoded/stored, when the feed is byte-identical to the previous iteration, or when no
 * GTFS-rt feed is registered in the database for the GTFS feed.
 */
@Override
public void run() {
    try {
        long startTimeNanos = System.nanoTime();
        GtfsRealtime.FeedMessage currentFeedMessage;
        GtfsRealtime.FeedMessage previousFeedMessage = null;
        // Holds data needed in the database under each iteration
        GtfsRtFeedIterationModel feedIteration;
        StringBuilder consoleOutput = new StringBuilder();

        // Get the GTFS feed from the GtfsDaoMap using the gtfsFeedId of the current feed.
        GtfsDaoImpl gtfsData = GtfsFeed.GtfsDaoMap.get(mCurrentGtfsRtFeed.getGtfsFeedModel().getFeedId());

        // Create the GTFS metadata if it doesn't already exist
        // TODO - read ignoreShapes from website checkbox - see https://github.com/CUTR-at-USF/gtfs-realtime-validator/issues/286
        GtfsMetadata gtfsMetadata = mGtfsMetadata.computeIfAbsent(mCurrentGtfsRtFeed.getGtfsFeedModel().getFeedId(), k -> new GtfsMetadata(mCurrentGtfsRtFeed.getGtfsFeedModel().getGtfsUrl(), TimeZone.getTimeZone(mCurrentGtfsRtFeed.getGtfsFeedModel().getAgency()), gtfsData, mCurrentGtfsRtFeed.getEnableShapes()));

        // Read the GTFS-rt feed from the feed URL
        URL gtfsRtFeedUrl;
        try {
            gtfsRtFeedUrl = new URL(mCurrentGtfsRtFeed.getGtfsRtUrl());
        } catch (MalformedURLException e) {
            // The throwable is passed to the logger, so no separate stack trace dump is needed
            _log.error("Malformed Url: " + mCurrentGtfsRtFeed.getGtfsRtUrl(), e);
            return;
        }

        try {
            // Get the GTFS-RT feedMessage for this method
            byte[] gtfsRtProtobuf;
            long startHttpRequest = System.nanoTime();
            // try-with-resources guarantees the HTTP stream is closed even if reading fails
            try (InputStream in = gtfsRtFeedUrl.openStream()) {
                consoleOutput.append("\n" + mCurrentGtfsRtFeed.getGtfsRtUrl() + " gtfsRtFeedUrl.openStream() in " + getElapsedTimeString(getElapsedTime(startHttpRequest, System.nanoTime())));
                long startToByteArray = System.nanoTime();
                gtfsRtProtobuf = IOUtils.toByteArray(in);
                consoleOutput.append("\n" + mCurrentGtfsRtFeed.getGtfsRtUrl() + " IOUtils.toByteArray(in) in " + getElapsedTimeString(getElapsedTime(startToByteArray, System.nanoTime())));
            }

            boolean isUniqueFeed = true;
            // MD5 is used only to detect byte-identical feeds, not for security
            MessageDigest md = MessageDigest.getInstance("MD5");
            byte[] prevFeedDigest = null;
            byte[] currentFeedDigest = md.digest(gtfsRtProtobuf);

            Session session = GTFSDB.initSessionBeginTrans();
            // Tracks whether the session was committed so a failure path can still release it
            boolean committed = false;
            try {
                // Most recent stored iteration for this GTFS-rt feed (null if none exists yet)
                feedIteration = (GtfsRtFeedIterationModel) session.createQuery("FROM GtfsRtFeedIterationModel" + " WHERE rtFeedId = :gtfsRtId" + " ORDER BY IterationId DESC").setParameter("gtfsRtId", mCurrentGtfsRtFeed.getGtfsRtId()).setMaxResults(1).uniqueResult();
                if (feedIteration != null) {
                    prevFeedDigest = feedIteration.getFeedHash();
                }
                if (MessageDigest.isEqual(currentFeedDigest, prevFeedDigest)) {
                    // If previous feed digest and newly fetched/current feed digest are equal means, we received the same feed again.
                    isUniqueFeed = false;
                }

                long startProtobufDecode = System.nanoTime();
                currentFeedMessage = GtfsRealtime.FeedMessage.parseFrom(gtfsRtProtobuf);
                consoleOutput.append("\n" + mCurrentGtfsRtFeed.getGtfsRtUrl() + " protobuf decode in " + getElapsedTimeString(getElapsedTime(startProtobufDecode, System.nanoTime())));
                _log.info(consoleOutput.toString());
                // Clear the buffer for the next set of log statements
                consoleOutput.setLength(0);

                long feedTimestamp = TimeUnit.SECONDS.toMillis(currentFeedMessage.getHeader().getTimestamp());

                // Create new feedIteration object and save the iteration to the database
                if (isUniqueFeed) {
                    if (feedIteration != null && feedIteration.getFeedprotobuf() != null) {
                        // Get the previous feed message
                        InputStream previousIs = new ByteArrayInputStream(feedIteration.getFeedprotobuf());
                        previousFeedMessage = GtfsRealtime.FeedMessage.parseFrom(previousIs);
                    }
                    feedIteration = new GtfsRtFeedIterationModel(System.currentTimeMillis(), feedTimestamp, gtfsRtProtobuf, mCurrentGtfsRtFeed, currentFeedDigest);
                } else {
                    // Duplicate feed - store the iteration without the protobuf payload
                    feedIteration = new GtfsRtFeedIterationModel(System.currentTimeMillis(), feedTimestamp, null, mCurrentGtfsRtFeed, currentFeedDigest);
                }
                session.save(feedIteration);
                GTFSDB.commitAndCloseSession(session);
                committed = true;
            } finally {
                if (!committed) {
                    // Something failed before/while committing - make sure the session isn't leaked
                    try {
                        GTFSDB.closeSession(session);
                    } catch (Exception closeEx) {
                        _log.error("Failed to close database session for '" + gtfsRtFeedUrl + "'", closeEx);
                    }
                }
            }

            if (!isUniqueFeed) {
                return;
            }
        } catch (Exception e) {
            _log.error("The URL '" + gtfsRtFeedUrl + "' does not contain valid Gtfs-Rt data", e);
            return;
        }

        // Read all GTFS-rt entities for the current feed
        mGtfsRtFeedMap.put(feedIteration.getGtfsRtFeedModel().getGtfsRtId(), currentFeedMessage);

        // Look up every GTFS-rt feed registered for the same GTFS feed
        List<GtfsRtFeedModel> gtfsRtFeedModelList;
        Session readSession = GTFSDB.initSessionBeginTrans();
        try {
            gtfsRtFeedModelList = readSession.createQuery("FROM GtfsRtFeedModel" + " WHERE gtfsFeedID = :feedID").setParameter("feedID", mCurrentGtfsRtFeed.getGtfsFeedModel().getFeedId()).list();
        } finally {
            GTFSDB.closeSession(readSession);
        }

        if (gtfsRtFeedModelList.size() < 1) {
            _log.error("The URL '" + gtfsRtFeedUrl + "' is not stored properly into the database");
            return;
        }

        // Wait until a message is available in the shared map for every GTFS-rt feed of this GTFS feed.
        // The id list does not change while waiting, so it is computed once instead of on every poll.
        List<?> requiredFeedIds = gtfsRtFeedModelList.stream().map(GtfsRtFeedModel::getGtfsRtId).collect(Collectors.toList());
        while (!mGtfsRtFeedMap.keySet().containsAll(requiredFeedIds)) {
            Thread.sleep(200);
        }

        GtfsRealtime.FeedMessage combinedFeed = null;
        if (gtfsRtFeedModelList.size() == 1) {
            // See if more than one entity type exists in this feed
            GtfsRealtime.FeedMessage message = mGtfsRtFeedMap.get(gtfsRtFeedModelList.get(0).getGtfsRtId());
            if (GtfsUtils.isCombinedFeed(message)) {
                // Run CrossFeedDescriptorValidator on this message
                combinedFeed = message;
            }
        }
        if (gtfsRtFeedModelList.size() > 1) {
            // We're monitoring multiple GTFS-rt feeds for the same GTFS data - create a combined feed message include all entities for all of those GTFS-rt feeds
            _log.debug("Creating combined feed message for " + gtfsRtFeedModelList.toString());
            GtfsRealtime.FeedHeader header = null;
            List<GtfsRealtime.FeedEntity> allEntitiesArrayList = new ArrayList<>();
            for (GtfsRtFeedModel gtfsRtFeedModel : gtfsRtFeedModelList) {
                GtfsRealtime.FeedMessage message = mGtfsRtFeedMap.get(gtfsRtFeedModel.getGtfsRtId());
                if (message == null) {
                    // Nothing to merge for this feed - check BEFORE touching the header to avoid an NPE
                    continue;
                }
                if (header == null) {
                    // Save one header to use in our combined feed below
                    header = message.getHeader();
                } else if (message.getHeader() != null && message.getHeader().getTimestamp() > header.getTimestamp()) {
                    // Use largest header timestamp with multiple feeds - see #239
                    header = message.getHeader();
                }
                allEntitiesArrayList.addAll(message.getEntityList());
            }
            if (header != null) {
                GtfsRealtime.FeedMessage.Builder feedMessageBuilder = GtfsRealtime.FeedMessage.newBuilder();
                feedMessageBuilder.setHeader(header);
                feedMessageBuilder.addAllEntity(allEntitiesArrayList);
                combinedFeed = feedMessageBuilder.build();
            } else {
                // No usable header was found, so rules run without a combined feed (same as the single, non-combined feed case)
                _log.error("Could not create combined feed message for " + gtfsRtFeedModelList.toString() + " - no feed header available");
            }
        }

        // Use the same current time for all rules for consistency
        long currentTimeMillis = System.currentTimeMillis();
        // Run validation rules
        for (FeedEntityValidator rule : mValidationRules) {
            consoleOutput.append(validateEntity(currentTimeMillis, currentFeedMessage, previousFeedMessage, combinedFeed, gtfsData, gtfsMetadata, feedIteration, rule));
        }
        consoleOutput.append("\nProcessed " + mCurrentGtfsRtFeed.getGtfsRtUrl() + " in " + getElapsedTimeString(getElapsedTime(startTimeNanos, System.nanoTime())));
        consoleOutput.append("\n---------------------");
        _log.info(consoleOutput.toString());
    } catch (InterruptedException ie) {
        // Restore the interrupt flag so the owning executor/thread can observe the interruption
        Thread.currentThread().interrupt();
        _log.error("Interrupted while waiting for GTFS-rt feed messages", ie);
    } catch (Exception ex) {
        _log.error("Unexpected error while running GTFS-rt validation task", ex);
    }
}
Use of edu.usf.cutr.gtfsrtvalidator.lib.validation.interfaces.FeedEntityValidator in project gtfs-realtime-validator by CUTR-at-USF.
Class BatchProcessor, method processFeeds.
/**
 * Process the GTFS and GTFS-realtime feeds provided in the constructor. If setReturnStatistics() is set to true,
 * the method will return a list of IterationStatistics (one per GTFS-rt file) for performance in the batch
 * validation. By default this method will return null to avoid memory issues with extremely large batch processes.
 * <p>
 * GTFS-rt files are processed in the order selected by the configured sort mode. A file whose content hash
 * equals that of the previously processed file is skipped, as is any file that cannot be read or decoded.
 *
 * @return If setReturnStatistics() is set to true, it will return a list of IterationStatistics (one per GTFS-rt
 * file) for performance in the batch validation. By default this method will return null to avoid memory issues
 * when processing an extremely large number of files.
 * @throws NoSuchAlgorithmException If the MD5 hash algorithm (used to determine feed uniqueness) is not available on the machine executing the code
 * @throws IOException If the GTFS or GTFS-realtime files cannot be read or the results cannot be written to disk
 */
public List<IterationStatistics> processFeeds() throws NoSuchAlgorithmException, IOException {
    // Read GTFS data into a GtfsDaoImpl
    _log.info("Starting batch processor...");
    if (mReturnStatistics) {
        mIterationStatistics = new ArrayList<>();
    }
    String timeZoneText = null;
    double gtfsReadTime = readGtfsData();
    // Use the timezone of the first agency returned by the GTFS data
    Collection<Agency> agencies = mGtfsData.getAllAgencies();
    for (Agency agency : agencies) {
        timeZoneText = agency.getTimezone();
        break;
    }
    // NOTE(review): if the GTFS data has no agencies, timeZoneText is still null here and is passed to
    // TimeZone.getTimeZone() as-is - confirm whether that case needs explicit handling
    mGtfsMetadata = new GtfsMetadata(mPathToGtfsFile.getAbsolutePath(), TimeZone.getTimeZone(timeZoneText), mGtfsData, mIgnoreShapes);

    // Initialize validation rules
    synchronized (mValidationRules) {
        if (mValidationRules.isEmpty()) {
            mValidationRules.add(new CrossFeedDescriptorValidator());
            mValidationRules.add(new VehicleValidator());
            mValidationRules.add(new TimestampValidator());
            mValidationRules.add(new StopTimeUpdateValidator());
            mValidationRules.add(new TripDescriptorValidator());
            mValidationRules.add(new StopValidator());
            mValidationRules.add(new FrequencyTypeZeroValidator());
            mValidationRules.add(new FrequencyTypeOneValidator());
            mValidationRules.add(new HeaderValidator());
        }
    }

    // Configure output
    ObjectMapper mapper = new ObjectMapper();
    mapper.enable(SerializationFeature.INDENT_OUTPUT);

    _log.info("Sorting GTFS-rt files by " + mSortBy.name() + "...");

    // Read GTFS-rt protobuf files from provided directory.
    // Files.walk() holds open directory handles, so the stream must be closed once the paths are collected.
    List<Path> paths;
    try (java.util.stream.Stream<Path> walk = Files.walk(Paths.get(mPathToGtfsRealtime))) {
        paths = walk.filter(Files::isRegularFile).sorted((o1, o2) -> {
            if (mSortBy.equals(SortBy.DATE_MODIFIED)) {
                try {
                    // Sort by date modified (ascending) (it seems more consistent cross-platform than "date created")
                    return SortUtils.compareByDateModified(o1, o2);
                } catch (IOException e) {
                    _log.error("Can't sort GTFS-rt files by date - assuming dates are equal: " + e);
                }
                // Assume file dates are equal if we get an exception
                return 0;
            } else {
                // Sort by name (ascending)
                return SortUtils.compareByFileName(o1, o2);
            }
        }).collect(Collectors.toList());
    }

    // MD5 is used only to detect consecutive duplicate files, not for security
    MessageDigest md = MessageDigest.getInstance("MD5");
    GtfsRealtime.FeedMessage prevMessage = null;
    byte[] prevHash = null;

    for (Path path : paths) {
        IterationStatistics stats = null;
        if (mReturnStatistics) {
            stats = new IterationStatistics();
            stats.setGtfsReadTime(gtfsReadTime);
        }
        long startTimeNanos = System.nanoTime();
        long startToByteArray = System.nanoTime();
        byte[] protobuf;
        try {
            // Files.readAllBytes() opens and closes the file itself, so no file handle is leaked per iteration
            protobuf = Files.readAllBytes(path);
        } catch (IOException e) {
            _log.error("Error reading GTFS-rt file to byte array, skipping to next file: " + e);
            continue;
        }
        double toByteArray = getElapsedTime(startToByteArray, System.nanoTime());
        _log.info("Read " + path.getFileName() + " to byte array in " + getElapsedTimeString(toByteArray));
        if (mReturnStatistics) {
            stats.setToByteArrayTime(toByteArray);
        }

        byte[] currentHash = md.digest(protobuf);
        if (MessageDigest.isEqual(currentHash, prevHash)) {
            // This feed file is a duplicate of the last one - skip to next file
            continue;
        }

        long timestamp;
        if (mSortBy.equals(SortBy.DATE_MODIFIED)) {
            // Use file last modified date as "current" timestamp
            timestamp = Files.getLastModifiedTime(path).toMillis();
        } else {
            // Use time parsed from file name as "current" timestamp
            try {
                timestamp = TimestampUtils.getTimestampFromFileName(path.toFile().getName());
            } catch (DateTimeParseException | StringIndexOutOfBoundsException e) {
                _log.error("Couldn't parse timestamp from file name '" + path.toFile().getName() + "' - using date modified instead: " + e);
                timestamp = Files.getLastModifiedTime(path).toMillis();
            }
        }

        long startProtobufDecode = System.nanoTime();
        GtfsRealtime.FeedMessage message;
        try {
            message = GtfsRealtime.FeedMessage.parseFrom(protobuf);
        } catch (InvalidProtocolBufferException e) {
            _log.error("Error reading GTFS-rt message from byte array, skipping to next file: " + e);
            continue;
        }
        double pbDecode = getElapsedTime(startProtobufDecode, System.nanoTime());
        _log.info("Decoded " + path.getFileName() + " protobuf in " + getElapsedTimeString(pbDecode));
        if (mReturnStatistics) {
            stats.setDecodeProtobufTime(pbDecode);
        }

        GtfsRealtime.FeedMessage combinedMessage = null;
        // See if more than one entity type exists in this feed
        if (GtfsUtils.isCombinedFeed(message)) {
            // Run CrossFeedDescriptorValidator on this message
            combinedMessage = message;
        }

        List<ErrorListHelperModel> allErrorLists = new ArrayList<>();
        StringBuilder consoleOutput = new StringBuilder();
        List<RuleStatistics> ruleStatistics = null;
        if (mReturnStatistics) {
            ruleStatistics = new ArrayList<>();
        }

        // Run every rule against this file, timing each rule individually
        for (FeedEntityValidator rule : mValidationRules) {
            long startRuleNanos = System.nanoTime();
            List<ErrorListHelperModel> errorLists = rule.validate(timestamp, mGtfsData, mGtfsMetadata, message, prevMessage, combinedMessage);
            allErrorLists.addAll(errorLists);
            double ruleExecutionTime = getElapsedTime(startRuleNanos, System.nanoTime());
            consoleOutput.append("\n" + rule.getClass().getSimpleName() + " - rule = " + getElapsedTimeString(ruleExecutionTime));
            if (mReturnStatistics) {
                RuleStatistics ruleStat = new RuleStatistics();
                ruleStat.setRuleExecutionTime(ruleExecutionTime);
                ruleStat.setValidator(rule.getClass().getSimpleName());
                ruleStatistics.add(ruleStat);
            }
        }

        double totalIterationTime = getElapsedTime(startTimeNanos, System.nanoTime());
        consoleOutput.append("\nProcessed " + path.getFileName() + " in " + getElapsedTimeString(totalIterationTime));
        consoleOutput.append("\n---------------------");
        _log.info(consoleOutput.toString());
        if (mReturnStatistics) {
            stats.setRuleStatistics(ruleStatistics);
            stats.setTotalIterationTime(totalIterationTime);
        }

        // Write validation results for this file to JSON
        writeResults(mapper, path, allErrorLists);
        if (mPlainTextExtension != null) {
            // Write plain text version of protocol buffer
            writePlainText(message, mapper, path);
        }
        if (mReturnStatistics) {
            mIterationStatistics.add(stats);
        }

        // Remember this file so the next iteration can detect duplicates and compare against it
        prevHash = currentHash;
        prevMessage = message;
    }
    return mIterationStatistics;
}
Aggregations