use of ca.nrc.cadc.caom2.harvester.state.HarvestState in project caom2db by opencadc.
Class ArtifactHarvester, method runIt().
/**
 * Harvest one batch of observations and check that every public artifact has a
 * correct copy in storage; artifacts that are missing, incorrect, or still
 * proprietary are recorded in the HarvestSkipURI table for later download.
 *
 * @return true when the batch was not full (harvesting has caught up),
 *         false when another batch should be run
 * @throws Exception if reading the harvest state or the observation list fails
 */
private Boolean runIt() throws Exception {
    this.downloadCount = 0;
    this.processedCount = 0;
    this.start = new Date();
    try {
        // Determine the state of the last run
        HarvestState state = harvestStateDAO.get(source, STATE_CLASS);
        this.startDate = state.curLastModified;
        // harvest up to a little in the past because the head of
        // the sequence may be volatile
        long fiveMinAgo = System.currentTimeMillis() - 5 * 60000L;
        Date stopDate = new Date(fiveMinAgo);
        if (startDate == null) {
            log.info("harvest window: null " + this.df.format(stopDate) + " [" + this.batchSize + "]");
        } else {
            log.info("harvest window: " + this.df.format(startDate) + " " + this.df.format(stopDate) + " [" + this.batchSize + "]");
        }
        List<ObservationState> observationStates = this.observationDAO.getObservationList(this.collection, this.startDate, stopDate, this.batchSize + 1);
        // The query is inclusive (>= startDate), so the batch may begin with the
        // observation already processed last run (recorded in HarvestState): drop it.
        if (!observationStates.isEmpty()) {
            ListIterator<ObservationState> iter = observationStates.listIterator();
            ObservationState curBatchLeader = iter.next();
            if (curBatchLeader != null) {
                if (state.curLastModified != null) {
                    log.debug("harvesState: " + format(state.curID) + ", " + this.df.format(state.curLastModified));
                }
                if (curBatchLeader.getMaxLastModified().equals(state.curLastModified)) {
                    Observation observation = this.observationDAO.get(curBatchLeader.getID());
                    log.debug("current batch: " + format(observation.getID()) + ", " + this.df.format(curBatchLeader.getMaxLastModified()));
                    if (state.curID != null && state.curID.equals(observation.getID())) {
                        iter.remove();
                    }
                }
            }
        }
        log.info("Found: " + observationStates.size());
        for (ObservationState observationState : observationStates) {
            try {
                this.observationDAO.getTransactionManager().startTransaction();
                Observation observation = this.observationDAO.get(observationState.getID());
                if (observation == null) {
                    log.debug("Observation no longer exists: " + observationState.getURI());
                } else {
                    // will make progress even on failures
                    state.curLastModified = observation.getMaxLastModified();
                    state.curID = observation.getID();
                    for (Plane plane : observation.getPlanes()) {
                        for (Artifact artifact : plane.getArtifacts()) {
                            Date releaseDate = AccessUtil.getReleaseDate(artifact, plane.metaRelease, plane.dataRelease);
                            if (releaseDate == null) {
                                // null date means private
                                log.debug("null release date, skipping");
                            } else {
                                logStart(format(state.curID), artifact);
                                boolean success = true;
                                boolean added = false;
                                String message = null;
                                this.caomChecksum = getMD5Sum(artifact.contentChecksum);
                                if (this.caomChecksum == null) {
                                    this.caomChecksum = "null";
                                }
                                if (artifact.contentLength == null) {
                                    this.caomContentLength = null;
                                } else {
                                    this.caomContentLength = artifact.contentLength;
                                }
                                this.storageContentLength = 0;
                                this.reason = "None";
                                this.errorMessage = null;
                                this.processedCount++;
                                if (releaseDate.after(start)) {
                                    // private and release date is not null, download in the future
                                    this.errorMessage = ArtifactHarvester.PROPRIETARY;
                                }
                                try {
                                    // by default, do not add to the skip table
                                    boolean correctCopy = true;
                                    // artifact is not in storage if storage policy is 'PUBLIC ONLY' and the artifact is proprietary
                                    if ((StoragePolicy.ALL == storagePolicy) || this.errorMessage == null) {
                                        // correctCopy is false if: checksum mismatch, content length mismatch or not in storage
                                        // "not in storage" includes failing to resolve the artifact URI
                                        correctCopy = checkArtifactInStorage(artifact.getURI());
                                        log.debug("Artifact " + artifact.getURI() + " with MD5 " + artifact.contentChecksum + " correct copy: " + correctCopy);
                                    }
                                    if ((StoragePolicy.PUBLIC_ONLY == storagePolicy && ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) || !correctCopy) {
                                        HarvestSkipURI skip = harvestSkipURIDAO.get(source, STATE_CLASS, artifact.getURI());
                                        // BUG FIX: remember whether the artifact was already queued BEFORE
                                        // the null-replacement below; the previous check (skip != null)
                                        // after the replacement was always true, so new records were
                                        // wrongly counted/reported as pre-existing skip-table entries.
                                        boolean preExisting = (skip != null);
                                        if (!preExisting) {
                                            // not in skip table, add it
                                            skip = new HarvestSkipURI(source, STATE_CLASS, artifact.getURI(), releaseDate, this.errorMessage);
                                        }
                                        if (ArtifactHarvester.PROPRIETARY.equals(skip.errorMessage) || ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) {
                                            // proprietary artifact: postpone the next attempt until release
                                            skip.setTryAfter(releaseDate);
                                            skip.errorMessage = errorMessage;
                                        }
                                        this.harvestSkipURIDAO.put(skip);
                                        this.downloadCount++;
                                        added = true;
                                        if (preExisting) {
                                            // record was already queued: this is an update, not a new download
                                            this.downloadCount--;
                                            if (ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) {
                                                this.updateCount++;
                                                message = this.errorMessage + " artifact already exists in skip table, update tryAfter date to release date.";
                                            } else {
                                                added = false;
                                                String msg = "artifact already exists in skip table.";
                                                if (this.reason.equalsIgnoreCase("None")) {
                                                    this.reason = "Public " + msg;
                                                } else {
                                                    this.reason = this.reason + " and public " + msg;
                                                }
                                            }
                                        }
                                    }
                                } catch (Exception ex) {
                                    success = false;
                                    message = "Failed to determine if artifact " + artifact.getURI() + " exists: " + ex.getMessage();
                                    log.error(message, ex);
                                }
                                logEnd(format(state.curID), artifact, success, added, message);
                            }
                        }
                    }
                }
                this.harvestStateDAO.put(state);
                log.debug("Updated artifact harvest state. Date: " + state.curLastModified);
                log.debug("Updated artifact harvest state. ID: " + format(state.curID));
                this.observationDAO.getTransactionManager().commitTransaction();
            } catch (Throwable t) {
                this.observationDAO.getTransactionManager().rollbackTransaction();
                throw t;
            }
        }
        return (observationStates.size() < batchSize + 1);
    } finally {
        logBatchEnd();
    }
}
use of ca.nrc.cadc.caom2.harvester.state.HarvestState in project caom2db by opencadc.
Class ObservationValidator, method initProgressFile().
// Progress file management functions
/**
 * Load the harvest-progress file, or create and seed it when it does not exist.
 *
 * @param src the harvest source identifier the progress file must belong to
 * @throws IOException if the progress file cannot be read or written
 * @throws ParseException if the stored record cannot be parsed
 * @throws URISyntaxException if the stored record contains a malformed URI
 * @throws IllegalStateException if an existing file records a different source
 */
protected void initProgressFile(String src) throws IOException, ParseException, URISyntaxException {
    if (!this.progressFile.exists()) {
        // first run: seed the file with a fresh HarvestState record
        this.progressRecord = new HarvestState(src, Observation.class.getSimpleName());
        writeProgressFile(this.progressRecord);
        return;
    }
    // resuming: the existing file must have been written for the same source
    this.progressRecord = readProgressFile();
    if (!this.progressRecord.getSource().equals(src)) {
        throw new IllegalStateException("existing progress file has different source: " + progressRecord.getSource());
    }
}
use of ca.nrc.cadc.caom2.harvester.state.HarvestState in project caom2db by opencadc.
Class ObservationRemover, method run().
/**
 * Remove all observations and deletion markers for the target collection, and
 * (when a harvest source is configured) the associated HarvestSkipURI and
 * HarvestState records. Stops early if any batch aborts.
 */
@Override
public void run() {
    // Get HarvestState record to see if this collection has actually been harvested before
    HarvestStateDAO harvestStateDAO = new PostgresqlHarvestStateDAO(obsDAO.getDataSource(), target.getDatabase(), target.getSchema());
    HarvestState harvestStateRec = null;
    if (src != null) {
        harvestStateRec = harvestStateDAO.get(src.getIdentifier(), Observation.class.getSimpleName());
        // a null lastModified means the record was never persisted: nothing harvested
        if (harvestStateRec.getLastModified() == null) {
            log.error("not found: HarvestState record for " + src.getIdentifier());
            return;
        }
    }
    log.info("Using batchSize: " + batchSize);

    log.info("Removing Observation(s) ...");
    Progress prog = deleteInBatches(this::deleteObservations, "observations");
    if (prog.abort) {
        log.warn("Problem removing observations. Quitting...");
        return;
    }

    log.info("Removing DeletedObservation(s)...");
    prog = deleteInBatches(this::deleteDeletedObservations, "DeletedObservation(s)");
    if (prog.abort) {
        log.warn("Problem removing DeletedObservation(s). Quitting...");
        return;
    }

    if (src != null) {
        log.info("Removing HarvestSkipURI(s)...");
        prog = deleteInBatches(this::deleteHarvestSkipURI, "HarvestSkipURI(s)");
        if (prog.abort) {
            log.warn("Problem removing HarvestSkipURI(s). Quitting...");
            return;
        }

        log.info("Deleting harvest state records...");
        harvestStateDAO.delete(harvestStateRec);
        // the DeletedObservation harvest state only exists if it was persisted
        harvestStateRec = harvestStateDAO.get(src.getIdentifier(), DeletedObservation.class.getSimpleName());
        if (harvestStateRec.getLastModified() != null) {
            harvestStateDAO.delete(harvestStateRec);
        }
    }
}

/**
 * Repeatedly invoke a batched delete operation until a batch finds nothing or
 * aborts, logging per-batch progress and the running total.
 *
 * @param op    the batched delete operation to run
 * @param label human-readable name of the records being removed (for logging)
 * @return the Progress of the last batch; callers check {@code abort}
 */
private Progress deleteInBatches(java.util.function.Supplier<Progress> op, String label) {
    int total = 0;
    Progress prog;
    do {
        prog = op.get();
        if (prog.found > 0) {
            log.info("finished batch: " + prog.toString());
        }
        if (prog.abort) {
            log.error("batch aborted");
        }
        total += prog.found;
    } while (prog.found > 0 && !prog.abort);
    log.info("Removed " + total + " " + label);
    return prog;
}
use of ca.nrc.cadc.caom2.harvester.state.HarvestState in project caom2db by opencadc.
Class DeletionHarvester, method doit().
/**
 * Does the work: harvest one batch of DeletedObservation records from the
 * source and apply the corresponding deletions to the local observation
 * database, tracking position in HarvestState so an interrupted run resumes.
 *
 * @return progress status for this batch
 */
private Progress doit() {
    log.info("batch: " + entityClass.getSimpleName());
    Progress ret = new Progress();
    // no configured batchSize means the batch is unbounded
    int expectedNum = Integer.MAX_VALUE;
    if (batchSize != null) {
        expectedNum = batchSize.intValue();
    }
    boolean correct = true;
    try {
        HarvestState state = harvestStateDAO.get(source, cname);
        log.info("last harvest: " + format(state.curLastModified));
        // optionally seed a fresh state with a configured initial date
        if (initHarvestState && state.curLastModified == null) {
            state.curLastModified = initDate;
            harvestStateDAO.put(state);
            state = harvestStateDAO.get(source, cname);
            log.info("harvest state initialised to: " + df.format(state.curLastModified));
        }
        startDate = state.curLastModified;
        if (firstIteration) {
            if (super.minDate != null) {
                // override state
                startDate = super.minDate;
            }
            endDate = super.maxDate;
            // harvest up to a little in the past because the head of the
            // sequence may be volatile
            long fiveMinAgo = System.currentTimeMillis() - 5 * 60000L;
            if (endDate == null) {
                endDate = new Date(fiveMinAgo);
            } else {
                endDate = new Date(Math.min(fiveMinAgo, endDate.getTime()));
            }
        }
        firstIteration = false;
        List<DeletedObservation> entityList = null;
        // NOTE(review): this local deliberately shadows the 'source' field used
        // above; here it only labels where the records came from, for errors.
        String source = null;
        if (deletedDAO != null) {
            source = "deletedDAO";
            entityList = deletedDAO.getList(src.getCollection(), startDate, endDate, batchSize);
        } else {
            source = "repoClient";
            entityList = repoClient.getDeleted(src.getCollection(), startDate, endDate, batchSize);
        }
        if (entityList == null) {
            throw new RuntimeException("Error gathering deleted observations from " + source);
        }
        // a completely full batch may mean we are stuck on records that share
        // a single timestamp
        if (entityList.size() == expectedNum) {
            detectLoop(entityList);
        }
        ret.found = entityList.size();
        log.info("found: " + entityList.size());
        ListIterator<DeletedObservation> iter = entityList.listIterator();
        while (iter.hasNext()) {
            DeletedObservation de = iter.next();
            // allow garbage collection asap
            iter.remove();
            log.debug("Observation read from deletion end-point: " + de.getID() + " date = " + de.getLastModified());
            if (!dryrun) {
                txnManager.startTransaction();
            }
            boolean ok = false;
            try {
                if (!dryrun) {
                    state.curLastModified = de.getLastModified();
                    state.curID = de.getID();
                    ObservationState cur = obsDAO.getState(de.getID());
                    if (cur != null) {
                        log.debug("Observation: " + de.getID() + " found in DB");
                        Date lastUpdate = cur.getMaxLastModified();
                        Date deleted = de.getLastModified();
                        log.debug("to be deleted: " + de.getClass().getSimpleName() + " " + de.getURI() + " " + de.getID() + "deleted date " + format(de.getLastModified()) + " modified date " + format(cur.getMaxLastModified()));
                        // only delete when the deletion is newer than the local copy
                        if (deleted.after(lastUpdate)) {
                            log.info("delete: " + de.getClass().getSimpleName() + " " + de.getURI() + " " + de.getID());
                            obsDAO.delete(de.getID());
                            ret.deleted++;
                        } else {
                            log.info("skip out-of-date delete: " + de.getClass().getSimpleName() + " " + de.getURI() + " " + de.getID() + " " + format(de.getLastModified()));
                            ret.skipped++;
                        }
                    } else {
                        log.debug("Observation: " + de.getID() + " not found in DB");
                    }
                    // track progress
                    harvestStateDAO.put(state);
                    log.debug("committing transaction");
                    txnManager.commitTransaction();
                    log.debug("commit: OK");
                }
                ok = true;
            } catch (Throwable t) {
                log.error("unexpected exception", t);
            } finally {
                if (!ok && !dryrun) {
                    log.warn("failed to process " + de + ": trying to rollback the transaction");
                    txnManager.rollbackTransaction();
                    log.warn("rollback: OK");
                    ret.abort = true;
                }
            }
        }
        if (ret.found < expectedNum) {
            ret.done = true;
            if (state != null && state.curLastModified != null && ret.found > 0) {
                // tweak HarvestState so we don't keep picking up the same
                // one: advance curLastModified 1 ms past the last record seen
                Date n = new Date(state.curLastModified.getTime() + 1L);
                Date now = new Date();
                // NOTE(review): when the state timestamp is more than 10 min in
                // the past the nudge becomes 100 ms instead of 1 ms — presumably
                // still safely behind "now"; confirm the intent upstream.
                if (now.getTime() - n.getTime() > 600 * 1000L) {
                    n = new Date(state.curLastModified.getTime() + 100L);
                }
                state.curLastModified = n;
                log.info("reached last " + entityClass.getSimpleName() + ": setting curLastModified to " + format(state.curLastModified));
                harvestStateDAO.put(state);
            }
        }
    } catch (Throwable t) {
        log.error("unexpected exception", t);
        ret.abort = true;
        correct = false;
    } finally {
        if (correct) {
            log.debug("DONE");
        }
    }
    return ret;
}
use of ca.nrc.cadc.caom2.harvester.state.HarvestState in project caom2db by opencadc.
Class ObservationValidator, method doit().
/**
 * Compare the complete observation lists of source and destination and record
 * each discrepancy in the HarvestSkipURI table so it can be re-harvested.
 *
 * @return progress counts (found/mismatch/added/known) for this run
 */
private Progress doit() {
    Progress ret = new Progress();
    // coarse timing instrumentation, reported in the finally block below
    long t = System.currentTimeMillis();
    long timeState = -1;
    long timeQuery = -1;
    long timeTransaction = -1;
    try {
        // hint
        System.gc();
        t = System.currentTimeMillis();
        timeState = System.currentTimeMillis() - t;
        t = System.currentTimeMillis();
        log.info("getObservationList: " + src.getIdentifier());
        // source list may come from a direct DAO or a remote service
        List<ObservationState> tmpSrcState = null;
        if (srcObservationDAO != null) {
            tmpSrcState = srcObservationDAO.getObservationList(src.getCollection(), null, null, null);
        } else if (srcObservationService != null) {
            tmpSrcState = srcObservationService.getObservationList(src.getCollection(), null, null, null);
        } else {
            throw new RuntimeException("BUG: both srcObservationDAO and srcObservationService are null");
        }
        log.info("found: " + tmpSrcState.size());
        // order/de-duplicate by the URI comparator for the set comparison
        Set<ObservationState> srcState = new TreeSet<>(compStateUri);
        srcState.addAll(tmpSrcState);
        // GC
        tmpSrcState = null;
        log.info("source set: " + srcState.size());
        log.info("getObservationList: " + dest.getIdentifier());
        List<ObservationState> tmpDstState = destObservationDAO.getObservationList(dest.getCollection(), null, null, null);
        log.info("found: " + tmpDstState.size());
        Set<ObservationState> dstState = new TreeSet<>(compStateUri);
        dstState.addAll(tmpDstState);
        // GC
        tmpDstState = null;
        log.info("destination set: " + dstState.size());
        Set<ObservationStateError> errlist = calculateErroneousObservationStates(srcState, dstState);
        log.info("discrepancies found: " + errlist.size());
        timeQuery = System.currentTimeMillis() - t;
        t = System.currentTimeMillis();
        List<SkippedWrapperURI<ObservationStateError>> entityListSrc = wrap(errlist);
        // 'found' reports the size of the source set, not the discrepancy count
        ret.found = srcState.size();
        ListIterator<SkippedWrapperURI<ObservationStateError>> iter = entityListSrc.listIterator();
        while (iter.hasNext()) {
            SkippedWrapperURI<ObservationStateError> ow = iter.next();
            ObservationStateError o = ow.entity;
            // allow garbage collection during loop
            iter.remove();
            String skipMsg = null;
            try {
                if (!dryrun) {
                    if (o != null) {
                        skipMsg = o.toString();
                        try {
                            log.debug("starting HarvestSkipURI transaction");
                            boolean putSkip = true;
                            HarvestSkipURI skip = harvestSkip.get(source, cname, o.getObs().getURI().getURI());
                            Date tryAfter = o.getObs().maxLastModified;
                            if (skip == null) {
                                skip = new HarvestSkipURI(source, cname, o.getObs().getURI().getURI(), tryAfter, skipMsg);
                                ret.added++;
                            } else {
                                // avoid lastModified update for no change
                                putSkip = false;
                                ret.known++;
                            }
                            if (destObservationDAO.getTransactionManager().isOpen()) {
                                throw new RuntimeException("BUG: found open trasnaction at start of next observation");
                            }
                            log.debug("starting transaction");
                            destObservationDAO.getTransactionManager().startTransaction();
                            // track the fail
                            if (putSkip) {
                                log.info("put: " + skip);
                                harvestSkip.put(skip);
                            } else {
                                log.info("known: " + skip);
                            }
                        } catch (Throwable oops) {
                            log.warn("failed to insert HarvestSkipURI", oops);
                            destObservationDAO.getTransactionManager().rollbackTransaction();
                            log.warn("rollback HarvestSkipURI: OK");
                            // NOTE(review): after this rollback, control falls
                            // through to the commit below with no transaction
                            // open — verify the transaction manager tolerates
                            // commit-without-begin, or restructure upstream.
                        }
                    }
                    log.debug("committing transaction");
                    destObservationDAO.getTransactionManager().commitTransaction();
                    log.debug("commit: OK");
                }
                ret.mismatch++;
            } catch (Throwable oops) {
                // classify the failure for logging only; the loop continues
                String str = oops.toString();
                if (oops instanceof Error) {
                    log.error("FATAL - probably installation or environment", oops);
                } else if (oops instanceof NullPointerException) {
                    log.error("BUG", oops);
                } else if (oops instanceof BadSqlGrammarException) {
                    log.error("BUG", oops);
                    BadSqlGrammarException bad = (BadSqlGrammarException) oops;
                    SQLException sex1 = bad.getSQLException();
                    if (sex1 != null) {
                        log.error("CAUSE", sex1);
                        SQLException sex2 = sex1.getNextException();
                        log.error("NEXT CAUSE", sex2);
                    }
                } else if (oops instanceof DataAccessResourceFailureException) {
                    log.error("SEVERE PROBLEM - probably out of space in database", oops);
                } else if (oops instanceof DataIntegrityViolationException && str.contains("duplicate key value violates unique constraint \"i_observationuri\"")) {
                    log.error("CONTENT PROBLEM - duplicate observation: " + " " + o.getObs().getURI().getURI().toASCIIString());
                } else if (oops instanceof UncategorizedSQLException) {
                    if (str.contains("spherepoly_from_array")) {
                        log.error("UNDETECTED illegal polygon: " + o.getObs().getURI().getURI());
                    } else {
                        log.error("unexpected exception", oops);
                    }
                } else if (oops instanceof IllegalArgumentException && str.contains("CaomValidator") && str.contains("keywords")) {
                    log.error("CONTENT PROBLEM - invalid keywords: " + " " + o.getObs().getURI().getURI().toASCIIString());
                } else {
                    log.error("unexpected exception", oops);
                }
            }
        }
    } finally {
        timeTransaction = System.currentTimeMillis() - t;
        log.debug("time to get HarvestState: " + timeState + "ms");
        log.debug("time to run ObservationListQuery: " + timeQuery + "ms");
        log.debug("time to run transactions: " + timeTransaction + "ms");
    }
    return ret;
}
Aggregations