use of ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI in project caom2db by opencadc.
the class ArtifactHarvester method runIt.
/**
 * Harvests one batch of observations and records, in the HarvestSkipURI table,
 * every released artifact that is either proprietary under a PUBLIC_ONLY storage
 * policy or for which {@code checkArtifactInStorage} reports no correct copy.
 *
 * <p>The harvest window runs from the last recorded {@code curLastModified} up to
 * five minutes before now. Each observation is processed in its own transaction;
 * the harvest state is stored and the transaction committed per observation, and
 * any failure rolls the transaction back and is rethrown.
 *
 * @return true when the (possibly trimmed) list of observation states holds fewer
 *         than {@code batchSize + 1} entries
 * @throws Exception whatever the DAOs or the transaction manager throw
 */
private Boolean runIt() throws Exception {
    this.downloadCount = 0;
    this.processedCount = 0;
    this.start = new Date();
    try {
        // Determine the state of the last run
        HarvestState state = harvestStateDAO.get(source, STATE_CLASS);
        this.startDate = state.curLastModified;
        // harvest up to a little in the past because the head of
        // the sequence may be volatile
        long fiveMinAgo = System.currentTimeMillis() - 5 * 60000L;
        Date stopDate = new Date(fiveMinAgo);
        if (startDate == null) {
            log.info("harvest window: null " + this.df.format(stopDate) + " [" + this.batchSize + "]");
        } else {
            log.info("harvest window: " + this.df.format(startDate) + " " + this.df.format(stopDate) + " [" + this.batchSize + "]");
        }
        List<ObservationState> observationStates = this.observationDAO.getObservationList(this.collection, this.startDate, stopDate, this.batchSize + 1);
        // HarvestState (normal case because query: >= startDate)
        if (!observationStates.isEmpty()) {
            ListIterator<ObservationState> iter = observationStates.listIterator();
            ObservationState curBatchLeader = iter.next();
            if (curBatchLeader != null) {
                if (state.curLastModified != null) {
                    log.debug("harvesState: " + format(state.curID) + ", " + this.df.format(state.curLastModified));
                }
                if (curBatchLeader.getMaxLastModified().equals(state.curLastModified)) {
                    Observation observation = this.observationDAO.get(curBatchLeader.getID());
                    // the observation may be gone by the time it is fetched (the main
                    // loop below handles the same case), so do not dereference a null
                    if (observation != null) {
                        log.debug("current batch: " + format(observation.getID()) + ", " + this.df.format(curBatchLeader.getMaxLastModified()));
                        // drop the leader when it is the observation the last run ended on
                        if (state.curID != null && state.curID.equals(observation.getID())) {
                            iter.remove();
                        }
                    }
                }
            }
        }
        log.info("Found: " + observationStates.size());
        for (ObservationState observationState : observationStates) {
            try {
                this.observationDAO.getTransactionManager().startTransaction();
                Observation observation = this.observationDAO.get(observationState.getID());
                if (observation == null) {
                    log.debug("Observation no longer exists: " + observationState.getURI());
                } else {
                    // will make progress even on failures
                    state.curLastModified = observation.getMaxLastModified();
                    state.curID = observation.getID();
                    for (Plane plane : observation.getPlanes()) {
                        for (Artifact artifact : plane.getArtifacts()) {
                            Date releaseDate = AccessUtil.getReleaseDate(artifact, plane.metaRelease, plane.dataRelease);
                            if (releaseDate == null) {
                                // null date means private
                                log.debug("null release date, skipping");
                            } else {
                                logStart(format(state.curID), artifact);
                                boolean success = true;
                                boolean added = false;
                                String message = null;
                                this.caomChecksum = getMD5Sum(artifact.contentChecksum);
                                if (this.caomChecksum == null) {
                                    this.caomChecksum = "null";
                                }
                                this.caomContentLength = artifact.contentLength;
                                this.storageContentLength = 0;
                                this.reason = "None";
                                this.errorMessage = null;
                                this.processedCount++;
                                if (releaseDate.after(start)) {
                                    // private and release date is not null, download in the future
                                    this.errorMessage = ArtifactHarvester.PROPRIETARY;
                                }
                                try {
                                    // by default, do not add to the skip table
                                    boolean correctCopy = true;
                                    // artifact is not in storage if storage policy is 'PUBLIC ONLY' and the artifact is proprietary
                                    if ((StoragePolicy.ALL == storagePolicy) || this.errorMessage == null) {
                                        // correctCopy is false if: checksum mismatch, content length mismatch or not in storage
                                        // "not in storage" includes failing to resolve the artifact URI
                                        correctCopy = checkArtifactInStorage(artifact.getURI());
                                        log.debug("Artifact " + artifact.getURI() + " with MD5 " + artifact.contentChecksum + " correct copy: " + correctCopy);
                                    }
                                    if ((StoragePolicy.PUBLIC_ONLY == storagePolicy && ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) || !correctCopy) {
                                        HarvestSkipURI skip = harvestSkipURIDAO.get(source, STATE_CLASS, artifact.getURI());
                                        // remember whether the row pre-existed BEFORE substituting a new one;
                                        // testing skip for null after this point would always succeed
                                        final boolean alreadyInSkipTable = (skip != null);
                                        if (!alreadyInSkipTable) {
                                            // not in skip table, add it
                                            skip = new HarvestSkipURI(source, STATE_CLASS, artifact.getURI(), releaseDate, this.errorMessage);
                                        }
                                        if (ArtifactHarvester.PROPRIETARY.equals(skip.errorMessage) || ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) {
                                            skip.setTryAfter(releaseDate);
                                            skip.errorMessage = errorMessage;
                                        }
                                        this.harvestSkipURIDAO.put(skip);
                                        if (!alreadyInSkipTable) {
                                            // genuinely new row: count it as a pending download
                                            this.downloadCount++;
                                            added = true;
                                        } else if (ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) {
                                            // existing row whose tryAfter was just moved to the release date;
                                            // still reported with added = true, as before
                                            added = true;
                                            this.updateCount++;
                                            message = this.errorMessage + " artifact already exists in skip table, update tryAfter date to release date.";
                                        } else {
                                            // existing row for a public artifact: nothing new was added
                                            String msg = "artifact already exists in skip table.";
                                            if (this.reason.equalsIgnoreCase("None")) {
                                                this.reason = "Public " + msg;
                                            } else {
                                                this.reason = this.reason + " and public " + msg;
                                            }
                                        }
                                    }
                                } catch (Exception ex) {
                                    success = false;
                                    message = "Failed to determine if artifact " + artifact.getURI() + " exists: " + ex.getMessage();
                                    log.error(message, ex);
                                }
                                logEnd(format(state.curID), artifact, success, added, message);
                            }
                        }
                    }
                }
                this.harvestStateDAO.put(state);
                log.debug("Updated artifact harvest state. Date: " + state.curLastModified);
                log.debug("Updated artifact harvest state. ID: " + format(state.curID));
                this.observationDAO.getTransactionManager().commitTransaction();
            } catch (Throwable t) {
                this.observationDAO.getTransactionManager().rollbackTransaction();
                throw t;
            }
        }
        return (observationStates.size() < batchSize + 1);
    } finally {
        logBatchEnd();
    }
}
use of ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI in project caom2db by opencadc.
the class ArtifactValidator method addToSkipTable.
/**
 * Stores a new HarvestSkipURI row for the artifact described by {@code metadata},
 * bumps {@code newSkipURICount} and writes a JSON detail log entry for the addition.
 * This method itself does not check whether a row already exists.
 *
 * @param artifact     not used by this method
 * @param releaseDate  release date stored on the row and written to the log entry
 * @param metadata     supplies the artifact URI and checksum
 * @param errorMessage error message stored on the row; logged as "null" when null
 * @throws URISyntaxException declared by the signature
 */
private void addToSkipTable(Artifact artifact, Date releaseDate, ArtifactMetadata metadata, String errorMessage) throws URISyntaxException {
    // create and persist the HarvestSkipURI row
    HarvestSkipURI newRow = new HarvestSkipURI(source, STATE_CLASS, metadata.getArtifactURI(), releaseDate, errorMessage);
    harvestSkipURIDAO.put(newRow);
    newSkipURICount++;

    String formattedRelease = df.format(releaseDate);
    String loggedError;
    if (errorMessage == null) {
        loggedError = "null";
    } else {
        loggedError = newRow.errorMessage;
    }

    String[] logFields = {
        "logType", "detail",
        "action", "addedToSkipTable",
        "artifactURI", metadata.getArtifactURI().toASCIIString(),
        "caomCollection", collection,
        "caomChecksum", metadata.getChecksum(),
        "releaseDate", formattedRelease,
        "errorMessage", loggedError
    };
    logJSON(logFields, true);
}
use of ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI in project caom2db by opencadc.
the class ArtifactValidator method addToOrUpdateSkipTable.
/**
 * Adds a HarvestSkipURI row for the artifact described by {@code metadata}, or
 * updates the existing row when one is already stored for its URI. Nothing is
 * done when no release date can be derived for the artifact.
 *
 * @param metadata     artifact URI, product/release type and release dates
 * @param errorMessage error message passed on to the add or update step
 * @throws URISyntaxException declared by the signature
 */
private void addToOrUpdateSkipTable(ArtifactMetadata metadata, String errorMessage) throws URISyntaxException {
    Artifact artifact = new Artifact(metadata.getArtifactURI(), metadata.productType, metadata.releaseType);
    Date releaseDate = AccessUtil.getReleaseDate(artifact, metadata.metaRelease, metadata.dataRelease);
    if (releaseDate == null) {
        // no release date: leave the skip table untouched
        return;
    }

    HarvestSkipURI existing = harvestSkipURIDAO.get(source, STATE_CLASS, metadata.getArtifactURI());
    if (existing != null) {
        updateSkipTable(existing, releaseDate, metadata, errorMessage);
    } else {
        // not in skip table, add it
        addToSkipTable(artifact, releaseDate, metadata, errorMessage);
    }
}
use of ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI in project caom2db by opencadc.
the class ArtifactValidator method getLogicalMetadata.
/**
 * Collects the logical (CAOM) metadata of every artifact in {@code collection}.
 *
 * <p>When {@code source} has text, the observations are read through
 * {@code observationDAO} and the HarvestSkipURI table is supported unless
 * {@code reportOnly} is set. Otherwise the metadata comes from an ADQL query
 * against {@code caomTapURL}, which is first resolved from
 * {@code caomTapResourceID} when that is not null; the skip table is not
 * supported in that mode.
 *
 * @return artifact metadata ordered by {@code ArtifactMetadata.getComparator()}
 *         in database mode, or the result of {@code query(...)} in TAP mode
 * @throws Exception on DAO or query failure, or when the TAP service does not
 *         support password authentication
 */
private TreeSet<ArtifactMetadata> getLogicalMetadata() throws Exception {
    TreeSet<ArtifactMetadata> result = new TreeSet<>(ArtifactMetadata.getComparator());
    if (StringUtil.hasText(source)) {
        // use database <server.database.schema>
        // HarvestSkipURI table is not supported in 'diff' mode, i.e. reportOnly = true
        this.supportSkipURITable = !reportOnly;
        long t1 = System.currentTimeMillis();
        List<ObservationState> states = observationDAO.getObservationList(collection, null, null, null);
        long t2 = System.currentTimeMillis();
        long dt = t2 - t1;
        log.info("get-state-list: size=" + states.size() + " in " + dt + " ms");
        int depth = 3;
        ListIterator<ObservationState> iter = states.listIterator();
        t1 = System.currentTimeMillis();
        while (iter.hasNext()) {
            ObservationState s = iter.next();
            // GC: release each state as soon as it has been picked up
            iter.remove();
            ObservationResponse resp = observationDAO.getObservationResponse(s, depth);
            if (resp == null) {
                log.error("Null response from Observation DAO, ObservationURI: " + s.getURI().toString() + ", depth: " + depth);
            } else if (resp.observation == null) {
                log.error("Observation is null, ObservationURI: " + s.getURI().toString() + ", depth: " + depth);
            } else {
                for (Plane plane : resp.observation.getPlanes()) {
                    for (Artifact artifact : plane.getArtifacts()) {
                        String observationID = s.getURI().getObservationID();
                        result.add(getMetadata(observationID, artifact, plane.dataRelease, plane.metaRelease));
                    }
                }
            }
        }
        log.info("Finished logical metadata query in " + (System.currentTimeMillis() - t1) + " ms");
    } else {
        this.supportSkipURITable = false;
        if (caomTapResourceID != null) {
            // source is a TAP resource ID
            AuthMethod authMethod = AuthenticationUtil.getAuthMethodFromCredentials(AuthenticationUtil.getCurrentSubject());
            TapClient tapClient = new TapClient(caomTapResourceID);
            try {
                this.caomTapURL = tapClient.getSyncURL(authMethod);
            } catch (ResourceNotFoundException ex) {
                // the message may be null, so check before inspecting it
                String detail = ex.getMessage();
                if (detail != null && detail.contains("with password")) {
                    throw new ResourceNotFoundException("TAP service for " + caomTapResourceID + " does not support password authentication.", ex);
                }
                // still best-effort (caomTapURL keeps its current value), but the
                // lookup failure is reported instead of being dropped silently
                log.warn("failed to resolve TAP sync URL for " + caomTapResourceID + ": " + detail, ex);
            }
        }
        // source is a TAP service URL or a TAP resource ID
        String adql = "select distinct(a.uri), a.contentChecksum, a.contentLength, a.contentType, o.observationID, " + "a.productType, a.releaseType, p.dataRelease, p.metaRelease " + "from caom2.Artifact a " + "join caom2.Plane p on a.planeID = p.planeID " + "join caom2.Observation o on p.obsID = o.obsID " + "where o.collection='" + collection + "'";
        log.debug("logical query: " + adql);
        long start = System.currentTimeMillis();
        result = query(caomTapURL, adql);
        log.info("Finished caom2 query in " + (System.currentTimeMillis() - start) + " ms");
    }
    return result;
}
use of ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI in project caom2db by opencadc.
the class ObservationValidator method doit.
/**
 * Compares the observation states of the source and destination collections
 * and, unless {@code dryrun} is set, records each discrepancy that is not yet
 * known as a HarvestSkipURI row.
 *
 * <p>Each discrepancy is handled in its own destination transaction. The
 * transaction is committed only when the skip step completed, and rolled back
 * only when one is actually open, so a failure on one discrepancy cannot leave
 * a transaction behind for the next.
 *
 * @return progress counters: {@code found} (size of the source set),
 *         {@code added} / {@code known} (skip rows created / already present)
 *         and {@code mismatch} (discrepancies processed without an exception
 *         reaching the per-item handler)
 */
private Progress doit() {
    Progress ret = new Progress();
    long t = System.currentTimeMillis();
    long timeState = -1;
    long timeQuery = -1;
    long timeTransaction = -1;
    try {
        // hint
        System.gc();
        // nothing is done between these two readings, so timeState is effectively 0
        t = System.currentTimeMillis();
        timeState = System.currentTimeMillis() - t;
        t = System.currentTimeMillis();
        log.info("getObservationList: " + src.getIdentifier());
        List<ObservationState> tmpSrcState = null;
        if (srcObservationDAO != null) {
            tmpSrcState = srcObservationDAO.getObservationList(src.getCollection(), null, null, null);
        } else if (srcObservationService != null) {
            tmpSrcState = srcObservationService.getObservationList(src.getCollection(), null, null, null);
        } else {
            throw new RuntimeException("BUG: both srcObservationDAO and srcObservationService are null");
        }
        log.info("found: " + tmpSrcState.size());
        Set<ObservationState> srcState = new TreeSet<>(compStateUri);
        srcState.addAll(tmpSrcState);
        // GC
        tmpSrcState = null;
        log.info("source set: " + srcState.size());
        log.info("getObservationList: " + dest.getIdentifier());
        List<ObservationState> tmpDstState = destObservationDAO.getObservationList(dest.getCollection(), null, null, null);
        log.info("found: " + tmpDstState.size());
        Set<ObservationState> dstState = new TreeSet<>(compStateUri);
        dstState.addAll(tmpDstState);
        // GC
        tmpDstState = null;
        log.info("destination set: " + dstState.size());
        Set<ObservationStateError> errlist = calculateErroneousObservationStates(srcState, dstState);
        log.info("discrepancies found: " + errlist.size());
        timeQuery = System.currentTimeMillis() - t;
        t = System.currentTimeMillis();
        List<SkippedWrapperURI<ObservationStateError>> entityListSrc = wrap(errlist);
        ret.found = srcState.size();
        ListIterator<SkippedWrapperURI<ObservationStateError>> iter = entityListSrc.listIterator();
        while (iter.hasNext()) {
            SkippedWrapperURI<ObservationStateError> ow = iter.next();
            ObservationStateError o = ow.entity;
            // allow garbage collection during loop
            iter.remove();
            String skipMsg = null;
            try {
                if (!dryrun && o != null) {
                    skipMsg = o.toString();
                    // set once the skip step finished inside an open transaction
                    boolean readyToCommit = false;
                    try {
                        log.debug("starting HarvestSkipURI transaction");
                        boolean putSkip = true;
                        HarvestSkipURI skip = harvestSkip.get(source, cname, o.getObs().getURI().getURI());
                        Date tryAfter = o.getObs().maxLastModified;
                        if (skip == null) {
                            skip = new HarvestSkipURI(source, cname, o.getObs().getURI().getURI(), tryAfter, skipMsg);
                            ret.added++;
                        } else {
                            // avoid lastModified update for no change
                            putSkip = false;
                            ret.known++;
                        }
                        if (destObservationDAO.getTransactionManager().isOpen()) {
                            throw new RuntimeException("BUG: found open trasnaction at start of next observation");
                        }
                        log.debug("starting transaction");
                        destObservationDAO.getTransactionManager().startTransaction();
                        // track the fail
                        if (putSkip) {
                            log.info("put: " + skip);
                            harvestSkip.put(skip);
                        } else {
                            log.info("known: " + skip);
                        }
                        readyToCommit = true;
                    } catch (Throwable oops) {
                        log.warn("failed to insert HarvestSkipURI", oops);
                        // the failure may have happened before the transaction was
                        // started; only roll back one that is actually open
                        if (destObservationDAO.getTransactionManager().isOpen()) {
                            destObservationDAO.getTransactionManager().rollbackTransaction();
                            log.warn("rollback HarvestSkipURI: OK");
                        }
                    }
                    // never commit after a rollback or when nothing was started
                    if (readyToCommit) {
                        log.debug("committing transaction");
                        destObservationDAO.getTransactionManager().commitTransaction();
                        log.debug("commit: OK");
                    }
                }
                ret.mismatch++;
            } catch (Throwable oops) {
                String str = oops.toString();
                if (oops instanceof Error) {
                    log.error("FATAL - probably installation or environment", oops);
                } else if (oops instanceof NullPointerException) {
                    log.error("BUG", oops);
                } else if (oops instanceof BadSqlGrammarException) {
                    log.error("BUG", oops);
                    BadSqlGrammarException bad = (BadSqlGrammarException) oops;
                    SQLException sex1 = bad.getSQLException();
                    if (sex1 != null) {
                        log.error("CAUSE", sex1);
                        SQLException sex2 = sex1.getNextException();
                        log.error("NEXT CAUSE", sex2);
                    }
                } else if (oops instanceof DataAccessResourceFailureException) {
                    log.error("SEVERE PROBLEM - probably out of space in database", oops);
                } else if (oops instanceof DataIntegrityViolationException && str.contains("duplicate key value violates unique constraint \"i_observationuri\"")) {
                    log.error("CONTENT PROBLEM - duplicate observation: " + " " + o.getObs().getURI().getURI().toASCIIString());
                } else if (oops instanceof UncategorizedSQLException) {
                    if (str.contains("spherepoly_from_array")) {
                        log.error("UNDETECTED illegal polygon: " + o.getObs().getURI().getURI());
                    } else {
                        log.error("unexpected exception", oops);
                    }
                } else if (oops instanceof IllegalArgumentException && str.contains("CaomValidator") && str.contains("keywords")) {
                    log.error("CONTENT PROBLEM - invalid keywords: " + " " + o.getObs().getURI().getURI().toASCIIString());
                } else {
                    log.error("unexpected exception", oops);
                }
                // if the failure (e.g. a commit that threw) left a transaction open,
                // close it here so the next discrepancy starts from a clean state
                try {
                    if (destObservationDAO.getTransactionManager().isOpen()) {
                        destObservationDAO.getTransactionManager().rollbackTransaction();
                    }
                } catch (Throwable rollbackFailure) {
                    log.error("failed to rollback transaction", rollbackFailure);
                }
            }
        }
    } finally {
        timeTransaction = System.currentTimeMillis() - t;
        log.debug("time to get HarvestState: " + timeState + "ms");
        log.debug("time to run ObservationListQuery: " + timeQuery + "ms");
        log.debug("time to run transactions: " + timeTransaction + "ms");
    }
    return ret;
}
Aggregations