Search in sources :

Example 1 with HarvestState

use of ca.nrc.cadc.caom2.harvester.state.HarvestState in project caom2db by opencadc.

the class ArtifactHarvester method runIt.

/**
 * Runs one artifact-harvest batch.
 *
 * <p>Reads the stored HarvestState, lists observations modified between the
 * stored timestamp and five minutes ago, and for every released artifact of
 * every listed observation checks storage and records a HarvestSkipURI when the
 * artifact is proprietary (under the PUBLIC_ONLY policy) or not correctly
 * stored. The HarvestState is written once per observation inside its own
 * transaction.
 *
 * @return true when the (possibly trimmed) observation list holds fewer than
 *         {@code batchSize + 1} entries
 * @throws Exception any failure raised while processing an observation; the
 *         current transaction is rolled back before it is rethrown
 */
private Boolean runIt() throws Exception {
    this.downloadCount = 0;
    this.processedCount = 0;
    this.start = new Date();
    try {
        // Determine the state of the last run
        HarvestState state = harvestStateDAO.get(source, STATE_CLASS);
        this.startDate = state.curLastModified;
        // harvest up to a little in the past because the head of
        // the sequence may be volatile
        long fiveMinAgo = System.currentTimeMillis() - 5 * 60000L;
        Date stopDate = new Date(fiveMinAgo);
        if (startDate == null) {
            log.info("harvest window: null " + this.df.format(stopDate) + " [" + this.batchSize + "]");
        } else {
            log.info("harvest window: " + this.df.format(startDate) + " " + this.df.format(stopDate) + " [" + this.batchSize + "]");
        }
        List<ObservationState> observationStates = this.observationDAO.getObservationList(this.collection, this.startDate, stopDate, this.batchSize + 1);
        // The query is inclusive (>= startDate), so the first entry is normally the
        // observation already recorded in the HarvestState: drop it from this batch.
        if (!observationStates.isEmpty()) {
            ListIterator<ObservationState> iter = observationStates.listIterator();
            ObservationState curBatchLeader = iter.next();
            if (curBatchLeader != null) {
                if (state.curLastModified != null) {
                    log.debug("harvesState: " + format(state.curID) + ", " + this.df.format(state.curLastModified));
                }
                if (curBatchLeader.getMaxLastModified().equals(state.curLastModified)) {
                    Observation observation = this.observationDAO.get(curBatchLeader.getID());
                    if (observation == null) {
                        // the DAO returns null for a vanished observation (the main loop
                        // below handles the same case); leave the entry for that loop
                        log.debug("Observation no longer exists: " + curBatchLeader.getURI());
                    } else {
                        log.debug("current batch: " + format(observation.getID()) + ", " + this.df.format(curBatchLeader.getMaxLastModified()));
                        if (state.curID != null && state.curID.equals(observation.getID())) {
                            iter.remove();
                        }
                    }
                }
            }
        }
        log.info("Found: " + observationStates.size());
        for (ObservationState observationState : observationStates) {
            try {
                this.observationDAO.getTransactionManager().startTransaction();
                Observation observation = this.observationDAO.get(observationState.getID());
                if (observation == null) {
                    log.debug("Observation no longer exists: " + observationState.getURI());
                } else {
                    // will make progress even on failures
                    state.curLastModified = observation.getMaxLastModified();
                    state.curID = observation.getID();
                    for (Plane plane : observation.getPlanes()) {
                        for (Artifact artifact : plane.getArtifacts()) {
                            Date releaseDate = AccessUtil.getReleaseDate(artifact, plane.metaRelease, plane.dataRelease);
                            if (releaseDate == null) {
                                // null date means private
                                log.debug("null release date, skipping");
                            } else {
                                logStart(format(state.curID), artifact);
                                boolean success = true;
                                boolean added = false;
                                String message = null;
                                this.caomChecksum = getMD5Sum(artifact.contentChecksum);
                                if (this.caomChecksum == null) {
                                    this.caomChecksum = "null";
                                }
                                // a null content length is carried over as null
                                this.caomContentLength = artifact.contentLength;
                                this.storageContentLength = 0;
                                this.reason = "None";
                                this.errorMessage = null;
                                this.processedCount++;
                                if (releaseDate.after(start)) {
                                    // private and release date is not null, download in the future
                                    this.errorMessage = ArtifactHarvester.PROPRIETARY;
                                }
                                try {
                                    // by default, do not add to the skip table
                                    boolean correctCopy = true;
                                    // artifact is not in storage if storage policy is 'PUBLIC ONLY' and the artifact is proprietary
                                    if ((StoragePolicy.ALL == storagePolicy) || this.errorMessage == null) {
                                        // correctCopy is false if: checksum mismatch, content length mismatch or not in storage
                                        // "not in storage" includes failing to resolve the artifact URI
                                        correctCopy = checkArtifactInStorage(artifact.getURI());
                                        log.debug("Artifact " + artifact.getURI() + " with MD5 " + artifact.contentChecksum + " correct copy: " + correctCopy);
                                    }
                                    if ((StoragePolicy.PUBLIC_ONLY == storagePolicy && ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) || !correctCopy) {
                                        HarvestSkipURI skip = harvestSkipURIDAO.get(source, STATE_CLASS, artifact.getURI());
                                        // Remember whether the entry pre-existed: skip is reassigned just
                                        // below, so a later null test could never tell the two cases apart.
                                        boolean alreadyInSkipTable = (skip != null);
                                        if (!alreadyInSkipTable) {
                                            // not in skip table, add it
                                            skip = new HarvestSkipURI(source, STATE_CLASS, artifact.getURI(), releaseDate, this.errorMessage);
                                        }
                                        if (ArtifactHarvester.PROPRIETARY.equals(skip.errorMessage) || ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) {
                                            skip.setTryAfter(releaseDate);
                                            skip.errorMessage = errorMessage;
                                        }
                                        this.harvestSkipURIDAO.put(skip);
                                        if (!alreadyInSkipTable) {
                                            // genuinely new entry: counts as a pending download
                                            this.downloadCount++;
                                            added = true;
                                        } else if (ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) {
                                            // existing entry refreshed with the current release date
                                            added = true;
                                            this.updateCount++;
                                            message = this.errorMessage + " artifact already exists in skip table, update tryAfter date to release date.";
                                        } else {
                                            String msg = "artifact already exists in skip table.";
                                            if (this.reason.equalsIgnoreCase("None")) {
                                                this.reason = "Public " + msg;
                                            } else {
                                                this.reason = this.reason + " and public " + msg;
                                            }
                                        }
                                    }
                                } catch (Exception ex) {
                                    // a failure on one artifact is reported but does not stop the batch
                                    success = false;
                                    message = "Failed to determine if artifact " + artifact.getURI() + " exists: " + ex.getMessage();
                                    log.error(message, ex);
                                }
                                logEnd(format(state.curID), artifact, success, added, message);
                            }
                        }
                    }
                }
                this.harvestStateDAO.put(state);
                log.debug("Updated artifact harvest state.  Date: " + state.curLastModified);
                log.debug("Updated artifact harvest state.  ID: " + format(state.curID));
                this.observationDAO.getTransactionManager().commitTransaction();
            } catch (Throwable t) {
                this.observationDAO.getTransactionManager().rollbackTransaction();
                throw t;
            }
        }
        // NOTE(review): the size is taken after the batch leader may have been removed
        // above, so a full batch of batchSize + 1 can still report true -- confirm intent.
        return (observationStates.size() < batchSize + 1);
    } finally {
        logBatchEnd();
    }
}
Also used : Plane(ca.nrc.cadc.caom2.Plane) HarvestSkipURI(ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI) Date(java.util.Date) Artifact(ca.nrc.cadc.caom2.Artifact) TransientException(ca.nrc.cadc.net.TransientException) HarvestState(ca.nrc.cadc.caom2.harvester.state.HarvestState) ObservationState(ca.nrc.cadc.caom2.ObservationState) Observation(ca.nrc.cadc.caom2.Observation)

Example 2 with HarvestState

use of ca.nrc.cadc.caom2.harvester.state.HarvestState in project caom2db by opencadc.

the class ObservationValidator method initProgressFile.

// Progress file management functions
/**
 * Loads the progress record from the progress file, or creates and writes a
 * fresh record when the file does not exist yet.
 *
 * @param src source identifier the progress record must belong to
 * @throws IllegalStateException if an existing progress file was written for a
 *         different source
 */
protected void initProgressFile(String src) throws IOException, ParseException, URISyntaxException {
    if (!this.progressFile.exists()) {
        // nothing on disk yet: start a new record and persist it immediately
        this.progressRecord = new HarvestState(src, Observation.class.getSimpleName());
        writeProgressFile(this.progressRecord);
        return;
    }

    // resume from the existing file, but only if it tracks the same source
    this.progressRecord = readProgressFile();
    boolean sameSource = this.progressRecord.getSource().equals(src);
    if (!sameSource) {
        throw new IllegalStateException("existing progress file has different source: " + this.progressRecord.getSource());
    }
}
Also used : HarvestState(ca.nrc.cadc.caom2.harvester.state.HarvestState)

Example 3 with HarvestState

use of ca.nrc.cadc.caom2.harvester.state.HarvestState in project caom2db by opencadc.

the class ObservationRemover method run.

/**
 * Removes, batch by batch, the observations, the deleted-observation records
 * and (when a source is configured) the harvest-skip entries and harvest-state
 * records. Each phase repeats until a batch finds nothing or reports an abort;
 * an aborted phase ends the whole run.
 */
@Override
public void run() {
    // Get HarvestState record to see if this collection has actually been harvested before
    HarvestStateDAO harvestStateDAO = new PostgresqlHarvestStateDAO(obsDAO.getDataSource(), target.getDatabase(), target.getSchema());
    HarvestState harvestStateRec = null;
    if (src != null) {
        harvestStateRec = harvestStateDAO.get(src.getIdentifier(), Observation.class.getSimpleName());
        // a record with a null lastModified is treated as "never harvested"
        if (harvestStateRec.getLastModified() == null) {
            log.error("no found: HarvestState record for " + src.getIdentifier());
            return;
        }
    }
    log.info("Using batchSize: " + batchSize);

    // phase 1: observations
    log.info("Removing Observation(s) ...");
    Progress batch;
    int removed = 0;
    do {
        batch = deleteObservations();
        if (batch.found > 0) {
            log.info("finished batch: " + batch.toString());
        }
        if (batch.abort) {
            log.error("batch aborted");
        }
        removed += batch.found;
    } while (batch.found > 0 && !batch.abort);
    log.info("Removed " + removed + " observations");
    if (batch.abort) {
        log.warn("Problem removing observations. Quitting...");
        return;
    }

    // phase 2: deleted-observation records
    log.info("Removing DeletedObservation(s)...");
    removed = 0;
    do {
        batch = deleteDeletedObservations();
        if (batch.found > 0) {
            log.info("finished batch: " + batch.toString());
        }
        if (batch.abort) {
            log.error("batch aborted");
        }
        removed += batch.found;
    } while (batch.found > 0 && !batch.abort);
    log.info("Removed " + removed + " DeletedObservation(s)");
    if (batch.abort) {
        log.warn("Problem removing DeletedObservation(s). Quitting...");
        return;
    }

    // the remaining phases only apply when a source is configured
    if (src == null) {
        return;
    }

    // phase 3: harvest-skip entries
    log.info("Removing HarvesetSkipURI(s)...");
    removed = 0;
    do {
        batch = deleteHarvestSkipURI();
        if (batch.found > 0) {
            log.info("finished batch: " + batch.toString());
        }
        if (batch.abort) {
            log.error("batch aborted");
        }
        removed += batch.found;
    } while (batch.found > 0 && !batch.abort);
    log.info("Removed " + removed + " HarvesetSkipURI(s)");
    if (batch.abort) {
        log.warn("Problem removing HarvesetSkipURI(s). Quitting...");
        return;
    }

    // phase 4: harvest-state records (Observation first, then DeletedObservation)
    log.info("Deleting harvest state records...");
    harvestStateDAO.delete(harvestStateRec);
    harvestStateRec = harvestStateDAO.get(src.getIdentifier(), DeletedObservation.class.getSimpleName());
    if (harvestStateRec.getLastModified() != null) {
        harvestStateDAO.delete(harvestStateRec);
    }
}
Also used : HarvestState(ca.nrc.cadc.caom2.harvester.state.HarvestState) PostgresqlHarvestStateDAO(ca.nrc.cadc.caom2.harvester.state.PostgresqlHarvestStateDAO) HarvestStateDAO(ca.nrc.cadc.caom2.harvester.state.HarvestStateDAO) PostgresqlHarvestStateDAO(ca.nrc.cadc.caom2.harvester.state.PostgresqlHarvestStateDAO)

Example 4 with HarvestState

use of ca.nrc.cadc.caom2.harvester.state.HarvestState in project caom2db by opencadc.

the class DeletionHarvester method doit.

/**
 * Harvests one batch of DeletedObservation records and applies them to the
 * destination through obsDAO.
 *
 * <p>The batch window starts at the stored HarvestState timestamp (or at
 * minDate on the first iteration, when set) and ends at endDate, which on the
 * first iteration is capped at five minutes before the current time. Each
 * entry is handled in its own transaction unless dryrun is set; in dryrun mode
 * entries are only logged.
 *
 * @return progress status: found/deleted/skipped counts, {@code done} when fewer
 *         entries than the expected batch size were found, and {@code abort}
 *         when an entry failed or an unexpected exception was caught
 */
private Progress doit() {
    log.info("batch: " + entityClass.getSimpleName());
    Progress ret = new Progress();
    // a null batchSize means "no limit" for the full-batch comparisons below
    int expectedNum = Integer.MAX_VALUE;
    if (batchSize != null) {
        expectedNum = batchSize.intValue();
    }
    // stays true unless the outer catch runs; only controls the final "DONE" log
    boolean correct = true;
    try {
        HarvestState state = harvestStateDAO.get(source, cname);
        log.info("last harvest: " + format(state.curLastModified));
        // optionally seed a never-used state with initDate, then re-read it
        if (initHarvestState && state.curLastModified == null) {
            state.curLastModified = initDate;
            harvestStateDAO.put(state);
            state = harvestStateDAO.get(source, cname);
            log.info("harvest state initialised to: " + df.format(state.curLastModified));
        }
        startDate = state.curLastModified;
        if (firstIteration) {
            if (super.minDate != null) {
                // override state
                startDate = super.minDate;
            }
            endDate = super.maxDate;
            // harvest up to a little in the past because the head of the
            // sequence may be volatile
            long fiveMinAgo = System.currentTimeMillis() - 5 * 60000L;
            if (endDate == null) {
                endDate = new Date(fiveMinAgo);
            } else {
                endDate = new Date(Math.min(fiveMinAgo, endDate.getTime()));
            }
        }
        firstIteration = false;
        List<DeletedObservation> entityList = null;
        // NOTE: this local hides the outer `source` that was used above for the
        // HarvestState lookup; from here on `source` is only a label for the error message
        String source = null;
        if (deletedDAO != null) {
            source = "deletedDAO";
            entityList = deletedDAO.getList(src.getCollection(), startDate, endDate, batchSize);
        } else {
            source = "repoClient";
            entityList = repoClient.getDeleted(src.getCollection(), startDate, endDate, batchSize);
        }
        if (entityList == null) {
            throw new RuntimeException("Error gathering deleted observations from " + source);
        }
        // only a full batch is passed to detectLoop
        // (presumably guards against re-reading the same batch forever -- TODO confirm)
        if (entityList.size() == expectedNum) {
            detectLoop(entityList);
        }
        ret.found = entityList.size();
        log.info("found: " + entityList.size());
        ListIterator<DeletedObservation> iter = entityList.listIterator();
        while (iter.hasNext()) {
            DeletedObservation de = iter.next();
            // allow garbage collection asap
            iter.remove();
            log.debug("Observation read from deletion end-point: " + de.getID() + " date = " + de.getLastModified());
            if (!dryrun) {
                txnManager.startTransaction();
            }
            // set to true only when the entry was processed without an exception
            boolean ok = false;
            try {
                if (!dryrun) {
                    // the in-memory state advances before the delete is attempted
                    state.curLastModified = de.getLastModified();
                    state.curID = de.getID();
                    ObservationState cur = obsDAO.getState(de.getID());
                    if (cur != null) {
                        log.debug("Observation: " + de.getID() + " found in DB");
                        Date lastUpdate = cur.getMaxLastModified();
                        Date deleted = de.getLastModified();
                        log.debug("to be deleted: " + de.getClass().getSimpleName() + " " + de.getURI() + " " + de.getID() + "deleted date " + format(de.getLastModified()) + " modified date " + format(cur.getMaxLastModified()));
                        // delete only when the deletion is newer than the stored observation
                        if (deleted.after(lastUpdate)) {
                            log.info("delete: " + de.getClass().getSimpleName() + " " + de.getURI() + " " + de.getID());
                            obsDAO.delete(de.getID());
                            ret.deleted++;
                        } else {
                            log.info("skip out-of-date delete: " + de.getClass().getSimpleName() + " " + de.getURI() + " " + de.getID() + " " + format(de.getLastModified()));
                            ret.skipped++;
                        }
                    } else {
                        log.debug("Observation: " + de.getID() + " not found in DB");
                    }
                    // track progress
                    harvestStateDAO.put(state);
                    log.debug("committing transaction");
                    txnManager.commitTransaction();
                    log.debug("commit: OK");
                }
                ok = true;
            } catch (Throwable t) {
                // logged only; the finally block below performs the rollback
                log.error("unexpected exception", t);
            } finally {
                if (!ok && !dryrun) {
                    log.warn("failed to process " + de + ": trying to rollback the transaction");
                    txnManager.rollbackTransaction();
                    log.warn("rollback: OK");
                    // NOTE(review): abort is flagged but the loop carries on with the
                    // remaining entries of this batch -- confirm this is intended
                    ret.abort = true;
                }
            }
        }
        if (ret.found < expectedNum) {
            // a short batch means the end of the sequence has been reached
            ret.done = true;
            if (state != null && state.curLastModified != null && ret.found > 0) {
                // tweak HarvestState so we don't keep picking up the same one:
                // move the timestamp 1 ms ahead
                Date n = new Date(state.curLastModified.getTime() + 1L);
                Date now = new Date();
                // if that is still more than 600 seconds in the past, move 100 ms ahead instead
                if (now.getTime() - n.getTime() > 600 * 1000L) {
                    n = new Date(state.curLastModified.getTime() + 100L);
                }
                // NOTE(review): this write happens outside the per-entry transactions
                // and also when dryrun is set -- confirm
                state.curLastModified = n;
                log.info("reached last " + entityClass.getSimpleName() + ": setting curLastModified to " + format(state.curLastModified));
                harvestStateDAO.put(state);
            }
        }
    } catch (Throwable t) {
        // anything that escapes the per-entry handling aborts the batch
        log.error("unexpected exception", t);
        ret.abort = true;
        correct = false;
    } finally {
        if (correct) {
            log.debug("DONE");
        }
    }
    return ret;
}
Also used : Date(java.util.Date) HarvestState(ca.nrc.cadc.caom2.harvester.state.HarvestState) ObservationState(ca.nrc.cadc.caom2.ObservationState) DeletedObservation(ca.nrc.cadc.caom2.DeletedObservation)

Example 5 with HarvestState

use of ca.nrc.cadc.caom2.harvester.state.HarvestState in project caom2db by opencadc.

the class ObservationValidator method doit.

/**
 * Compares the observation states of the source and destination collections
 * and records every discrepancy as a HarvestSkipURI in the destination.
 *
 * <p>Both observation lists are loaded in full, reduced to sorted sets and
 * passed to calculateErroneousObservationStates. Unless dryrun is set, each
 * discrepancy that has no HarvestSkipURI yet gets one, written in its own
 * transaction. A transaction is committed or rolled back only while it is
 * actually open, so a failure before the transaction starts (or an entry
 * without an entity) no longer triggers a second, misleading error.
 *
 * @return progress: {@code found} is the size of the source set, {@code added}
 *         and {@code known} count new and pre-existing skip entries, and
 *         {@code mismatch} counts discrepancies processed without an
 *         exception reaching the outer handler
 */
private Progress doit() {
    Progress ret = new Progress();
    long t = System.currentTimeMillis();
    long timeState = -1;
    long timeQuery = -1;
    long timeTransaction = -1;
    try {
        // hint
        System.gc();
        t = System.currentTimeMillis();
        timeState = System.currentTimeMillis() - t;
        t = System.currentTimeMillis();
        log.info("getObservationList: " + src.getIdentifier());
        List<ObservationState> tmpSrcState = null;
        if (srcObservationDAO != null) {
            tmpSrcState = srcObservationDAO.getObservationList(src.getCollection(), null, null, null);
        } else if (srcObservationService != null) {
            tmpSrcState = srcObservationService.getObservationList(src.getCollection(), null, null, null);
        } else {
            throw new RuntimeException("BUG: both srcObservationDAO and srcObservationService are null");
        }
        log.info("found: " + tmpSrcState.size());
        Set<ObservationState> srcState = new TreeSet<>(compStateUri);
        srcState.addAll(tmpSrcState);
        // GC
        tmpSrcState = null;
        log.info("source set: " + srcState.size());
        log.info("getObservationList: " + dest.getIdentifier());
        List<ObservationState> tmpDstState = destObservationDAO.getObservationList(dest.getCollection(), null, null, null);
        log.info("found: " + tmpDstState.size());
        Set<ObservationState> dstState = new TreeSet<>(compStateUri);
        dstState.addAll(tmpDstState);
        // GC
        tmpDstState = null;
        log.info("destination set: " + dstState.size());
        Set<ObservationStateError> errlist = calculateErroneousObservationStates(srcState, dstState);
        log.info("discrepancies found: " + errlist.size());
        timeQuery = System.currentTimeMillis() - t;
        t = System.currentTimeMillis();
        List<SkippedWrapperURI<ObservationStateError>> entityListSrc = wrap(errlist);
        ret.found = srcState.size();
        ListIterator<SkippedWrapperURI<ObservationStateError>> iter = entityListSrc.listIterator();
        while (iter.hasNext()) {
            SkippedWrapperURI<ObservationStateError> ow = iter.next();
            ObservationStateError o = ow.entity;
            // allow garbage collection during loop
            iter.remove();
            String skipMsg = null;
            try {
                if (!dryrun) {
                    if (o != null) {
                        skipMsg = o.toString();
                        try {
                            log.debug("starting HarvestSkipURI transaction");
                            boolean putSkip = true;
                            HarvestSkipURI skip = harvestSkip.get(source, cname, o.getObs().getURI().getURI());
                            Date tryAfter = o.getObs().maxLastModified;
                            if (skip == null) {
                                skip = new HarvestSkipURI(source, cname, o.getObs().getURI().getURI(), tryAfter, skipMsg);
                            } else {
                                // avoid lastModified update for no change
                                putSkip = false;
                                ret.known++;
                            }
                            if (destObservationDAO.getTransactionManager().isOpen()) {
                                throw new RuntimeException("BUG: found open trasnaction at start of next observation");
                            }
                            log.debug("starting transaction");
                            destObservationDAO.getTransactionManager().startTransaction();
                            // track the fail
                            if (putSkip) {
                                log.info("put: " + skip);
                                harvestSkip.put(skip);
                                // count the entry only once it has actually been written
                                ret.added++;
                            } else {
                                log.info("known: " + skip);
                            }
                        } catch (Throwable oops) {
                            log.warn("failed to insert HarvestSkipURI", oops);
                            // the failure may have happened before startTransaction():
                            // roll back only when there is something to roll back
                            if (destObservationDAO.getTransactionManager().isOpen()) {
                                destObservationDAO.getTransactionManager().rollbackTransaction();
                                log.warn("rollback HarvestSkipURI: OK");
                            }
                        }
                    }
                    // no transaction is open when the entity was null or the insert was
                    // rolled back above; committing then would only raise a second error
                    if (destObservationDAO.getTransactionManager().isOpen()) {
                        log.debug("committing transaction");
                        destObservationDAO.getTransactionManager().commitTransaction();
                        log.debug("commit: OK");
                    }
                }
                ret.mismatch++;
            } catch (Throwable oops) {
                // classify the failure for the log; processing continues with the next entry
                String str = oops.toString();
                if (oops instanceof Error) {
                    log.error("FATAL - probably installation or environment", oops);
                } else if (oops instanceof NullPointerException) {
                    log.error("BUG", oops);
                } else if (oops instanceof BadSqlGrammarException) {
                    log.error("BUG", oops);
                    BadSqlGrammarException bad = (BadSqlGrammarException) oops;
                    SQLException sex1 = bad.getSQLException();
                    if (sex1 != null) {
                        log.error("CAUSE", sex1);
                        SQLException sex2 = sex1.getNextException();
                        // getNextException() returns null at the end of the chain
                        if (sex2 != null) {
                            log.error("NEXT CAUSE", sex2);
                        }
                    }
                } else if (oops instanceof DataAccessResourceFailureException) {
                    log.error("SEVERE PROBLEM - probably out of space in database", oops);
                } else if (oops instanceof DataIntegrityViolationException && str.contains("duplicate key value violates unique constraint \"i_observationuri\"")) {
                    log.error("CONTENT PROBLEM - duplicate observation: " + " " + o.getObs().getURI().getURI().toASCIIString());
                } else if (oops instanceof UncategorizedSQLException) {
                    if (str.contains("spherepoly_from_array")) {
                        log.error("UNDETECTED illegal polygon: " + o.getObs().getURI().getURI());
                    } else {
                        log.error("unexpected exception", oops);
                    }
                } else if (oops instanceof IllegalArgumentException && str.contains("CaomValidator") && str.contains("keywords")) {
                    log.error("CONTENT PROBLEM - invalid keywords: " + " " + o.getObs().getURI().getURI().toASCIIString());
                } else {
                    log.error("unexpected exception", oops);
                }
            }
        }
    } finally {
        timeTransaction = System.currentTimeMillis() - t;
        // NOTE(review): timeState spans no work here (nothing runs between the two
        // clock reads above), so the first figure is always about 0 ms
        log.debug("time to get HarvestState: " + timeState + "ms");
        log.debug("time to run ObservationListQuery: " + timeQuery + "ms");
        log.debug("time to run transactions: " + timeTransaction + "ms");
    }
    return ret;
}
Also used : BadSqlGrammarException(org.springframework.jdbc.BadSqlGrammarException) UncategorizedSQLException(org.springframework.jdbc.UncategorizedSQLException) SQLException(java.sql.SQLException) UncategorizedSQLException(org.springframework.jdbc.UncategorizedSQLException) TreeSet(java.util.TreeSet) DataAccessResourceFailureException(org.springframework.dao.DataAccessResourceFailureException) HarvestSkipURI(ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI) Date(java.util.Date) DataIntegrityViolationException(org.springframework.dao.DataIntegrityViolationException) ObservationState(ca.nrc.cadc.caom2.ObservationState)

Aggregations

HarvestState (ca.nrc.cadc.caom2.harvester.state.HarvestState)7 Date (java.util.Date)7 ObservationState (ca.nrc.cadc.caom2.ObservationState)4 HarvestSkipURI (ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI)3 UtilTest (ca.nrc.cadc.caom2.persistence.UtilTest)3 Test (org.junit.Test)3 Artifact (ca.nrc.cadc.caom2.Artifact)2 Observation (ca.nrc.cadc.caom2.Observation)2 Plane (ca.nrc.cadc.caom2.Plane)2 PostgresqlHarvestStateDAO (ca.nrc.cadc.caom2.harvester.state.PostgresqlHarvestStateDAO)2 TransientException (ca.nrc.cadc.net.TransientException)2 SQLException (java.sql.SQLException)2 DataAccessResourceFailureException (org.springframework.dao.DataAccessResourceFailureException)2 BadSqlGrammarException (org.springframework.jdbc.BadSqlGrammarException)2 DeletedObservation (ca.nrc.cadc.caom2.DeletedObservation)1 ObservationResponse (ca.nrc.cadc.caom2.ObservationResponse)1 ObservationURI (ca.nrc.cadc.caom2.ObservationURI)1 HarvestStateDAO (ca.nrc.cadc.caom2.harvester.state.HarvestStateDAO)1 BufferedReader (java.io.BufferedReader)1 FileReader (java.io.FileReader)1