Search in sources :

Example 1 with HarvestSkipURI

use of ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI in project caom2db by opencadc.

the class ArtifactHarvester method runIt.

/**
 * Runs one artifact-harvest batch.
 *
 * Reads the saved HarvestState, lists observation states between the saved
 * last-modified marker and five minutes before "now", then walks every
 * artifact of every returned observation: artifacts with a non-null release
 * date are checked against storage and against the HarvestSkipURI table.
 *
 * NOTE(review): this listing looks like a lossy extraction of the real method.
 * Many closing braces are absent and a few statements are visibly incomplete
 * (marked inline), so the text does not compile as shown. Confirm details
 * against the upstream source before relying on them.
 *
 * @return true when the query returned fewer than batchSize + 1 observation states
 * @throws Exception declared by the method; the per-observation catch block
 *                   below rethrows any Throwable unchanged
 */
private Boolean runIt() throws Exception {
    // reset the per-run counters and remember when this run started
    this.downloadCount = 0;
    this.processedCount = 0;
    this.start = new Date();
    try {
        // Determine the state of the last run
        HarvestState state = harvestStateDAO.get(source, STATE_CLASS);
        this.startDate = state.curLastModified;
        // harvest up to a little in the past because the head of
        // the sequence may be volatile
        long fiveMinAgo = System.currentTimeMillis() - 5 * 60000L;
        Date stopDate = new Date(fiveMinAgo);
        if (startDate == null) {
            // NOTE(review): the call that consumes this message is missing from the line below
            // (presumably a logger call) -- confirm against the upstream source
  "harvest window: null " + this.df.format(stopDate) + " [" + this.batchSize + "]");
        } else {
            // NOTE(review): same truncation as above -- the start of the statement is missing
  "harvest window: " + this.df.format(startDate) + " " + this.df.format(stopDate) + " [" + this.batchSize + "]");
        // request batchSize + 1 rows; the return statement at the end compares the
        // result size against that same batchSize + 1
        List<ObservationState> observationStates = this.observationDAO.getObservationList(this.collection, this.startDate, stopDate, this.batchSize + 1);
        // HarvestState (normal case because query: >= startDate)
        if (!observationStates.isEmpty()) {
            ListIterator<ObservationState> iter = observationStates.listIterator();
            // NOTE(review): the right-hand side of this assignment is missing in this listing
            ObservationState curBatchLeader =;
            if (curBatchLeader != null) {
                if (state.curLastModified != null) {
                    log.debug("harvesState: " + format(state.curID) + ", " + this.df.format(state.curLastModified));
                // the first returned state carries the same timestamp as the saved state:
                // load it and compare IDs with the saved one
                if (curBatchLeader.getMaxLastModified().equals(state.curLastModified)) {
                    Observation observation = this.observationDAO.get(curBatchLeader.getID());
                    log.debug("current batch: " + format(observation.getID()) + ", " + this.df.format(curBatchLeader.getMaxLastModified()));
                    // NOTE(review): the body of this branch is not present in this listing
                    if (state.curID != null && state.curID.equals(observation.getID())) {
        // NOTE(review): the start of the statement after the brace is missing (message only)
        }"Found: " + observationStates.size());
        for (ObservationState observationState : observationStates) {
            try {
                Observation observation = this.observationDAO.get(observationState.getID());
                if (observation == null) {
                    log.debug("Observation no longer exists: " + observationState.getURI());
                } else {
                    // will make progress even on failures
                    state.curLastModified = observation.getMaxLastModified();
                    state.curID = observation.getID();
                    for (Plane plane : observation.getPlanes()) {
                        for (Artifact artifact : plane.getArtifacts()) {
                            Date releaseDate = AccessUtil.getReleaseDate(artifact, plane.metaRelease, plane.dataRelease);
                            if (releaseDate == null) {
                                // null date means private
                                log.debug("null release date, skipping");
                            } else {
                                logStart(format(state.curID), artifact);
                                // per-artifact outcome, reported through logEnd below
                                boolean success = true;
                                boolean added = false;
                                String message = null;
                                // reset the per-artifact fields on this instance before the storage check
                                this.caomChecksum = getMD5Sum(artifact.contentChecksum);
                                if (this.caomChecksum == null) {
                                    this.caomChecksum = "null";
                                if (artifact.contentLength == null) {
                                    this.caomContentLength = null;
                                } else {
                                    this.caomContentLength = artifact.contentLength;
                                this.storageContentLength = 0;
                                this.reason = "None";
                                this.errorMessage = null;
                                // release date later than the start of this run => still proprietary
                                if (releaseDate.after(start)) {
                                    // private and release date is not null, download in the future
                                    this.errorMessage = ArtifactHarvester.PROPRIETARY;
                                try {
                                    // by default, do not add to the skip table
                                    boolean correctCopy = true;
                                    // artifact is not in storage if storage policy is 'PUBLIC ONLY' and the artifact is proprietary
                                    if ((StoragePolicy.ALL == storagePolicy) || this.errorMessage == null) {
                                        // correctCopy is false if: checksum mismatch, content length mismatch or not in storage
                                        // "not in storage" includes failing to resolve the artifact URI
                                        correctCopy = checkArtifactInStorage(artifact.getURI());
                                        log.debug("Artifact " + artifact.getURI() + " with MD5 " + artifact.contentChecksum + " correct copy: " + correctCopy);
                                    // skip-table handling applies to proprietary artifacts under PUBLIC_ONLY
                                    // and to any artifact without a correct copy in storage
                                    if ((StoragePolicy.PUBLIC_ONLY == storagePolicy && ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) || !correctCopy) {
                                        HarvestSkipURI skip = harvestSkipURIDAO.get(source, STATE_CLASS, artifact.getURI());
                                        if (skip == null) {
                                            // not in skip table, add it
                                            skip = new HarvestSkipURI(source, STATE_CLASS, artifact.getURI(), releaseDate, this.errorMessage);
                                        // NOTE(review): braces are missing here, so the nesting of the following
                                        // statements (and any DAO write) cannot be determined from this listing
                                        if (ArtifactHarvester.PROPRIETARY.equals(skip.errorMessage) || ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) {
                                            skip.errorMessage = errorMessage;
                                        added = true;
                                        if (skip != null) {
                                            if (ArtifactHarvester.PROPRIETARY.equals(this.errorMessage)) {
                                                message = this.errorMessage + " artifact already exists in skip table, update tryAfter date to release date.";
                                            } else {
                                                added = false;
                                                String msg = "artifact already exists in skip table.";
                                                if (this.reason.equalsIgnoreCase("None")) {
                                                    this.reason = "Public " + msg;
                                                } else {
                                                    this.reason = this.reason + " and public " + msg;
                                } catch (Exception ex) {
                                    // a failed check is recorded and logged; it is not rethrown from this handler
                                    success = false;
                                    message = "Failed to determine if artifact " + artifact.getURI() + " exists: " + ex.getMessage();
                                    log.error(message, ex);
                                logEnd(format(state.curID), artifact, success, added, message);
                log.debug("Updated artifact harvest state.  Date: " + state.curLastModified);
                log.debug("Updated artifact harvest state.  ID: " + format(state.curID));
            } catch (Throwable t) {
                // rethrown unchanged; no handling is visible in this listing
                throw t;
        // true when the query returned fewer rows than the batchSize + 1 that was requested
        return (observationStates.size() < batchSize + 1);
    // NOTE(review): the finally body and closing braces are not present in this listing
    } finally {
Also used : Plane(ca.nrc.cadc.caom2.Plane) HarvestSkipURI(ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI) Date(java.util.Date) Artifact(ca.nrc.cadc.caom2.Artifact) TransientException(ca.nrc.cadc.net.TransientException) HarvestState(ca.nrc.cadc.caom2.harvester.state.HarvestState) ObservationState(ca.nrc.cadc.caom2.ObservationState) Observation(ca.nrc.cadc.caom2.Observation)

Example 2 with HarvestSkipURI

use of ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI in project caom2db by opencadc.

the class ArtifactValidator method addToSkipTable.

/**
 * Builds a HarvestSkipURI entry for the artifact described by {@code metadata}
 * and emits a JSON log record with action "addedToSkipTable".
 *
 * NOTE(review): no write to the skip table is visible in this listing, and the
 * closing brace is missing; the snippet appears truncated -- confirm against
 * the upstream source.
 *
 * @param artifact     not referenced by the statements shown here
 * @param releaseDate  passed to the HarvestSkipURI constructor and formatted with df for the log record
 * @param metadata     supplies the artifact URI and checksum
 * @param errorMessage stored on the skip entry; logged as the string "null" when null
 * @throws URISyntaxException declared by the method; the throwing call is not identifiable from this listing
 */
private void addToSkipTable(Artifact artifact, Date releaseDate, ArtifactMetadata metadata, String errorMessage) throws URISyntaxException {
    // add to HarvestSkipURI table if there is not already a row in the table
    HarvestSkipURI skip = new HarvestSkipURI(source, STATE_CLASS, metadata.getArtifactURI(), releaseDate, errorMessage);
    String releaseDateString = df.format(releaseDate);
    // log the literal text "null" rather than a null reference
    String errorMessageString = (errorMessage == null) ? "null" : skip.errorMessage;
    // alternating key/value pairs for the JSON log record
    logJSON(new String[] { "logType", "detail", "action", "addedToSkipTable", "artifactURI", metadata.getArtifactURI().toASCIIString(), "caomCollection", collection, "caomChecksum", metadata.getChecksum(), "releaseDate", releaseDateString, "errorMessage", errorMessageString }, true);
Also used : HarvestSkipURI(ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI)

Example 3 with HarvestSkipURI

use of ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI in project caom2db by opencadc.

the class ArtifactValidator method addToOrUpdateSkipTable.

/**
 * Adds a skip-table entry for the artifact described by {@code metadata}, or
 * updates the existing one, provided a release date can be computed for it.
 *
 * NOTE(review): closing braces are missing in this listing, so the snippet
 * appears truncated.
 *
 * @param metadata     supplies the artifact URI, product/release type and release dates
 * @param errorMessage forwarded to addToSkipTable / updateSkipTable
 * @throws URISyntaxException declared by the method; the throwing call is not identifiable from this listing
 */
private void addToOrUpdateSkipTable(ArtifactMetadata metadata, String errorMessage) throws URISyntaxException {
    // add to the HarvestSkipURI table if there is no row yet, otherwise update the existing row
    // a temporary Artifact is built from the metadata so AccessUtil can compute the release date
    Artifact artifact = new Artifact(metadata.getArtifactURI(), metadata.productType, metadata.releaseType);
    Date releaseDate = AccessUtil.getReleaseDate(artifact, metadata.metaRelease, metadata.dataRelease);
    // nothing is done when the release date is null
    if (releaseDate != null) {
        HarvestSkipURI skip = harvestSkipURIDAO.get(source, STATE_CLASS, metadata.getArtifactURI());
        if (skip == null) {
            // not in skip table, add it
            addToSkipTable(artifact, releaseDate, metadata, errorMessage);
        } else {
            // already present: delegate to the update path
            updateSkipTable(skip, releaseDate, metadata, errorMessage);
Also used : HarvestSkipURI(ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI) Artifact(ca.nrc.cadc.caom2.Artifact) Date(java.util.Date)

Example 4 with HarvestSkipURI

use of ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI in project caom2db by opencadc.

the class ArtifactValidator method getLogicalMetadata.

/**
 * Collects the ArtifactMetadata for every artifact in the collection.
 *
 * When {@code source} has text, the observation list is read through
 * observationDAO and each observation is loaded to depth 3 to reach its
 * artifacts. Otherwise an ADQL query is run against caomTapURL, which is
 * resolved from caomTapResourceID first when that is non-null.
 *
 * NOTE(review): this listing looks like a lossy extraction. Closing braces
 * are missing and a few statements are visibly incomplete (marked inline).
 *
 * @return the metadata set, ordered by ArtifactMetadata.getComparator() on the DAO path;
 *         on the TAP path, whatever query(caomTapURL, adql) returns
 * @throws Exception declared by the method
 */
private TreeSet<ArtifactMetadata> getLogicalMetadata() throws Exception {
    TreeSet<ArtifactMetadata> result = new TreeSet<>(ArtifactMetadata.getComparator());
    if (StringUtil.hasText(source)) {
        // use database <server.database.schema>
        // HarvestSkipURI table is not supported in 'diff' mode, i.e. reportOnly = true
        this.supportSkipURITable = !reportOnly;
        long t1 = System.currentTimeMillis();
        // all three trailing arguments are null on this call
        List<ObservationState> states = observationDAO.getObservationList(collection, null, null, null);
        long t2 = System.currentTimeMillis();
        // NOTE(review): the start of the statement that consumes the message on the next line is
        // missing (presumably a logger call) -- confirm against the upstream source
        long dt = t2 - t1;"get-state-list: size=" + states.size() + " in " + dt + " ms");
        int depth = 3;
        ListIterator<ObservationState> iter = states.listIterator();
        // t1 is reused to time the per-observation loop below
        t1 = System.currentTimeMillis();
        while (iter.hasNext()) {
            // NOTE(review): the right-hand side of this assignment is missing in this listing
            ObservationState s =;
            // GC
            ObservationResponse resp = observationDAO.getObservationResponse(s, depth);
            if (resp == null) {
                log.error("Null response from Observation DAO, ObservationURI: " + s.getURI().toString() + ", depth: " + depth);
            } else if (resp.observation == null) {
                log.error("Observation is null, ObservationURI: " + s.getURI().toString() + ", depth: " + depth);
            } else {
                // one metadata entry per artifact of every plane
                for (Plane plane : resp.observation.getPlanes()) {
                    for (Artifact artifact : plane.getArtifacts()) {
                        String observationID = s.getURI().getObservationID();
                        result.add(getMetadata(observationID, artifact, plane.dataRelease, plane.metaRelease));
        // NOTE(review): the start of the statement after the brace is missing (message only)
        }"Finished logical metadata query in " + (System.currentTimeMillis() - t1) + " ms");
    } else {
        this.supportSkipURITable = false;
        if (caomTapResourceID != null) {
            // source is a TAP resource ID
            AuthMethod authMethod = AuthenticationUtil.getAuthMethodFromCredentials(AuthenticationUtil.getCurrentSubject());
            TapClient tapClient = new TapClient(caomTapResourceID);
            try {
                this.caomTapURL = tapClient.getSyncURL(authMethod);
            } catch (ResourceNotFoundException ex) {
                // rewrap with a clearer message when the failure text mentions "with password"
                // NOTE(review): what happens for other messages is not visible in this listing
                if (ex.getMessage().contains("with password")) {
                    throw new ResourceNotFoundException("TAP service for " + caomTapResourceID + " does not support password authentication.", ex);
        // source is a TAP service URL or a TAP resource ID
        // NOTE(review): collection is concatenated straight into the ADQL text; if it can come
        // from untrusted input it should be validated or have its quotes escaped
        String adql = "select distinct(a.uri), a.contentChecksum, a.contentLength, a.contentType, o.observationID, " + "a.productType, a.releaseType, p.dataRelease, p.metaRelease " + "from caom2.Artifact a " + "join caom2.Plane p on a.planeID = p.planeID " + "join caom2.Observation o on p.obsID = o.obsID " + "where o.collection='" + collection + "'";
        log.debug("logical query: " + adql);
        long start = System.currentTimeMillis();
        // NOTE(review): the start of the second statement on the next line is missing (message only)
        result = query(caomTapURL, adql);"Finished caom2 query in " + (System.currentTimeMillis() - start) + " ms");
    return result;
Also used : Plane(ca.nrc.cadc.caom2.Plane) Artifact(ca.nrc.cadc.caom2.Artifact) AuthMethod(ca.nrc.cadc.auth.AuthMethod) TreeSet(java.util.TreeSet) ObservationResponse(ca.nrc.cadc.caom2.ObservationResponse) TapClient(org.opencadc.tap.TapClient) ObservationState(ca.nrc.cadc.caom2.ObservationState) ResourceNotFoundException(ca.nrc.cadc.net.ResourceNotFoundException) ArtifactMetadata(ca.nrc.cadc.caom2.artifact.ArtifactMetadata)

Example 5 with HarvestSkipURI

use of ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI in project caom2db by opencadc.

the class ObservationValidator method doit.

/**
 * Compares the observation-state lists of the source and destination
 * collections and, unless {@code dryrun} is set, looks up or creates a
 * HarvestSkipURI entry for each discrepancy.
 *
 * Failures while processing a single discrepancy are classified by exception
 * type and logged. Timings for the state, query and transaction phases are
 * logged at debug level in the finally block.
 *
 * NOTE(review): this listing looks like a lossy extraction. Closing braces
 * are missing, several statements are visibly incomplete (marked inline), and
 * no transaction start/commit/rollback or skip-table write calls are present
 * even though the log messages refer to them.
 *
 * @return a Progress whose {@code found} field is set to the size of the source state set
 */
private Progress doit() {
    Progress ret = new Progress();
    long t = System.currentTimeMillis();
    // phase timings; they stay at -1 when the corresponding phase is never reached
    long timeState = -1;
    long timeQuery = -1;
    long timeTransaction = -1;
    try {
        // hint
        t = System.currentTimeMillis();
        timeState = System.currentTimeMillis() - t;
        // NOTE(review): the start of the second statement on the next line is missing (message only)
        t = System.currentTimeMillis();"getObservationList: " + src.getIdentifier());
        List<ObservationState> tmpSrcState = null;
        // the source list comes from the DAO when one is configured, otherwise from the service
        if (srcObservationDAO != null) {
            tmpSrcState = srcObservationDAO.getObservationList(src.getCollection(), null, null, null);
        } else if (srcObservationService != null) {
            tmpSrcState = srcObservationService.getObservationList(src.getCollection(), null, null, null);
        } else {
            throw new RuntimeException("BUG: both srcObservationDAO and srcObservationService are null");
        // NOTE(review): the start of the statement after the brace is missing (message only)
        }"found: " + tmpSrcState.size());
        // NOTE(review): no statement that copies tmpSrcState into srcState is visible in this listing
        Set<ObservationState> srcState = new TreeSet<>(compStateUri);
        // GC
        tmpSrcState = null;"source set: " + srcState.size());"getObservationList: " + dest.getIdentifier());
        List<ObservationState> tmpDstState = destObservationDAO.getObservationList(dest.getCollection(), null, null, null);"found: " + tmpDstState.size());
        // NOTE(review): as above, dstState is not visibly populated in this listing
        Set<ObservationState> dstState = new TreeSet<>(compStateUri);
        // GC
        tmpDstState = null;"destination set: " + dstState.size());
        Set<ObservationStateError> errlist = calculateErroneousObservationStates(srcState, dstState);"discrepancies found: " + errlist.size());
        timeQuery = System.currentTimeMillis() - t;
        // t is reset here; the finally block derives timeTransaction from it
        t = System.currentTimeMillis();
        List<SkippedWrapperURI<ObservationStateError>> entityListSrc = wrap(errlist);
        ret.found = srcState.size();
        ListIterator<SkippedWrapperURI<ObservationStateError>> iter = entityListSrc.listIterator();
        while (iter.hasNext()) {
            // NOTE(review): the right-hand side of this assignment is missing in this listing
            SkippedWrapperURI<ObservationStateError> ow =;
            ObservationStateError o = ow.entity;
            // allow garbage collection during loop
            String skipMsg = null;
            try {
                if (!dryrun) {
                    if (o != null) {
                        skipMsg = o.toString();
                        try {
                            log.debug("starting HarvestSkipURI transaction");
                            // only a newly created skip entry is meant to be written
                            boolean putSkip = true;
                            HarvestSkipURI skip = harvestSkip.get(source, cname, o.getObs().getURI().getURI());
                            Date tryAfter = o.getObs().maxLastModified;
                            if (skip == null) {
                                skip = new HarvestSkipURI(source, cname, o.getObs().getURI().getURI(), tryAfter, skipMsg);
                            } else {
                                // avoid lastModified update for no change
                                putSkip = false;
                            // a transaction left open by a previous iteration is treated as a bug
                            if (destObservationDAO.getTransactionManager().isOpen()) {
                                throw new RuntimeException("BUG: found open trasnaction at start of next observation");
                            log.debug("starting transaction");
                            // track the fail
                            if (putSkip) {
                                // NOTE(review): the start of the statement on the next line is missing (message only)
                      "put: " + skip);
                            } else {
                                // NOTE(review): same truncation as above
                      "known: " + skip);
                        } catch (Throwable oops) {
                            log.warn("failed to insert HarvestSkipURI", oops);
                            log.warn("rollback HarvestSkipURI: OK");
                    log.debug("committing transaction");
                    log.debug("commit: OK");
            } catch (Throwable oops) {
                // classify the failure for logging; no rethrow is visible in this handler
                String str = oops.toString();
                if (oops instanceof Error) {
                    log.error("FATAL - probably installation or environment", oops);
                } else if (oops instanceof NullPointerException) {
                    log.error("BUG", oops);
                } else if (oops instanceof BadSqlGrammarException) {
                    log.error("BUG", oops);
                    BadSqlGrammarException bad = (BadSqlGrammarException) oops;
                    SQLException sex1 = bad.getSQLException();
                    if (sex1 != null) {
                        log.error("CAUSE", sex1);
                        // sex2 is logged without a null check
                        SQLException sex2 = sex1.getNextException();
                        log.error("NEXT CAUSE", sex2);
                } else if (oops instanceof DataAccessResourceFailureException) {
                    log.error("SEVERE PROBLEM - probably out of space in database", oops);
                } else if (oops instanceof DataIntegrityViolationException && str.contains("duplicate key value violates unique constraint \"i_observationuri\"")) {
                    // NOTE(review): o is dereferenced here although it is null-checked earlier in the loop
                    log.error("CONTENT PROBLEM - duplicate observation: " + " " + o.getObs().getURI().getURI().toASCIIString());
                } else if (oops instanceof UncategorizedSQLException) {
                    if (str.contains("spherepoly_from_array")) {
                        log.error("UNDETECTED illegal polygon: " + o.getObs().getURI().getURI());
                    } else {
                        log.error("unexpected exception", oops);
                } else if (oops instanceof IllegalArgumentException && str.contains("CaomValidator") && str.contains("keywords")) {
                    log.error("CONTENT PROBLEM - invalid keywords: " + " " + o.getObs().getURI().getURI().toASCIIString());
                } else {
                    log.error("unexpected exception", oops);
    } finally {
        // measured from the last reset of t in the try block
        timeTransaction = System.currentTimeMillis() - t;
        log.debug("time to get HarvestState: " + timeState + "ms");
        log.debug("time to run ObservationListQuery: " + timeQuery + "ms");
        log.debug("time to run transactions: " + timeTransaction + "ms");
    return ret;
Also used : BadSqlGrammarException(org.springframework.jdbc.BadSqlGrammarException) UncategorizedSQLException(org.springframework.jdbc.UncategorizedSQLException) SQLException(java.sql.SQLException) UncategorizedSQLException(org.springframework.jdbc.UncategorizedSQLException) TreeSet(java.util.TreeSet) DataAccessResourceFailureException(org.springframework.dao.DataAccessResourceFailureException) HarvestSkipURI(ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI) Date(java.util.Date) DataIntegrityViolationException(org.springframework.dao.DataIntegrityViolationException) ObservationState(ca.nrc.cadc.caom2.ObservationState)


HarvestSkipURI (ca.nrc.cadc.caom2.harvester.state.HarvestSkipURI)8 Date (java.util.Date)6 Artifact (ca.nrc.cadc.caom2.Artifact)4 ObservationState (ca.nrc.cadc.caom2.ObservationState)4 ObservationResponse (ca.nrc.cadc.caom2.ObservationResponse)3 Plane (ca.nrc.cadc.caom2.Plane)3 Observation (ca.nrc.cadc.caom2.Observation)2 ObservationURI (ca.nrc.cadc.caom2.ObservationURI)2 HarvestSkipURIDAO (ca.nrc.cadc.caom2.harvester.state.HarvestSkipURIDAO)2 HarvestState (ca.nrc.cadc.caom2.harvester.state.HarvestState)2 UtilTest (ca.nrc.cadc.caom2.persistence.UtilTest)2 TransientException (ca.nrc.cadc.net.TransientException)2 IOException (java.io.IOException)2 URI (java.net.URI)2 SQLException (java.sql.SQLException)2 TreeSet (java.util.TreeSet)2 ExecutionException (java.util.concurrent.ExecutionException)2 Test (org.junit.Test)2 DataAccessResourceFailureException (org.springframework.dao.DataAccessResourceFailureException)2 BadSqlGrammarException (org.springframework.jdbc.BadSqlGrammarException)2