
Example 1 with DeleteObjectsResult

use of com.amazonaws.services.s3.model.DeleteObjectsResult in project jackrabbit by apache.

the class S3Backend method renameKeys.

/**
     * This method renames object keys in S3 concurrently. The number of
     * concurrent threads is defined by the 'maxConnections' property in
     * aws.properties. As S3 doesn't have a "move" command, this method
     * simulates a move by copying the object to the new key and then
     * deleting the older key.
     */
private void renameKeys() throws DataStoreException {
    long startTime = System.currentTimeMillis();
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    long count = 0;
    try {
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        ObjectListing prevObjectListing = s3service.listObjects(bucket);
        List<DeleteObjectsRequest.KeyVersion> deleteList = new ArrayList<DeleteObjectsRequest.KeyVersion>();
        int nThreads = Integer.parseInt(properties.getProperty("maxConnections"));
        ExecutorService executor = Executors.newFixedThreadPool(nThreads, new NamedThreadFactory("s3-object-rename-worker"));
        boolean taskAdded = false;
        while (true) {
            for (S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
                executor.execute(new KeyRenameThread(s3ObjSumm.getKey()));
                taskAdded = true;
                count++;
                // delete the object if it follows old key name format
                if (s3ObjSumm.getKey().startsWith(KEY_PREFIX)) {
                    deleteList.add(new DeleteObjectsRequest.KeyVersion(s3ObjSumm.getKey()));
                }
            }
            if (!prevObjectListing.isTruncated())
                break;
            prevObjectListing = s3service.listNextBatchOfObjects(prevObjectListing);
        }
        // This makes the executor accept no new tasks
        // and finish the tasks already in the queue
        executor.shutdown();
        try {
            // Wait until all threads are finished
            while (taskAdded && !executor.awaitTermination(10, TimeUnit.SECONDS)) {
                LOG.info("Rename S3 keys tasks timedout. Waiting again");
            }
        } catch (InterruptedException ie) {
            // ignored; fall through to log the summary and delete the old keys
        }
        LOG.info("Renamed [{}] keys, time taken [{}]sec", count, ((System.currentTimeMillis() - startTime) / 1000));
        // Delete older keys.
        if (deleteList.size() > 0) {
            DeleteObjectsRequest delObjsReq = new DeleteObjectsRequest(bucket);
            int batchSize = 500, startIndex = 0, size = deleteList.size();
            int endIndex = batchSize < size ? batchSize : size;
            while (endIndex <= size) {
                delObjsReq.setKeys(Collections.unmodifiableList(deleteList.subList(startIndex, endIndex)));
                DeleteObjectsResult dobjs = s3service.deleteObjects(delObjsReq);
                LOG.info("Records[{}] deleted in datastore from index [{}] to [{}]", new Object[] { dobjs.getDeletedObjects().size(), startIndex, (endIndex - 1) });
                if (endIndex == size) {
                    break;
                } else {
                    startIndex = endIndex;
                    endIndex = (startIndex + batchSize) < size ? (startIndex + batchSize) : size;
                }
            }
        }
    } finally {
        if (contextClassLoader != null) {
            Thread.currentThread().setContextClassLoader(contextClassLoader);
        }
    }
}
Also used : NamedThreadFactory(org.apache.jackrabbit.core.data.util.NamedThreadFactory) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsResult(com.amazonaws.services.s3.model.DeleteObjectsResult) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest) ExecutorService(java.util.concurrent.ExecutorService)
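
The KeyRenameThread executed above is not shown in this snippet. As a rough sketch only (KeyCopySketch and copyToNewKey are hypothetical names, not Jackrabbit code), the copy half of the simulated move could look like this with the AWS SDK v1 client:

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.CopyObjectRequest;

// Hypothetical sketch of the copy step behind a simulated "move": copy the
// object under its new key first; the old key is removed later by the batched
// DeleteObjectsRequest shown above.
class KeyCopySketch {

    private final AmazonS3 s3;   // assumed: an already configured client
    private final String bucket; // assumed: the datastore bucket

    KeyCopySketch(AmazonS3 s3, String bucket) {
        this.s3 = s3;
        this.bucket = bucket;
    }

    // Copies oldKey to newKey within the same bucket.
    void copyToNewKey(String oldKey, String newKey) {
        s3.copyObject(new CopyObjectRequest(bucket, oldKey, bucket, newKey));
    }
}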

Example 2 with DeleteObjectsResult

use of com.amazonaws.services.s3.model.DeleteObjectsResult in project jackrabbit-oak by apache.

the class S3Backend method deleteAllOlderThan.

@Override
public Set<DataIdentifier> deleteAllOlderThan(long min) throws DataStoreException {
    long start = System.currentTimeMillis();
    // S3 stores lastModified rounded down to the lower boundary of the
    // timestamp (second precision), hence min is reduced by 1000 ms.
    min = min - 1000;
    Set<DataIdentifier> deleteIdSet = new HashSet<DataIdentifier>(30);
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    try {
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        ObjectListing prevObjectListing = s3service.listObjects(bucket);
        while (true) {
            List<DeleteObjectsRequest.KeyVersion> deleteList = new ArrayList<DeleteObjectsRequest.KeyVersion>();
            for (S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
                if (!s3ObjSumm.getKey().startsWith(META_KEY_PREFIX)) {
                    DataIdentifier identifier = new DataIdentifier(getIdentifierName(s3ObjSumm.getKey()));
                    long lastModified = s3ObjSumm.getLastModified().getTime();
                    LOG.debug("Identifier [{}]'s lastModified = [{}]", identifier, lastModified);
                    if (lastModified < min && store.confirmDelete(identifier) && //  order is important here
                    s3service.getObjectMetadata(bucket, s3ObjSumm.getKey()).getLastModified().getTime() < min) {
                        store.deleteFromCache(identifier);
                        LOG.debug("add id [{}] to delete lists", s3ObjSumm.getKey());
                        deleteList.add(new DeleteObjectsRequest.KeyVersion(s3ObjSumm.getKey()));
                        deleteIdSet.add(identifier);
                    }
                }
            }
            if (deleteList.size() > 0) {
                DeleteObjectsRequest delObjsReq = new DeleteObjectsRequest(bucket);
                delObjsReq.setKeys(deleteList);
                DeleteObjectsResult dobjs = s3service.deleteObjects(delObjsReq);
                if (dobjs.getDeletedObjects().size() != deleteList.size()) {
                    throw new DataStoreException("Incomplete delete object request. Only " + dobjs.getDeletedObjects().size() + " out of " + deleteList.size() + " are deleted");
                } else {
                    LOG.debug("[{}] records deleted from datastore", deleteList);
                }
            }
            if (!prevObjectListing.isTruncated()) {
                break;
            }
            prevObjectListing = s3service.listNextBatchOfObjects(prevObjectListing);
        }
    } finally {
        if (contextClassLoader != null) {
            Thread.currentThread().setContextClassLoader(contextClassLoader);
        }
    }
    LOG.info("deleteAllOlderThan: min=[{}] exit. Deleted[{}] records. Number of records deleted [{}] took [{}]ms", new Object[] { min, deleteIdSet, deleteIdSet.size(), (System.currentTimeMillis() - start) });
    return deleteIdSet;
}
Also used : DataStoreException(org.apache.jackrabbit.core.data.DataStoreException) DataIdentifier(org.apache.jackrabbit.core.data.DataIdentifier) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsResult(com.amazonaws.services.s3.model.DeleteObjectsResult) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest) HashSet(java.util.HashSet)
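
The size check above treats a short DeletedObjects list as an incomplete delete. Note that the v1 SDK can also report partial failures by throwing MultiObjectDeleteException; a minimal sketch of handling that case (BatchDeleteSketch and deleteBatch are hypothetical names, not part of S3Backend):

import java.util.List;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.DeleteObjectsResult;
import com.amazonaws.services.s3.model.MultiObjectDeleteException;

// Hypothetical sketch: delete a batch of keys and report how many were
// actually removed, including the partial-failure case surfaced as
// MultiObjectDeleteException.
class BatchDeleteSketch {

    static int deleteBatch(AmazonS3 s3, String bucket,
                           List<DeleteObjectsRequest.KeyVersion> keys) {
        DeleteObjectsRequest request = new DeleteObjectsRequest(bucket).withKeys(keys);
        try {
            DeleteObjectsResult result = s3.deleteObjects(request);
            return result.getDeletedObjects().size();
        } catch (MultiObjectDeleteException e) {
            // Some keys were deleted, others failed with an individual error code.
            for (MultiObjectDeleteException.DeleteError error : e.getErrors()) {
                System.err.println("Failed to delete " + error.getKey() + ": " + error.getCode());
            }
            return e.getDeletedObjects().size();
        }
    }
}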

Example 3 with DeleteObjectsResult

use of com.amazonaws.services.s3.model.DeleteObjectsResult in project jackrabbit-oak by apache.

the class S3Backend method deleteAllMetadataRecords.

public void deleteAllMetadataRecords(String prefix) {
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    try {
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        ListObjectsRequest listObjectsRequest = new ListObjectsRequest().withBucketName(bucket).withPrefix(addMetaKeyPrefix(prefix));
        ObjectListing metaList = s3service.listObjects(listObjectsRequest);
        List<DeleteObjectsRequest.KeyVersion> deleteList = new ArrayList<DeleteObjectsRequest.KeyVersion>();
        for (S3ObjectSummary s3ObjSumm : metaList.getObjectSummaries()) {
            deleteList.add(new DeleteObjectsRequest.KeyVersion(s3ObjSumm.getKey()));
        }
        if (deleteList.size() > 0) {
            DeleteObjectsRequest delObjsReq = new DeleteObjectsRequest(bucket);
            delObjsReq.setKeys(deleteList);
            DeleteObjectsResult dobjs = s3service.deleteObjects(delObjsReq);
        }
    } finally {
        if (contextClassLoader != null) {
            Thread.currentThread().setContextClassLoader(contextClassLoader);
        }
    }
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsResult(com.amazonaws.services.s3.model.DeleteObjectsResult) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest)
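
The example above deletes all listed metadata keys in a single request. S3's multi-object delete accepts at most 1000 keys per call, so a larger delete list would need to be split into chunks. A minimal sketch of that chunking (ChunkedDeleteSketch is a hypothetical helper, not part of S3Backend):

import java.util.List;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;

// Hypothetical sketch: split a large delete list into requests of at most
// 1000 keys, the maximum S3 accepts per multi-object delete call.
class ChunkedDeleteSketch {

    private static final int MAX_KEYS_PER_REQUEST = 1000;

    static void deleteAll(AmazonS3 s3, String bucket,
                          List<DeleteObjectsRequest.KeyVersion> keys) {
        for (int from = 0; from < keys.size(); from += MAX_KEYS_PER_REQUEST) {
            int to = Math.min(from + MAX_KEYS_PER_REQUEST, keys.size());
            DeleteObjectsRequest request = new DeleteObjectsRequest(bucket)
                    .withKeys(keys.subList(from, to));
            s3.deleteObjects(request);
        }
    }
}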

Example 4 with DeleteObjectsResult

use of com.amazonaws.services.s3.model.DeleteObjectsResult in project jackrabbit-oak by apache.

the class S3Backend method renameKeys.

/**
     * This method renames object keys in S3 concurrently. The number of
     * concurrent threads is defined by the 'maxConnections' property in
     * aws.properties. As S3 doesn't have a "move" command, this method
     * simulates a move by copying the object to the new key and then
     * deleting the older key.
     */
private void renameKeys() throws DataStoreException {
    long startTime = System.currentTimeMillis();
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    long count = 0;
    try {
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        ObjectListing prevObjectListing = s3service.listObjects(bucket);
        List<DeleteObjectsRequest.KeyVersion> deleteList = new ArrayList<DeleteObjectsRequest.KeyVersion>();
        int nThreads = Integer.parseInt(properties.getProperty("maxConnections"));
        ExecutorService executor = Executors.newFixedThreadPool(nThreads, new NamedThreadFactory("s3-object-rename-worker"));
        boolean taskAdded = false;
        while (true) {
            for (S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
                executor.execute(new KeyRenameThread(s3ObjSumm.getKey()));
                taskAdded = true;
                count++;
                // delete the object if it follows old key name format
                if (s3ObjSumm.getKey().startsWith(KEY_PREFIX)) {
                    deleteList.add(new DeleteObjectsRequest.KeyVersion(s3ObjSumm.getKey()));
                }
            }
            if (!prevObjectListing.isTruncated())
                break;
            prevObjectListing = s3service.listNextBatchOfObjects(prevObjectListing);
        }
        // This makes the executor accept no new tasks
        // and finish the tasks already in the queue
        executor.shutdown();
        try {
            // Wait until all threads are finished
            while (taskAdded && !executor.awaitTermination(10, TimeUnit.SECONDS)) {
                LOG.info("Rename S3 keys tasks timedout. Waiting again");
            }
        } catch (InterruptedException ie) {
            // ignored; fall through to log the summary and delete the old keys
        }
        LOG.info("Renamed [{}] keys, time taken [{}]sec", count, ((System.currentTimeMillis() - startTime) / 1000));
        // Delete older keys.
        if (deleteList.size() > 0) {
            DeleteObjectsRequest delObjsReq = new DeleteObjectsRequest(bucket);
            int batchSize = 500, startIndex = 0, size = deleteList.size();
            int endIndex = batchSize < size ? batchSize : size;
            while (endIndex <= size) {
                delObjsReq.setKeys(Collections.unmodifiableList(deleteList.subList(startIndex, endIndex)));
                DeleteObjectsResult dobjs = s3service.deleteObjects(delObjsReq);
                LOG.info("Records[{}] deleted in datastore from index [{}] to [{}]", dobjs.getDeletedObjects().size(), startIndex, (endIndex - 1));
                if (endIndex == size) {
                    break;
                } else {
                    startIndex = endIndex;
                    endIndex = (startIndex + batchSize) < size ? (startIndex + batchSize) : size;
                }
            }
        }
    } finally {
        if (contextClassLoader != null) {
            Thread.currentThread().setContextClassLoader(contextClassLoader);
        }
    }
}
Also used : NamedThreadFactory(org.apache.jackrabbit.core.data.util.NamedThreadFactory) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsResult(com.amazonaws.services.s3.model.DeleteObjectsResult) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest) ExecutorService(java.util.concurrent.ExecutorService)

Example 5 with DeleteObjectsResult

use of com.amazonaws.services.s3.model.DeleteObjectsResult in project jackrabbit by apache.

the class S3Backend method deleteAllOlderThan.

@Override
public Set<DataIdentifier> deleteAllOlderThan(long min) throws DataStoreException {
    long start = System.currentTimeMillis();
    // S3 stores lastModified rounded down to the lower boundary of the
    // timestamp (second precision), hence min is reduced by 1000 ms.
    min = min - 1000;
    Set<DataIdentifier> deleteIdSet = new HashSet<DataIdentifier>(30);
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    try {
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        ObjectListing prevObjectListing = s3service.listObjects(bucket);
        while (true) {
            List<DeleteObjectsRequest.KeyVersion> deleteList = new ArrayList<DeleteObjectsRequest.KeyVersion>();
            for (S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
                DataIdentifier identifier = new DataIdentifier(getIdentifierName(s3ObjSumm.getKey()));
                long lastModified = s3ObjSumm.getLastModified().getTime();
                LOG.debug("Identifier [{}]'s lastModified = [{}]", identifier, lastModified);
                if (lastModified < min && getDataStore().confirmDelete(identifier) && //  order is important here
                s3service.getObjectMetadata(bucket, s3ObjSumm.getKey()).getLastModified().getTime() < min) {
                    getDataStore().deleteFromCache(identifier);
                    LOG.debug("add id [{}] to delete lists", s3ObjSumm.getKey());
                    deleteList.add(new DeleteObjectsRequest.KeyVersion(s3ObjSumm.getKey()));
                    deleteIdSet.add(identifier);
                }
            }
            if (deleteList.size() > 0) {
                DeleteObjectsRequest delObjsReq = new DeleteObjectsRequest(bucket);
                delObjsReq.setKeys(deleteList);
                DeleteObjectsResult dobjs = s3service.deleteObjects(delObjsReq);
                if (dobjs.getDeletedObjects().size() != deleteList.size()) {
                    throw new DataStoreException("Incomplete delete object request. Only " + dobjs.getDeletedObjects().size() + " out of " + deleteList.size() + " are deleted");
                } else {
                    LOG.debug("[{}] records deleted from datastore", deleteList);
                }
            }
            if (!prevObjectListing.isTruncated()) {
                break;
            }
            prevObjectListing = s3service.listNextBatchOfObjects(prevObjectListing);
        }
    } finally {
        if (contextClassLoader != null) {
            Thread.currentThread().setContextClassLoader(contextClassLoader);
        }
    }
    LOG.info("deleteAllOlderThan: min=[{}] exit. Deleted[{}] records. Number of records deleted [{}] took [{}]ms", new Object[] { min, deleteIdSet, deleteIdSet.size(), (System.currentTimeMillis() - start) });
    return deleteIdSet;
}
Also used : DataStoreException(org.apache.jackrabbit.core.data.DataStoreException) DataIdentifier(org.apache.jackrabbit.core.data.DataIdentifier) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsResult(com.amazonaws.services.s3.model.DeleteObjectsResult) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest) HashSet(java.util.HashSet)

Aggregations

DeleteObjectsRequest (com.amazonaws.services.s3.model.DeleteObjectsRequest)6 DeleteObjectsResult (com.amazonaws.services.s3.model.DeleteObjectsResult)6 ObjectListing (com.amazonaws.services.s3.model.ObjectListing)6 S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary)6 ArrayList (java.util.ArrayList)6 ExecutorService (java.util.concurrent.ExecutorService)3 NamedThreadFactory (org.apache.jackrabbit.core.data.util.NamedThreadFactory)3 HashSet (java.util.HashSet)2 DataIdentifier (org.apache.jackrabbit.core.data.DataIdentifier)2 DataStoreException (org.apache.jackrabbit.core.data.DataStoreException)2 ListObjectsRequest (com.amazonaws.services.s3.model.ListObjectsRequest)1