Search in sources :

Example 36 with ListObjectsRequest

Use of com.amazonaws.services.s3.model.ListObjectsRequest in project hadoop by apache.

the class S3AFileSystem method getFileStatus.

/**
 * Resolve a path to its status by probing the object store.
 * <p>
 * For a non-root key the probes run in this order, stopping at the first hit:
 * metadata of the exact key, metadata of the key with a trailing "/", and
 * finally a one-entry listing under the key used as a prefix. The root (empty
 * key) skips straight to the listing. A 404 service error on any probe means
 * "nothing here" and falls through to the next probe; every other AWS failure
 * is converted via {@code translateException}.
 *
 * @param f The path we want information from
 * @return a FileStatus object
 * @throws java.io.FileNotFoundException when no probe finds the path
 * @throws IOException on other problems.
 */
public S3AFileStatus getFileStatus(final Path f) throws IOException {
    incrementStatistic(INVOCATION_GET_FILE_STATUS);
    final Path path = qualify(f);
    String key = pathToKey(path);
    LOG.debug("Getting path status for {}  ({})", path, key);

    if (!key.isEmpty()) {
        // Probe 1: an object stored under exactly this key.
        try {
            final ObjectMetadata exact = getObjectMetadata(key);
            if (!objectRepresentsDirectory(key, exact.getContentLength())) {
                LOG.debug("Found exact file: normal file");
                return new S3AFileStatus(
                        exact.getContentLength(),
                        dateToLong(exact.getLastModified()),
                        path,
                        getDefaultBlockSize(path),
                        username);
            }
            LOG.debug("Found exact file: fake directory");
            // NOTE(review): the boolean is presumably the "empty directory" flag - confirm against S3AFileStatus.
            return new S3AFileStatus(true, path, username);
        } catch (AmazonServiceException e) {
            // 404 simply means this probe missed; anything else is a real failure.
            if (e.getStatusCode() != 404) {
                throw translateException("getFileStatus", path, e);
            }
        } catch (AmazonClientException e) {
            throw translateException("getFileStatus", path, e);
        }

        // Probe 2: the same key with a trailing slash (directory marker object).
        // Necessary?
        if (!key.endsWith("/")) {
            final String dirKey = key + "/";
            try {
                final ObjectMetadata marker = getObjectMetadata(dirKey);
                if (!objectRepresentsDirectory(dirKey, marker.getContentLength())) {
                    LOG.warn("Found file (with /): real file? should not happen: {}", key);
                    return new S3AFileStatus(
                            marker.getContentLength(),
                            dateToLong(marker.getLastModified()),
                            path,
                            getDefaultBlockSize(path),
                            username);
                }
                LOG.debug("Found file (with /): fake directory");
                return new S3AFileStatus(true, path, username);
            } catch (AmazonServiceException e) {
                if (e.getStatusCode() != 404) {
                    throw translateException("getFileStatus", dirKey, e);
                }
            } catch (AmazonClientException e) {
                throw translateException("getFileStatus", dirKey, e);
            }
        }
    }

    // Probe 3: is there anything at all underneath this key used as a prefix?
    try {
        key = maybeAddTrailingSlash(key);
        final ListObjectsRequest probe = new ListObjectsRequest();
        probe.setBucketName(bucket);
        probe.setPrefix(key);
        probe.setDelimiter("/");
        // A single entry is enough to prove the prefix is populated.
        probe.setMaxKeys(1);
        final ObjectListing listing = listObjects(probe);

        final boolean nothingListed =
                listing.getCommonPrefixes().isEmpty() && listing.getObjectSummaries().isEmpty();
        if (!nothingListed) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Found path as directory (with /): {}/{}", listing.getCommonPrefixes().size(), listing.getObjectSummaries().size());
                for (S3ObjectSummary entry : listing.getObjectSummaries()) {
                    LOG.debug("Summary: {} {}", entry.getKey(), entry.getSize());
                }
                for (String commonPrefix : listing.getCommonPrefixes()) {
                    LOG.debug("Prefix: {}", commonPrefix);
                }
            }
            return new S3AFileStatus(false, path, username);
        }
        if (key.isEmpty()) {
            // The root always exists, even when the bucket holds nothing.
            LOG.debug("Found root directory");
            return new S3AFileStatus(true, path, username);
        }
    } catch (AmazonServiceException e) {
        if (e.getStatusCode() != 404) {
            throw translateException("getFileStatus", key, e);
        }
    } catch (AmazonClientException e) {
        throw translateException("getFileStatus", key, e);
    }

    LOG.debug("Not Found: {}", path);
    throw new FileNotFoundException("No such file or directory: " + path);
}
Also used : Path(org.apache.hadoop.fs.Path) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) AmazonClientException(com.amazonaws.AmazonClientException) AmazonServiceException(com.amazonaws.AmazonServiceException) FileNotFoundException(java.io.FileNotFoundException) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) ObjectMetadata(com.amazonaws.services.s3.model.ObjectMetadata)

Example 37 with ListObjectsRequest

Use of com.amazonaws.services.s3.model.ListObjectsRequest in project hadoop by apache.

the class S3AFileSystem method innerDelete.

/**
 * Delete the object or directory described by a status entry.
 * See {@link #delete(Path, boolean)}.
 * <p>
 * A file is removed with a single object delete. A directory is removed
 * either by deleting its marker object (when the status reports it empty) or
 * by listing everything under its key prefix and deleting the keys in batches
 * of {@code MAX_ENTRIES_TO_DELETE}. Afterwards the parent directory, if there
 * is one, is handed to {@code createFakeDirectoryIfNecessary}.
 *
 * @param status fileStatus object
 * @param recursive if path is a directory and set to
 * true, the directory is deleted else throws an exception. In
 * case of a file the recursive can be set to either true or false.
 * @return  true if delete is successful else false.
 * @throws IOException due to inability to delete a directory or file.
 * @throws AmazonClientException on failures inside the AWS SDK
 */
private boolean innerDelete(S3AFileStatus status, boolean recursive) throws IOException, AmazonClientException {
    Path f = status.getPath();
    LOG.debug("Delete path {} - recursive {}", f, recursive);
    String key = pathToKey(f);

    if (!status.isDirectory()) {
        LOG.debug("delete: Path is a file");
        instrumentation.fileDeleted(1);
        deleteObject(key);
    } else {
        LOG.debug("delete: Path is a directory: {}", f);
        // Directory keys are always handled in their slash-terminated form.
        key = key.endsWith("/") ? key : key + "/";
        if (key.equals("/")) {
            return rejectRootDirectoryDelete(status, recursive);
        }

        final boolean emptyDir = status.isEmptyDirectory();
        if (!emptyDir && !recursive) {
            throw new PathIsNotEmptyDirectoryException(f.toString());
        }

        if (emptyDir) {
            LOG.debug("Deleting fake empty directory {}", key);
            deleteObject(key);
            instrumentation.directoryDeleted();
        } else {
            LOG.debug("Getting objects for directory prefix {} to delete", key);
            ListObjectsRequest listing = createListObjectsRequest(key, null);
            ObjectListing page = listObjects(listing);
            List<DeleteObjectsRequest.KeyVersion> pending = new ArrayList<>(page.getObjectSummaries().size());

            boolean morePages;
            do {
                for (S3ObjectSummary entry : page.getObjectSummaries()) {
                    pending.add(new DeleteObjectsRequest.KeyVersion(entry.getKey()));
                    LOG.debug("Got object to delete {}", entry.getKey());
                    // Flush as soon as a full batch has accumulated.
                    if (pending.size() == MAX_ENTRIES_TO_DELETE) {
                        removeKeys(pending, true, false);
                    }
                }
                morePages = page.isTruncated();
                if (morePages) {
                    page = continueListObjects(page);
                }
            } while (morePages);

            // Whatever is left after the last page forms the final, partial batch.
            if (!pending.isEmpty()) {
                removeKeys(pending, false, false);
            }
        }
    }

    Path parent = f.getParent();
    if (parent != null) {
        createFakeDirectoryIfNecessary(parent);
    }
    return true;
}
Also used : Path(org.apache.hadoop.fs.Path) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) PathIsNotEmptyDirectoryException(org.apache.hadoop.fs.PathIsNotEmptyDirectoryException) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest)

Example 38 with ListObjectsRequest

Use of com.amazonaws.services.s3.model.ListObjectsRequest in project nifi by apache.

the class TestListS3 method testList.

/**
 * Lists a mocked bucket holding three objects and checks that exactly one
 * listObjects call is made (and no listVersions call), that one flow file per
 * object is routed to success with the expected attributes, and that the
 * stored cluster state carries the objects' last-modified timestamp.
 */
@Test
public void testList() {
    runner.setProperty(ListS3.REGION, "eu-west-1");
    runner.setProperty(ListS3.BUCKET, "test-bucket");

    // All three objects share one timestamp so the expected state value is unambiguous.
    Date lastModified = new Date();
    ObjectListing objectListing = new ObjectListing();
    for (String objectKey : new String[] {"a", "b/c", "d/e"}) {
        S3ObjectSummary summary = new S3ObjectSummary();
        summary.setBucketName("test-bucket");
        summary.setKey(objectKey);
        summary.setLastModified(lastModified);
        objectListing.getObjectSummaries().add(summary);
    }
    Mockito.when(mockS3Client.listObjects(Mockito.any(ListObjectsRequest.class))).thenReturn(objectListing);

    runner.run();

    // Exactly one listing request, aimed at the configured bucket; versions are never listed.
    ArgumentCaptor<ListObjectsRequest> requestCaptor = ArgumentCaptor.forClass(ListObjectsRequest.class);
    Mockito.verify(mockS3Client, Mockito.times(1)).listObjects(requestCaptor.capture());
    ListObjectsRequest sentRequest = requestCaptor.getValue();
    assertEquals("test-bucket", sentRequest.getBucketName());
    Mockito.verify(mockS3Client, Mockito.never()).listVersions(Mockito.any());

    runner.assertAllFlowFilesTransferred(ListS3.REL_SUCCESS, 3);
    List<MockFlowFile> emitted = runner.getFlowFilesForRelationship(ListS3.REL_SUCCESS);
    String expectedTimestamp = String.valueOf(lastModified.getTime());

    MockFlowFile first = emitted.get(0);
    first.assertAttributeEquals("filename", "a");
    first.assertAttributeEquals("s3.bucket", "test-bucket");
    first.assertAttributeEquals("s3.lastModified", expectedTimestamp);

    MockFlowFile second = emitted.get(1);
    second.assertAttributeEquals("filename", "b/c");
    MockFlowFile third = emitted.get(2);
    third.assertAttributeEquals("filename", "d/e");

    runner.getStateManager().assertStateEquals(ListS3.CURRENT_TIMESTAMP, expectedTimestamp, Scope.CLUSTER);
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) Date(java.util.Date) Test(org.junit.Test)

Example 39 with ListObjectsRequest

Use of com.amazonaws.services.s3.model.ListObjectsRequest in project dataverse by IQSS.

the class S3AccessIO method deleteAllAuxObjects.

/**
 * Deletes every object stored under this data file's auxiliary-object key
 * prefix (the value of {@code getDestinationKey("")}).
 * <p>
 * The method opens the storage for write access if needed, collects the keys
 * from all pages of the listing, and then removes them with multi-object
 * delete requests. S3 accepts at most 1000 keys per multi-object delete, so
 * the keys are sent in batches of that size instead of one oversized request.
 *
 * @throws IOException if the listing fails or if one or more objects could
 *         not be deleted; the underlying AWS exception is attached as the cause
 */
@Override
public void deleteAllAuxObjects() throws IOException {
    if (!this.canWrite()) {
        open(DataAccessOption.WRITE_ACCESS);
    }
    String prefix = getDestinationKey("");

    // Gather the keys of every object under the prefix, following pagination.
    List<KeyVersion> keys = new ArrayList<>();
    try {
        ListObjectsRequest req = new ListObjectsRequest().withBucketName(bucketName).withPrefix(prefix);
        ObjectListing storedAuxFilesList = s3.listObjects(req);
        while (true) {
            for (S3ObjectSummary item : storedAuxFilesList.getObjectSummaries()) {
                keys.add(new KeyVersion(item.getKey()));
            }
            if (!storedAuxFilesList.isTruncated()) {
                break;
            }
            storedAuxFilesList = s3.listNextBatchOfObjects(storedAuxFilesList);
        }
    } catch (AmazonClientException ace) {
        logger.warning("Caught an AmazonClientException:    " + ace.getMessage());
        // Keep the SDK exception as the cause so the failure can be diagnosed upstream.
        throw new IOException("S3AccessIO: Failed to get aux objects for listing to delete.", ace);
    }

    // Check if the list of auxiliary files for a data file is empty
    if (keys.isEmpty()) {
        logger.fine("S3AccessIO: No auxiliary objects to delete.");
        return;
    }

    logger.fine("Trying to delete auxiliary files...");
    // Upper bound on keys per request imposed by the S3 Multi-Object Delete API.
    final int maxKeysPerRequest = 1000;
    try {
        for (int from = 0; from < keys.size(); from += maxKeysPerRequest) {
            int to = Math.min(from + maxKeysPerRequest, keys.size());
            DeleteObjectsRequest multiObjectDeleteRequest = new DeleteObjectsRequest(bucketName);
            multiObjectDeleteRequest.setKeys(keys.subList(from, to));
            s3.deleteObjects(multiObjectDeleteRequest);
        }
    } catch (MultiObjectDeleteException e) {
        logger.warning("S3AccessIO: Unable to delete auxilary objects: " + e.getMessage());
        throw new IOException("S3AccessIO: Failed to delete one or more auxiliary objects.", e);
    }
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) KeyVersion(com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion) MultiObjectDeleteException(com.amazonaws.services.s3.model.MultiObjectDeleteException) AmazonClientException(com.amazonaws.AmazonClientException) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) IOException(java.io.IOException) DeleteObjectsRequest(com.amazonaws.services.s3.model.DeleteObjectsRequest)

Example 40 with ListObjectsRequest

Use of com.amazonaws.services.s3.model.ListObjectsRequest in project jackrabbit-oak by apache.

the class S3Backend method getAllMetadataRecords.

/**
 * Returns a record for every metadata object whose key starts with the given
 * prefix (after {@code addMetaKeyPrefix} has been applied to it).
 * <p>
 * A single S3 listing response is limited in size, so the listing is followed
 * page by page until it is no longer truncated; reading only the first page
 * would silently drop records once the bucket holds more than one page of
 * matching keys.
 * <p>
 * While the listing runs, the thread's context class loader is switched to
 * this class's loader and restored afterwards if it was non-null.
 *
 * @param prefix metadata name prefix to match
 * @return the matching records, possibly empty, never {@code null}
 */
@Override
public List<DataRecord> getAllMetadataRecords(String prefix) {
    List<DataRecord> metadataList = new ArrayList<DataRecord>();
    ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
    try {
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
        ListObjectsRequest listObjectsRequest = new ListObjectsRequest().withBucketName(bucket).withPrefix(addMetaKeyPrefix(prefix));
        ObjectListing objectListing = s3service.listObjects(listObjectsRequest);
        while (true) {
            for (final S3ObjectSummary s3ObjSumm : objectListing.getObjectSummaries()) {
                metadataList.add(new S3DataRecord(this, s3service, bucket, new DataIdentifier(stripMetaKeyPrefix(s3ObjSumm.getKey())), s3ObjSumm.getLastModified().getTime(), s3ObjSumm.getSize(), true));
            }
            if (!objectListing.isTruncated()) {
                break;
            }
            // More matching keys remain on the server: fetch the next page.
            objectListing = s3service.listNextBatchOfObjects(objectListing);
        }
    } finally {
        if (contextClassLoader != null) {
            Thread.currentThread().setContextClassLoader(contextClassLoader);
        }
    }
    return metadataList;
}
Also used : ListObjectsRequest(com.amazonaws.services.s3.model.ListObjectsRequest) DataIdentifier(org.apache.jackrabbit.core.data.DataIdentifier) ArrayList(java.util.ArrayList) ObjectListing(com.amazonaws.services.s3.model.ObjectListing) S3ObjectSummary(com.amazonaws.services.s3.model.S3ObjectSummary) AbstractDataRecord(org.apache.jackrabbit.oak.spi.blob.AbstractDataRecord) DataRecord(org.apache.jackrabbit.core.data.DataRecord)

Aggregations

ListObjectsRequest (com.amazonaws.services.s3.model.ListObjectsRequest): 48, ObjectListing (com.amazonaws.services.s3.model.ObjectListing): 46, S3ObjectSummary (com.amazonaws.services.s3.model.S3ObjectSummary): 32, ArrayList (java.util.ArrayList): 23, AmazonClientException (com.amazonaws.AmazonClientException): 11, IOException (java.io.IOException): 9, Path (org.apache.hadoop.fs.Path): 9, HashMap (java.util.HashMap): 8, LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 8, DeleteObjectsRequest (com.amazonaws.services.s3.model.DeleteObjectsRequest): 6, FileStatus (org.apache.hadoop.fs.FileStatus): 6, ListObjectsRequest (software.amazon.awssdk.services.s3.model.ListObjectsRequest): 6, ListObjectsResponse (software.amazon.awssdk.services.s3.model.ListObjectsResponse): 6, Date (java.util.Date): 5, Test (org.junit.Test): 5, S3Object (software.amazon.awssdk.services.s3.model.S3Object): 5, StocatorPath (com.ibm.stocator.fs.common.StocatorPath): 4, FileNotFoundException (java.io.FileNotFoundException): 4, S3Exception (software.amazon.awssdk.services.s3.model.S3Exception): 4, AmazonServiceException (com.amazonaws.AmazonServiceException): 3