Search in sources :

Example 11 with Objects

use of com.google.api.services.storage.model.Objects in project druid by druid-io.

the class GoogleCloudStorageInputSourceTest method addExpectedPrefixObjects.

/**
 * Records the mock expectations for a single listing of the objects under {@code prefix}.
 *
 * <p>A fresh {@code Storage.Objects.List} mock is created and returned from
 * {@code STORAGE.list(bucket)}; executing it yields an {@link Objects} response holding one
 * {@link StorageObject} per entry of {@code uris}. Only the list-request mock is replayed
 * here; {@code STORAGE} is left in record state for the caller to replay.
 *
 * @param prefix URI whose authority is used as the bucket name and whose path (with a leading
 *     slash removed, if present) is the expected listing prefix
 * @param uris URIs of the objects the listing should return; each object's name is the URI path
 * @throws IOException declared because the mocked {@code execute()} call declares it
 */
private static void addExpectedPrefixObjects(URI prefix, List<URI> uris) throws IOException {
    final String bucket = prefix.getAuthority();
    Storage.Objects.List listRequest = EasyMock.createMock(Storage.Objects.List.class);
    EasyMock.expect(STORAGE.list(EasyMock.eq(bucket))).andReturn(listRequest).once();
    EasyMock.expect(listRequest.setPageToken(EasyMock.anyString())).andReturn(listRequest).once();
    EasyMock.expect(listRequest.setMaxResults((long) MAX_LISTING_LENGTH)).andReturn(listRequest).once();
    EasyMock.expect(listRequest.setPrefix(EasyMock.eq(StringUtils.maybeRemoveLeadingSlash(prefix.getPath())))).andReturn(listRequest).once();
    List<StorageObject> mockObjects = new ArrayList<>();
    for (URI uri : uris) {
        StorageObject s = new StorageObject();
        s.setBucket(bucket);
        s.setName(uri.getPath());
        s.setSize(BigInteger.valueOf(CONTENT.length));
        mockObjects.add(s);
    }
    // The response is a real model object, not a mock: its items are set directly and no
    // expectation may be recorded against it.
    Objects response = new Objects();
    response.setItems(mockObjects);
    EasyMock.expect(listRequest.execute()).andReturn(response).once();
    EasyMock.replay(listRequest);
}
Also used : StorageObject(com.google.api.services.storage.model.StorageObject) Objects(com.google.api.services.storage.model.Objects) ArrayList(java.util.ArrayList) URI(java.net.URI)

Example 12 with Objects

use of com.google.api.services.storage.model.Objects in project beam by apache.

the class GcsUtil method listObjects.

/**
 * Lists {@link Objects} given the {@code bucket}, {@code prefix}, {@code pageToken} and
 * {@code delimiter}.
 *
 * <p>For more details, see https://cloud.google.com/storage/docs/json_api/v1/objects/list.
 *
 * @param bucket bucket to list
 * @param prefix prefix set on the list request
 * @param pageToken page token to continue from; when {@code null} no token is set on the request
 * @param delimiter value passed unchanged to the request's {@code setDelimiter}; may be
 *     {@code null}
 * @return the response of the executed list request
 * @throws IOException if the request fails; the original failure is attached as the cause. If
 *     the failure was an interruption, the calling thread's interrupt flag is set again before
 *     this exception is thrown.
 */
public Objects listObjects(String bucket, String prefix, @Nullable String pageToken, @Nullable String delimiter) throws IOException {
    // List all objects that start with the prefix (including objects in sub-directories).
    Storage.Objects.List listObject = storageClient.objects().list(bucket);
    listObject.setMaxResults(MAX_LIST_ITEMS_PER_CALL);
    listObject.setPrefix(prefix);
    listObject.setDelimiter(delimiter);
    if (pageToken != null) {
        listObject.setPageToken(pageToken);
    }
    try {
        return ResilientOperation.retry(listObject::execute, createBackOff(), RetryDeterminer.SOCKET_ERRORS, IOException.class);
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Wrapping the exception would otherwise hide the interruption from callers further
            // up the stack, so restore the flag before rethrowing.
            Thread.currentThread().interrupt();
        }
        throw new IOException(String.format("Unable to match files in bucket %s, prefix %s.", bucket, prefix), e);
    }
}
Also used : Objects(com.google.api.services.storage.model.Objects) IOException(java.io.IOException) GoogleJsonResponseException(com.google.api.client.googleapis.json.GoogleJsonResponseException) FileNotFoundException(java.io.FileNotFoundException) AccessDeniedException(java.nio.file.AccessDeniedException) IOException(java.io.IOException) FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) ExecutionException(java.util.concurrent.ExecutionException)

Example 13 with Objects

use of com.google.api.services.storage.model.Objects in project beam by apache.

the class GcsUtil method expand.

/**
 * Resolves a GCS pattern to the list of paths it matches.
 *
 * <p>A pattern without wildcards is checked for existence with a single metadata lookup and
 * yields either itself or an empty list. A wildcard pattern is compared against a paged listing
 * of the bucket, restricted to the part of the pattern that precedes the first wildcard.
 */
public List<GcsPath> expand(GcsPath gcsPattern) throws IOException {
    if (!isWildcard(gcsPattern)) {
        try {
            // The lookup is made only to learn whether the object exists; its result is dropped.
            // The request has strong global consistency.
            getObject(gcsPattern);
            return ImmutableList.of(gcsPattern);
        } catch (FileNotFoundException notFound) {
            // A missing object simply means that nothing matched.
            return ImmutableList.of();
        }
    }

    // From here on the pattern is known to contain a wildcard.
    final String prefix = getNonWildcardPrefix(gcsPattern.getObject());
    final Pattern regex = Pattern.compile(wildcardToRegexp(gcsPattern.getObject()));
    LOG.debug("matching files in bucket {}, prefix {} against pattern {}", gcsPattern.getBucket(), prefix, regex.toString());

    final List<GcsPath> matched = new ArrayList<>();
    String nextPageToken = null;
    while (true) {
        final Objects page = listObjects(gcsPattern.getBucket(), prefix, nextPageToken);
        final List<StorageObject> items = page.getItems();
        if (items == null) {
            // A page without items ends the listing.
            return matched;
        }
        for (StorageObject candidate : items) {
            final String name = candidate.getName();
            // Keep only names that match the pattern and are not directories (trailing slash).
            if (!regex.matcher(name).matches() || name.endsWith("/")) {
                continue;
            }
            LOG.debug("Matched object: {}", name);
            matched.add(GcsPath.fromObject(candidate));
        }
        nextPageToken = page.getNextPageToken();
        if (nextPageToken == null) {
            return matched;
        }
    }
}
Also used : Pattern(java.util.regex.Pattern) StorageObject(com.google.api.services.storage.model.StorageObject) FileNotFoundException(java.io.FileNotFoundException) GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) ArrayList(java.util.ArrayList) Objects(com.google.api.services.storage.model.Objects)

Example 14 with Objects

use of com.google.api.services.storage.model.Objects in project beam by apache.

the class GcsFileSystemTest method testMatch.

/**
 * Matches three specs in one call (a glob, a file that does not exist and a file that does) and
 * checks the status and contents of each result.
 */
@Test
public void testMatch() throws Exception {
    final String missingFile = "gs://testbucket/testdirectory/non-exist-file";
    final String otherFile = "gs://testbucket/testdirectory/otherfile";

    // Listing served for the glob spec: one directory entry followed by six files.
    List<StorageObject> listing = new ArrayList<>();
    listing.add(new StorageObject().setBucket("testbucket").setName("testdirectory/"));
    listing.add(createStorageObject("gs://testbucket/testdirectory/file1name", 1L));
    listing.add(createStorageObject("gs://testbucket/testdirectory/file2name", 2L));
    listing.add(createStorageObject("gs://testbucket/testdirectory/file3name", 3L));
    listing.add(createStorageObject("gs://testbucket/testdirectory/file4name", 4L));
    listing.add(createStorageObject(otherFile, 5L));
    listing.add(createStorageObject("gs://testbucket/testdirectory/anotherfile", 6L));
    Objects listResponse = new Objects();
    listResponse.setItems(listing);
    when(mockGcsUtil.listObjects(eq("testbucket"), anyString(), isNull(String.class))).thenReturn(listResponse);

    // Direct lookups served for the two non-glob specs: the first fails, the second succeeds.
    List<GcsPath> lookedUpPaths = ImmutableList.of(GcsPath.fromUri(missingFile), GcsPath.fromUri(otherFile));
    StorageObjectOrIOException missingLookup = StorageObjectOrIOException.create(new FileNotFoundException());
    StorageObjectOrIOException otherLookup = StorageObjectOrIOException.create(createStorageObject(otherFile, 4L));
    when(mockGcsUtil.getObjects(eq(lookedUpPaths))).thenReturn(ImmutableList.of(missingLookup, otherLookup));

    List<String> specs = ImmutableList.of("gs://testbucket/testdirectory/file[1-3]*", missingFile, otherFile);
    List<MatchResult> matchResults = gcsFileSystem.match(specs);
    assertEquals(3, matchResults.size());

    // Glob spec: only file1name..file3name are expected.
    MatchResult globResult = matchResults.get(0);
    assertEquals(Status.OK, globResult.status());
    assertThat(ImmutableList.of("gs://testbucket/testdirectory/file1name", "gs://testbucket/testdirectory/file2name", "gs://testbucket/testdirectory/file3name"), contains(toFilenames(globResult).toArray()));

    // Missing file spec.
    assertEquals(Status.NOT_FOUND, matchResults.get(1).status());

    // Existing file spec.
    MatchResult otherResult = matchResults.get(2);
    assertEquals(Status.OK, otherResult.status());
    assertThat(ImmutableList.of(otherFile), contains(toFilenames(otherResult).toArray()));
}
Also used : StorageObject(com.google.api.services.storage.model.StorageObject) Objects(com.google.api.services.storage.model.Objects) ArrayList(java.util.ArrayList) GcsPath(org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath) FileNotFoundException(java.io.FileNotFoundException) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) MatchResult(org.apache.beam.sdk.io.fs.MatchResult) Test(org.junit.Test)

Example 15 with Objects

use of com.google.api.services.storage.model.Objects in project beam by apache.

the class GcsFileSystem method expand.

/**
 * Expands a glob pattern into a {@link MatchResult} by listing the bucket page by page and
 * keeping every non-directory object whose name matches the pattern.
 *
 * @throws IllegalArgumentException if {@code gcsPattern} does not contain globs. NOTE(review):
 *     this method performs no explicit check itself; presumably the exception is raised by
 *     {@code GcsUtil.getNonWildcardPrefix} — confirm.
 */
@VisibleForTesting
MatchResult expand(GcsPath gcsPattern) throws IOException {
    final String prefix = GcsUtil.getNonWildcardPrefix(gcsPattern.getObject());
    final Pattern regex = Pattern.compile(wildcardToRegexp(gcsPattern.getObject()));
    LOG.debug("matching files in bucket {}, prefix {} against pattern {}", gcsPattern.getBucket(), prefix, regex.toString());

    final List<Metadata> results = new ArrayList<>();
    String pageToken = null;
    boolean morePages = true;
    while (morePages) {
        final Objects listing = options.getGcsUtil().listObjects(gcsPattern.getBucket(), prefix, pageToken);
        final List<StorageObject> items = listing.getItems();
        if (items == null) {
            // A page without items ends the listing.
            break;
        }
        for (StorageObject candidate : items) {
            final String name = candidate.getName();
            if (regex.matcher(name).matches()) {
                // Names ending in a slash are directories and are skipped.
                if (!name.endsWith("/")) {
                    LOG.debug("Matched object: {}", name);
                    results.add(toMetadata(candidate));
                }
            }
        }
        pageToken = listing.getNextPageToken();
        morePages = pageToken != null;
    }
    return MatchResult.create(Status.OK, results);
}
Also used : Pattern(java.util.regex.Pattern) StorageObject(com.google.api.services.storage.model.StorageObject) Metadata(org.apache.beam.sdk.io.fs.MatchResult.Metadata) ArrayList(java.util.ArrayList) Objects(com.google.api.services.storage.model.Objects) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Aggregations

Objects (com.google.api.services.storage.model.Objects)17 StorageObject (com.google.api.services.storage.model.StorageObject)14 ArrayList (java.util.ArrayList)11 Test (org.junit.Test)6 Storage (com.google.api.services.storage.Storage)5 FileNotFoundException (java.io.FileNotFoundException)5 IOException (java.io.IOException)5 List (java.util.List)5 GcsPath (org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath)5 GcsOptions (org.apache.beam.sdk.extensions.gcp.options.GcsOptions)4 LinkedList (java.util.LinkedList)3 Pattern (java.util.regex.Pattern)3 GcsPath (org.apache.beam.sdk.util.gcsfs.GcsPath)3 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)3 GoogleJsonResponseException (com.google.api.client.googleapis.json.GoogleJsonResponseException)2 GoogleCloudStorage (com.google.cloud.hadoop.gcsio.GoogleCloudStorage)2 ImmutableList (com.google.common.collect.ImmutableList)2 URI (java.net.URI)2 AccessDeniedException (java.nio.file.AccessDeniedException)2 FileAlreadyExistsException (java.nio.file.FileAlreadyExistsException)2