Search in sources :

Example 6 with Objects

use of com.google.api.services.storage.model.Objects in project beam by apache.

the class GcsUtilTest method testRecursiveGlobExpansion.

@Test
public void testRecursiveGlobExpansion() throws IOException {
    GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
    GcsUtil gcsUtil = pipelineOptions.getGcsUtil();
    Storage mockStorage = Mockito.mock(Storage.class);
    gcsUtil.setStorageClient(mockStorage);
    Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
    Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);
    Storage.Objects.List mockStorageList = Mockito.mock(Storage.Objects.List.class);
    Objects modelObjects = new Objects();
    List<StorageObject> items = new ArrayList<>();
    // A directory
    items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/"));
    // Files within the directory
    items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file1.txt"));
    items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file2.txt"));
    items.add(new StorageObject().setBucket("testbucket").setName("test/directory/file3.txt"));
    items.add(new StorageObject().setBucket("testbucket").setName("test/directory/otherfile"));
    items.add(new StorageObject().setBucket("testbucket").setName("test/directory/anotherfile"));
    items.add(new StorageObject().setBucket("testbucket").setName("test/file4.txt"));
    modelObjects.setItems(items);
    when(mockStorage.objects()).thenReturn(mockStorageObjects);
    when(mockStorageObjects.get("testbucket", "test/directory/otherfile")).thenReturn(mockStorageGet);
    when(mockStorageObjects.list("testbucket")).thenReturn(mockStorageList);
    when(mockStorageGet.execute()).thenReturn(new StorageObject().setBucket("testbucket").setName("test/directory/otherfile"));
    when(mockStorageList.execute()).thenReturn(modelObjects);
    {
        GcsPath pattern = GcsPath.fromUri("gs://testbucket/test/**/*.txt");
        List<GcsPath> expectedFiles = ImmutableList.of(GcsPath.fromUri("gs://testbucket/test/directory/file1.txt"), GcsPath.fromUri("gs://testbucket/test/directory/file2.txt"), GcsPath.fromUri("gs://testbucket/test/directory/file3.txt"), GcsPath.fromUri("gs://testbucket/test/file4.txt"));
        assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray()));
    }
}
Also used : Storage(com.google.api.services.storage.Storage) StorageObject(com.google.api.services.storage.model.StorageObject) Objects(com.google.api.services.storage.model.Objects) ArrayList(java.util.ArrayList) GcsPath(org.apache.beam.sdk.util.gcsfs.GcsPath) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) GcsOptions(org.apache.beam.sdk.extensions.gcp.options.GcsOptions) Test(org.junit.Test)

Example 7 with Objects

use of com.google.api.services.storage.model.Objects in project beam by apache.

the class GcsUtilTest method testGlobExpansion.

@Test
public void testGlobExpansion() throws IOException {
    GcsOptions pipelineOptions = gcsOptionsWithTestCredential();
    GcsUtil gcsUtil = pipelineOptions.getGcsUtil();
    Storage mockStorage = Mockito.mock(Storage.class);
    gcsUtil.setStorageClient(mockStorage);
    Storage.Objects mockStorageObjects = Mockito.mock(Storage.Objects.class);
    Storage.Objects.Get mockStorageGet = Mockito.mock(Storage.Objects.Get.class);
    Storage.Objects.List mockStorageList = Mockito.mock(Storage.Objects.List.class);
    Objects modelObjects = new Objects();
    List<StorageObject> items = new ArrayList<>();
    // A directory
    items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/"));
    // Files within the directory
    items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/file1name"));
    items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/file2name"));
    items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/file3name"));
    items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/otherfile"));
    items.add(new StorageObject().setBucket("testbucket").setName("testdirectory/anotherfile"));
    modelObjects.setItems(items);
    when(mockStorage.objects()).thenReturn(mockStorageObjects);
    when(mockStorageObjects.get("testbucket", "testdirectory/otherfile")).thenReturn(mockStorageGet);
    when(mockStorageObjects.list("testbucket")).thenReturn(mockStorageList);
    when(mockStorageGet.execute()).thenReturn(new StorageObject().setBucket("testbucket").setName("testdirectory/otherfile"));
    when(mockStorageList.execute()).thenReturn(modelObjects);
    // Test a single file.
    {
        GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/otherfile");
        List<GcsPath> expectedFiles = ImmutableList.of(GcsPath.fromUri("gs://testbucket/testdirectory/otherfile"));
        assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray()));
    }
    // Test patterns.
    {
        GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file*");
        List<GcsPath> expectedFiles = ImmutableList.of(GcsPath.fromUri("gs://testbucket/testdirectory/file1name"), GcsPath.fromUri("gs://testbucket/testdirectory/file2name"), GcsPath.fromUri("gs://testbucket/testdirectory/file3name"));
        assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray()));
    }
    {
        GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file[1-3]*");
        List<GcsPath> expectedFiles = ImmutableList.of(GcsPath.fromUri("gs://testbucket/testdirectory/file1name"), GcsPath.fromUri("gs://testbucket/testdirectory/file2name"), GcsPath.fromUri("gs://testbucket/testdirectory/file3name"));
        assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray()));
    }
    {
        GcsPath pattern = GcsPath.fromUri("gs://testbucket/testdirectory/file?name");
        List<GcsPath> expectedFiles = ImmutableList.of(GcsPath.fromUri("gs://testbucket/testdirectory/file1name"), GcsPath.fromUri("gs://testbucket/testdirectory/file2name"), GcsPath.fromUri("gs://testbucket/testdirectory/file3name"));
        assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray()));
    }
    {
        GcsPath pattern = GcsPath.fromUri("gs://testbucket/test*ectory/fi*name");
        List<GcsPath> expectedFiles = ImmutableList.of(GcsPath.fromUri("gs://testbucket/testdirectory/file1name"), GcsPath.fromUri("gs://testbucket/testdirectory/file2name"), GcsPath.fromUri("gs://testbucket/testdirectory/file3name"));
        assertThat(expectedFiles, contains(gcsUtil.expand(pattern).toArray()));
    }
}
Also used : Storage(com.google.api.services.storage.Storage) StorageObject(com.google.api.services.storage.model.StorageObject) Objects(com.google.api.services.storage.model.Objects) ArrayList(java.util.ArrayList) GcsPath(org.apache.beam.sdk.util.gcsfs.GcsPath) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) GcsOptions(org.apache.beam.sdk.extensions.gcp.options.GcsOptions) Test(org.junit.Test)

Example 8 with Objects

use of com.google.api.services.storage.model.Objects in project beam by apache.

the class GcsUtil method listObjects.

/**
   * Lists {@link Objects} given the {@code bucket}, {@code prefix}, {@code pageToken}.
   */
public Objects listObjects(String bucket, String prefix, @Nullable String pageToken) throws IOException {
    // List all objects that start with the prefix (including objects in sub-directories).
    Storage.Objects.List listObject = storageClient.objects().list(bucket);
    listObject.setMaxResults(MAX_LIST_ITEMS_PER_CALL);
    listObject.setPrefix(prefix);
    if (pageToken != null) {
        listObject.setPageToken(pageToken);
    }
    try {
        return ResilientOperation.retry(ResilientOperation.getGoogleRequestCallable(listObject), createBackOff(), RetryDeterminer.SOCKET_ERRORS, IOException.class);
    } catch (Exception e) {
        throw new IOException(String.format("Unable to match files in bucket %s, prefix %s.", bucket, prefix), e);
    }
}
Also used : Objects(com.google.api.services.storage.model.Objects) IOException(java.io.IOException) GoogleJsonResponseException(com.google.api.client.googleapis.json.GoogleJsonResponseException) FileNotFoundException(java.io.FileNotFoundException) AccessDeniedException(java.nio.file.AccessDeniedException) IOException(java.io.IOException) FileAlreadyExistsException(java.nio.file.FileAlreadyExistsException) ExecutionException(java.util.concurrent.ExecutionException)

Example 9 with Objects

use of com.google.api.services.storage.model.Objects in project beam by apache.

the class GcsUtil method expand.

/**
   * Expands a pattern into matched paths. The pattern path may contain globs, which are expanded
   * in the result. For patterns that only match a single object, we ensure that the object
   * exists.
   */
public List<GcsPath> expand(GcsPath gcsPattern) throws IOException {
    Pattern p = null;
    String prefix = null;
    if (isWildcard(gcsPattern)) {
        // Part before the first wildcard character.
        prefix = getNonWildcardPrefix(gcsPattern.getObject());
        p = Pattern.compile(wildcardToRegexp(gcsPattern.getObject()));
    } else {
        // Not a wildcard.
        try {
            // Use a get request to fetch the metadata of the object, and ignore the return value.
            // The request has strong global consistency.
            getObject(gcsPattern);
            return ImmutableList.of(gcsPattern);
        } catch (FileNotFoundException e) {
            // If the path was not found, return an empty list.
            return ImmutableList.of();
        }
    }
    LOG.debug("matching files in bucket {}, prefix {} against pattern {}", gcsPattern.getBucket(), prefix, p.toString());
    String pageToken = null;
    List<GcsPath> results = new LinkedList<>();
    do {
        Objects objects = listObjects(gcsPattern.getBucket(), prefix, pageToken);
        if (objects.getItems() == null) {
            break;
        }
        // Filter objects based on the regex.
        for (StorageObject o : objects.getItems()) {
            String name = o.getName();
            // Skip directories, which end with a slash.
            if (p.matcher(name).matches() && !name.endsWith("/")) {
                LOG.debug("Matched object: {}", name);
                results.add(GcsPath.fromObject(o));
            }
        }
        pageToken = objects.getNextPageToken();
    } while (pageToken != null);
    return results;
}
Also used : Pattern(java.util.regex.Pattern) StorageObject(com.google.api.services.storage.model.StorageObject) FileNotFoundException(java.io.FileNotFoundException) GcsPath(org.apache.beam.sdk.util.gcsfs.GcsPath) Objects(com.google.api.services.storage.model.Objects) LinkedList(java.util.LinkedList)

Example 10 with Objects

use of com.google.api.services.storage.model.Objects in project druid by druid-io.

the class GoogleTimestampVersionedDataFinder method getLatestVersion.

@Override
public URI getLatestVersion(URI descriptorBase, @Nullable Pattern pattern) {
    try {
        long mostRecent = Long.MIN_VALUE;
        URI latest = null;
        final CloudObjectLocation baseLocation = new CloudObjectLocation(descriptorBase);
        final Objects objects = storage.list(baseLocation.getBucket()).setPrefix(baseLocation.getPath()).setMaxResults(MAX_LISTING_KEYS).execute();
        for (StorageObject storageObject : objects.getItems()) {
            if (GoogleUtils.isDirectoryPlaceholder(storageObject)) {
                continue;
            }
            // remove path prefix from file name
            final CloudObjectLocation objectLocation = new CloudObjectLocation(storageObject.getBucket(), storageObject.getName());
            final String keyString = StringUtils.maybeRemoveLeadingSlash(storageObject.getName().substring(baseLocation.getPath().length()));
            if (pattern != null && !pattern.matcher(keyString).matches()) {
                continue;
            }
            final long latestModified = storageObject.getUpdated().getValue();
            if (latestModified >= mostRecent) {
                mostRecent = latestModified;
                latest = objectLocation.toUri(GoogleStorageDruidModule.SCHEME_GS);
            }
        }
        return latest;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
Also used : StorageObject(com.google.api.services.storage.model.StorageObject) CloudObjectLocation(org.apache.druid.data.input.impl.CloudObjectLocation) Objects(com.google.api.services.storage.model.Objects) IOException(java.io.IOException) URI(java.net.URI)

Aggregations

Objects (com.google.api.services.storage.model.Objects)17 StorageObject (com.google.api.services.storage.model.StorageObject)14 ArrayList (java.util.ArrayList)11 Test (org.junit.Test)6 Storage (com.google.api.services.storage.Storage)5 FileNotFoundException (java.io.FileNotFoundException)5 IOException (java.io.IOException)5 List (java.util.List)5 GcsPath (org.apache.beam.sdk.extensions.gcp.util.gcsfs.GcsPath)5 GcsOptions (org.apache.beam.sdk.extensions.gcp.options.GcsOptions)4 LinkedList (java.util.LinkedList)3 Pattern (java.util.regex.Pattern)3 GcsPath (org.apache.beam.sdk.util.gcsfs.GcsPath)3 ImmutableList (org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList)3 GoogleJsonResponseException (com.google.api.client.googleapis.json.GoogleJsonResponseException)2 GoogleCloudStorage (com.google.cloud.hadoop.gcsio.GoogleCloudStorage)2 ImmutableList (com.google.common.collect.ImmutableList)2 URI (java.net.URI)2 AccessDeniedException (java.nio.file.AccessDeniedException)2 FileAlreadyExistsException (java.nio.file.FileAlreadyExistsException)2