use of org.apache.beam.sdk.extensions.gcp.util.GcsUtil in project beam by apache.
the class GcsKmsKeyIT method testGcsWriteWithKmsKey.
/**
 * Runs a small read/write text pipeline whose output lands under tempLocation, expecting
 * --dataflowKmsKey to have been supplied on the command line.
 *
 * <p>Checks that the pipeline reaches {@code State.DONE}, that the sharded output matches the
 * expected checksum, and that every matched output object reports a non-null KMS key name.
 * Temporary files are not inspected.
 *
 * <p>This test verifies that GCS file copies work with CMEK-enabled files.
 */
@Test
public void testGcsWriteWithKmsKey() {
  TestPipelineOptions options =
      TestPipeline.testingPipelineOptions().as(TestPipelineOptions.class);
  assertNotNull(options.getTempRoot());
  options.setTempLocation(options.getTempRoot() + "/testGcsWriteWithKmsKey");
  GcsOptions gcsOptions = options.as(GcsOptions.class);

  // Timestamped output name underneath the GCP temp location.
  ResourceId tempDirectory = FileSystems.matchNewResource(gcsOptions.getGcpTempLocation(), true);
  String outputName = String.format("GcsKmsKeyIT-%tF-%<tH-%<tM-%<tS-%<tL.output", new Date());
  ResourceId filenamePrefix =
      tempDirectory.resolve(outputName, StandardResolveOptions.RESOLVE_FILE);

  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("ReadLines", TextIO.read().from(INPUT_FILE))
      .apply("WriteLines", TextIO.write().to(filenamePrefix));
  assertThat(pipeline.run().waitUntilFinish(), equalTo(State.DONE));

  String filePattern = filenamePrefix + "*-of-*";
  assertThat(new NumberedShardedFile(filePattern), fileContentsHaveChecksum(EXPECTED_CHECKSUM));

  // Every output shard must carry a KMS key name.
  try {
    GcsUtil gcsUtil;
    MatchResult shards =
        Iterables.getOnlyElement(FileSystems.match(Collections.singletonList(filePattern)));
    gcsUtil = gcsOptions.getGcsUtil();
    for (Metadata shard : shards.metadata()) {
      GcsPath shardPath = GcsPath.fromUri(shard.resourceId().toString());
      assertNotNull(gcsUtil.getObject(shardPath).getKmsKeyName());
    }
  } catch (IOException e) {
    throw new AssertionError(e);
  }
}
use of org.apache.beam.sdk.extensions.gcp.util.GcsUtil in project beam by apache.
the class TfIdf method listInputDocuments.
/**
 * Collects the URIs of the documents found directly under the {@code options.input} location.
 *
 * <p>An input without a scheme is treated as a {@code file} URI. For {@code file} inputs the
 * entries of the directory are returned; for {@code gs} inputs the path is expanded as a glob
 * with a trailing {@code *}. Any other scheme yields an empty set.
 *
 * @param options pipeline options supplying the input location
 * @return the set of document URIs found, possibly empty
 * @throws URISyntaxException if the input (or a URI derived from it) is not a valid URI
 * @throws IOException declared for the GCS expansion call
 */
public static Set<URI> listInputDocuments(Options options) throws URISyntaxException, IOException {
  URI inputUri = new URI(options.getInput());

  // A scheme-less input is interpreted as a local file location.
  URI locationUri =
      inputUri.getScheme() == null
          ? new URI(
              "file",
              inputUri.getAuthority(),
              inputUri.getPath(),
              inputUri.getQuery(),
              inputUri.getFragment())
          : inputUri;

  String scheme = locationUri.getScheme();
  Set<URI> documents = new HashSet<>();

  if ("file".equals(scheme)) {
    File directory = new File(locationUri);
    // list() yields null when the location is not a listable directory; treat that as empty.
    String[] names = directory.list();
    if (names != null) {
      for (String name : names) {
        documents.add(new File(directory, name).toURI());
      }
    }
  } else if ("gs".equals(scheme)) {
    GcsUtil gcsUtil = options.as(GcsOptions.class).getGcsUtil();
    // Append a wildcard so that everything beneath the prefix is matched.
    String globPath = locationUri.getPath() + "*";
    URI globUri =
        new URI(
            scheme,
            locationUri.getAuthority(),
            globPath,
            locationUri.getQuery(),
            locationUri.getFragment());
    for (GcsPath match : gcsUtil.expand(GcsPath.fromUri(globUri))) {
      documents.add(match.toUri());
    }
  }

  return documents;
}
use of org.apache.beam.sdk.extensions.gcp.util.GcsUtil in project beam by apache.
the class DataflowRunnerTest method buildMockGcsUtil.
/**
 * Builds a Mockito mock of {@link GcsUtil} for runner tests.
 *
 * <p>The mock answers {@code create} with a channel onto a fresh temporary file, echoes the
 * requested path back from {@code expand}, reports the known valid buckets as accessible and
 * {@code NON_EXISTENT_BUCKET} as inaccessible, and returns a {@code StorageObject} for every
 * requested path whose bucket equals {@code VALID_BUCKET}.
 *
 * @return the configured mock
 * @throws IOException declared because the stubbed {@code GcsUtil} methods declare it
 */
private static GcsUtil buildMockGcsUtil() throws IOException {
  GcsUtil mockGcsUtil = mock(GcsUtil.class);

  // Stub create(...) exactly once: repeating when(...) on an already-stubbed call executes the
  // existing answer, which would open a temp-file channel that nothing ever closes.
  when(mockGcsUtil.create(any(GcsPath.class), any(GcsUtil.CreateOptions.class)))
      .then(
          invocation ->
              FileChannel.open(
                  Files.createTempFile("channel-", ".tmp"),
                  StandardOpenOption.CREATE,
                  StandardOpenOption.WRITE,
                  StandardOpenOption.DELETE_ON_CLOSE));

  // Expansion returns a single-element list holding the path that was asked for.
  when(mockGcsUtil.expand(any(GcsPath.class)))
      .then(invocation -> ImmutableList.of((GcsPath) invocation.getArguments()[0]));

  when(mockGcsUtil.bucketAccessible(GcsPath.fromUri(VALID_STAGING_BUCKET))).thenReturn(true);
  when(mockGcsUtil.bucketAccessible(GcsPath.fromUri(VALID_TEMP_BUCKET))).thenReturn(true);
  when(mockGcsUtil.bucketAccessible(GcsPath.fromUri(VALID_TEMP_BUCKET + "/staging/")))
      .thenReturn(true);
  when(mockGcsUtil.bucketAccessible(GcsPath.fromUri(VALID_PROFILE_BUCKET))).thenReturn(true);
  when(mockGcsUtil.bucketAccessible(GcsPath.fromUri(NON_EXISTENT_BUCKET))).thenReturn(false);

  // Let every valid path be matched
  when(mockGcsUtil.getObjects(anyListOf(GcsPath.class)))
      .thenAnswer(
          invocationOnMock -> {
            // Safe: this stub only matches calls whose argument is a List of GcsPath.
            @SuppressWarnings("unchecked")
            List<GcsPath> gcsPaths = (List<GcsPath>) invocationOnMock.getArguments()[0];
            List<GcsUtil.StorageObjectOrIOException> results = new ArrayList<>();
            for (GcsPath gcsPath : gcsPaths) {
              // Paths outside VALID_BUCKET contribute no entry to the result list.
              if (gcsPath.getBucket().equals(VALID_BUCKET)) {
                StorageObject resultObject = new StorageObject();
                resultObject.setBucket(gcsPath.getBucket());
                resultObject.setName(gcsPath.getObject());
                results.add(GcsUtil.StorageObjectOrIOException.create(resultObject));
              }
            }
            return results;
          });

  // The dataflow pipeline attempts to output to this location.
  when(mockGcsUtil.bucketAccessible(GcsPath.fromUri("gs://bucket/object"))).thenReturn(true);
  return mockGcsUtil;
}
use of org.apache.beam.sdk.extensions.gcp.util.GcsUtil in project beam by apache.
the class MinimalWordCountTest method buildMockGcsUtil.
/**
 * Creates a Mockito stand-in for {@link GcsUtil} that answers {@code open} with a channel onto
 * a new temporary file and answers {@code expand} with the path it was given.
 *
 * @return the configured mock
 * @throws IOException declared because the stubbed {@code GcsUtil} methods declare it
 */
private GcsUtil buildMockGcsUtil() throws IOException {
  GcsUtil gcsUtil = Mockito.mock(GcsUtil.class);

  // Each open() call is served by a channel backed by its own throwaway temp file.
  Mockito.when(gcsUtil.open(Mockito.any(GcsPath.class)))
      .then(
          invocation -> {
            return FileChannel.open(
                Files.createTempFile("channel-", ".tmp"),
                StandardOpenOption.CREATE,
                StandardOpenOption.DELETE_ON_CLOSE);
          });

  // expand() hands back just the requested path; TextIO validates this during apply().
  Mockito.when(gcsUtil.expand(Mockito.any(GcsPath.class)))
      .then(
          invocation -> {
            GcsPath requested = (GcsPath) invocation.getArguments()[0];
            return ImmutableList.of(requested);
          });

  return gcsUtil;
}
use of org.apache.beam.sdk.extensions.gcp.util.GcsUtil in project beam by apache.
the class ExampleEchoPipelineTest method buildMockGcsUtil.
/**
 * Creates a Mockito stand-in for {@link GcsUtil} that answers {@code open} with a channel onto
 * a new temporary file and answers {@code expand} with the path it was given.
 *
 * @return the configured mock
 * @throws IOException declared because the stubbed {@code GcsUtil} methods declare it
 */
private GcsUtil buildMockGcsUtil() throws IOException {
  // Serves every open() request with a channel backed by its own throwaway temp file.
  Answer<SeekableByteChannel> bogusChannel =
      new Answer<SeekableByteChannel>() {
        @Override
        public SeekableByteChannel answer(InvocationOnMock invocation) throws Throwable {
          return FileChannel.open(
              Files.createTempFile("channel-", ".tmp"),
              StandardOpenOption.CREATE,
              StandardOpenOption.DELETE_ON_CLOSE);
        }
      };

  // Echoes the requested path back as a one-element list; TextIO validates this during apply().
  Answer<List<GcsPath>> echoPath =
      new Answer<List<GcsPath>>() {
        @Override
        public List<GcsPath> answer(InvocationOnMock invocation) throws Throwable {
          GcsPath requested = (GcsPath) invocation.getArguments()[0];
          return ImmutableList.of(requested);
        }
      };

  GcsUtil gcsUtil = Mockito.mock(GcsUtil.class);
  Mockito.when(gcsUtil.open(Mockito.any(GcsPath.class))).then(bogusChannel);
  Mockito.when(gcsUtil.expand(Mockito.any(GcsPath.class))).then(echoPath);
  return gcsUtil;
}
Aggregations