use of org.apache.flink.fs.gs.storage.GSBlobIdentifier in project flink by apache.
the class GSRecoverableWriterCommitterTest method commitTestInternal.
/**
* Internal commit function called by other tests. Writes some number of blobs, creates a commit
* recoverable after some number of them (possibly not all of them!), and then commits.
*
* @return The committer
* @throws IOException On underlying failure
*/
private GSRecoverableWriterCommitter commitTestInternal() throws IOException {

    // this will hold the component object ids to commit
    ArrayList<UUID> componentObjectIdsToCommit = new ArrayList<>();

    // create the blobs
    for (int blobIndex = 0; blobIndex < blobSizes.length; blobIndex++) {

        // create the object id and blob identifier
        UUID componentObjectId = UUID.randomUUID();
        GSBlobIdentifier temporaryBlobIdentifier =
                BlobUtils.getTemporaryBlobIdentifier(blobIdentifier, componentObjectId, options);

        // write the bytes to mock storage
        int blobSize = blobSizes[blobIndex];
        byte[] bytes = new byte[blobSize];
        random.nextBytes(bytes);
        blobStorage.blobs.put(temporaryBlobIdentifier, new MockBlobStorage.BlobValue(bytes));

        // if this blob falls within the commit count, add it to the recoverable and note
        // that we expect to see the associated bytes in the result
        if (blobIndex <= commitBlobCount) {
            componentObjectIdsToCommit.add(componentObjectId);
            expectedBytes.write(bytes);
        }
    }

    // create the recoverable and the committer
    GSCommitRecoverable recoverable =
            new GSCommitRecoverable(blobIdentifier, componentObjectIdsToCommit);
    return new GSRecoverableWriterCommitter(blobStorage, options, recoverable, composeMaxBlobs);
}
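For context, a minimal sketch of how a calling test might consume the returned committer. This is an assumption based on the surrounding tests, not code from the listing: it assumes expectedBytes is a ByteArrayOutputStream and that commit() composes the temporary blobs into the final blob.

// sketch only: commit and verify the composed result against the expected bytes
GSRecoverableWriterCommitter committer = commitTestInternal();
committer.commit();
MockBlobStorage.BlobValue committedValue = blobStorage.blobs.get(blobIdentifier);
assertArrayEquals(expectedBytes.toByteArray(), committedValue.content);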
use of org.apache.flink.fs.gs.storage.GSBlobIdentifier in project flink by apache.
the class GSCommitRecoverableSerializer method deserializeCommitRecoverable.
/**
* Deserializes a commit recoverable from the input stream.
*
* @param dataInputStream The input stream
* @return The commit recoverable
* @throws IOException On underlying failure
*/
static GSCommitRecoverable deserializeCommitRecoverable(DataInputStream dataInputStream)
        throws IOException {

    // finalBlobId
    String finalBucketName = dataInputStream.readUTF();
    String finalObjectName = dataInputStream.readUTF();
    GSBlobIdentifier finalBlobIdentifier = new GSBlobIdentifier(finalBucketName, finalObjectName);

    // componentObjectIds
    ArrayList<UUID> componentObjectIds = new ArrayList<>();
    int count = dataInputStream.readInt();
    for (int i = 0; i < count; i++) {
        long msbValue = dataInputStream.readLong();
        long lsbValue = dataInputStream.readLong();
        UUID componentObjectId = new UUID(msbValue, lsbValue);
        componentObjectIds.add(componentObjectId);
    }

    GSCommitRecoverable recoverable = new GSCommitRecoverable(finalBlobIdentifier, componentObjectIds);
    LOGGER.trace("Deserialized commit recoverable {}", recoverable);
    return recoverable;
}
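The read order above fixes the wire format: bucket name, object name, UUID count, then one (msb, lsb) pair per component object id. A mirror-image write side would look roughly like the sketch below; this is not the actual GSCommitRecoverableSerializer code, and the finalBlobIdentifier and componentObjectIds field accesses are assumptions.

// sketch of the matching serialization path implied by the format read above
static void serializeCommitRecoverable(GSCommitRecoverable recoverable, DataOutputStream dataOutputStream)
        throws IOException {

    // finalBlobIdentifier: bucket name, then object name
    dataOutputStream.writeUTF(recoverable.finalBlobIdentifier.bucketName);
    dataOutputStream.writeUTF(recoverable.finalBlobIdentifier.objectName);

    // componentObjectIds: count, then one (msb, lsb) pair per UUID
    dataOutputStream.writeInt(recoverable.componentObjectIds.size());
    for (UUID componentObjectId : recoverable.componentObjectIds) {
        dataOutputStream.writeLong(componentObjectId.getMostSignificantBits());
        dataOutputStream.writeLong(componentObjectId.getLeastSignificantBits());
    }
}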
use of org.apache.flink.fs.gs.storage.GSBlobIdentifier in project flink by apache.
the class GSFileSystemScenarioTest method simpleWriteTest.
/** Test writing a single array of bytes to a stream. */
@Test
public void simpleWriteTest() throws IOException {

    // only run the test for valid chunk sizes
    assumeTrue(writeChunkSizeIsValid);

    // create the options and writer
    GSFileSystemOptions options = new GSFileSystemOptions(flinkConfig);
    RecoverableWriter writer = new GSRecoverableWriter(storage, options);

    // create a stream and write some random bytes to it
    RecoverableFsDataOutputStream stream = writer.open(path);
    byte[] data = new byte[128];
    random.nextBytes(data);
    stream.write(data);

    // close for commit
    RecoverableFsDataOutputStream.Committer committer = stream.closeForCommit();

    // there should be a single blob now, in the specified temporary bucket or,
    // if no temporary bucket is specified, in the final bucket
    assertEquals(1, storage.blobs.size());
    GSBlobIdentifier temporaryBlobIdentifier =
            (GSBlobIdentifier) storage.blobs.keySet().toArray()[0];
    String expectedTemporaryBucket =
            StringUtils.isNullOrWhitespaceOnly(temporaryBucketName)
                    ? blobIdentifier.bucketName
                    : temporaryBucketName;
    assertEquals(expectedTemporaryBucket, temporaryBlobIdentifier.bucketName);

    // commit
    committer.commit();

    // there should be exactly one blob after commit, with the expected contents;
    // all temporary blobs should be removed
    assertEquals(1, storage.blobs.size());
    MockBlobStorage.BlobValue blobValue = storage.blobs.get(blobIdentifier);
    assertArrayEquals(data, blobValue.content);
}
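Note that the final storage.blobs.get(blobIdentifier) lookup only works if GSBlobIdentifier behaves as a value type when used as a map key; a sketch of that implied property, assumed from the test rather than taken from the class source:

// sketch: equal bucket/object names should locate the same entry in the mock storage map
GSBlobIdentifier first = new GSBlobIdentifier("foo", "bar");
GSBlobIdentifier second = new GSBlobIdentifier("foo", "bar");
assertEquals(first, second);
assertEquals(first.hashCode(), second.hashCode());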
use of org.apache.flink.fs.gs.storage.GSBlobIdentifier in project flink by apache.
the class GSFileSystemScenarioTest method before.
@Before
public void before() {

    random = new Random(TestUtils.RANDOM_SEED);

    // construct the flink configuration
    flinkConfig = new Configuration();
    if (!StringUtils.isNullOrWhitespaceOnly(temporaryBucketName)) {
        flinkConfig.set(GSFileSystemOptions.WRITER_TEMPORARY_BUCKET_NAME, temporaryBucketName);
    }
    if (writeChunkSize != null) {
        flinkConfig.set(GSFileSystemOptions.WRITER_CHUNK_SIZE, writeChunkSize);
    }

    if (writeChunkSize == null) {
        // unspecified chunk size is valid
        writeChunkSizeIsValid = true;
    } else {
        // chunk size that is > 0 and multiple of 256KB is valid
        long byteCount = writeChunkSize.getBytes();
        writeChunkSizeIsValid = (byteCount > 0) && (byteCount % (256 * 1024) == 0);
    }

    storage = new MockBlobStorage();
    blobIdentifier = new GSBlobIdentifier("foo", "bar");
    path = new Path(String.format("gs://%s/%s", blobIdentifier.bucketName, blobIdentifier.objectName));
}
use of org.apache.flink.fs.gs.storage.GSBlobIdentifier in project flink by apache.
the class BlobUtilsTest method shouldUseIdentifierBucketNameNameIfTemporaryBucketNotSpecified.
@Test
public void shouldUseIdentifierBucketNameNameIfTemporaryBucketNotSpecified() {
    Configuration flinkConfig = new Configuration();
    GSFileSystemOptions options = new GSFileSystemOptions(flinkConfig);
    GSBlobIdentifier identifier = new GSBlobIdentifier("foo", "bar");
    String bucketName = BlobUtils.getTemporaryBucketName(identifier, options);
    assertEquals("foo", bucketName);
}
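A natural counterpart, sketched here rather than taken from the listing above, would assert that a configured temporary bucket takes precedence, using the WRITER_TEMPORARY_BUCKET_NAME option that appears in the scenario test earlier; the test name and expected behavior are assumptions consistent with that test.

@Test
public void shouldUseTemporaryBucketNameIfSpecified() {
    // sketch only: the configured temporary bucket should win over the identifier's bucket
    Configuration flinkConfig = new Configuration();
    flinkConfig.set(GSFileSystemOptions.WRITER_TEMPORARY_BUCKET_NAME, "temp-bucket");
    GSFileSystemOptions options = new GSFileSystemOptions(flinkConfig);
    GSBlobIdentifier identifier = new GSBlobIdentifier("foo", "bar");
    String bucketName = BlobUtils.getTemporaryBucketName(identifier, options);
    assertEquals("temp-bucket", bucketName);
}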