use of org.apache.flink.fs.gs.storage.GSBlobIdentifier in project flink by apache.
the class GSRecoverableFsDataOutputStream method createWriteChannel.
/**
 * Opens a checksum write channel that targets a newly registered temporary component blob.
 *
 * <p>A random component object id is generated and appended to {@code componentObjectIds}
 * before the underlying storage write channel is opened.
 *
 * @return a {@link GSChecksumWriteChannel} wrapping the newly opened storage write channel
 */
private GSChecksumWriteChannel createWriteChannel() {
    // register a fresh component object id and derive the temporary blob it maps to
    final UUID newComponentId = UUID.randomUUID();
    componentObjectIds.add(newComponentId);
    final GSBlobIdentifier temporaryBlobId =
            BlobUtils.getTemporaryBlobIdentifier(finalBlobIdentifier, newComponentId, options);

    // pass the chunk size through only when the options specify one; otherwise use the
    // single-argument overload
    final Optional<MemorySize> chunkSize = options.getWriterChunkSize();
    final GSBlobStorage.WriteChannel rawChannel;
    if (chunkSize.isPresent()) {
        rawChannel = storage.writeBlob(temporaryBlobId, chunkSize.get());
    } else {
        rawChannel = storage.writeBlob(temporaryBlobId);
    }

    return new GSChecksumWriteChannel(storage, rawChannel, temporaryBlobId);
}
use of org.apache.flink.fs.gs.storage.GSBlobIdentifier in project flink by apache.
the class GSRecoverableWriterCommitter method composeBlobs.
/**
 * Helper to compose an arbitrary number of blobs into a final blob, staying under the
 * composeMaxBlobs limit for any individual compose operation.
 *
 * <p>When more source blobs are supplied than fit into a single compose operation, the first
 * batch is composed into an intermediate temporary blob; that blob is then placed ahead of the
 * remaining source blobs and the method recurses. The supplied list is never modified.
 *
 * @param sourceBlobIdentifiers The source blob ids to compose; must be non-null and non-empty
 * @param targetBlobIdentifier The target blob id for the composed result; must be non-null
 * @throws IllegalStateException if more than one compose operation is required but
 *     composeMaxBlobs is less than 2, in which case no progress could ever be made
 */
private void composeBlobs(List<GSBlobIdentifier> sourceBlobIdentifiers, GSBlobIdentifier targetBlobIdentifier) {
    LOGGER.trace("Composing blobs {} to {} for commit with options {}", sourceBlobIdentifiers, targetBlobIdentifier, options);
    Preconditions.checkNotNull(sourceBlobIdentifiers);
    Preconditions.checkArgument(sourceBlobIdentifiers.size() > 0);
    Preconditions.checkNotNull(targetBlobIdentifier);

    // split the source list into two parts; first, the ones we can compose in this operation
    // (up to composeMaxBlobs), and, second, whichever blobs are left over
    final int composeToIndex = Math.min(composeMaxBlobs, sourceBlobIdentifiers.size());
    List<GSBlobIdentifier> composeBlobIds = sourceBlobIdentifiers.subList(0, composeToIndex);
    List<GSBlobIdentifier> remainingBlobIds = sourceBlobIdentifiers.subList(composeToIndex, sourceBlobIdentifiers.size());

    // if this is the last compose, i.e. if there are no remaining blob ids, compose directly
    // to the originally specified target blob id and we are done
    if (remainingBlobIds.isEmpty()) {
        storage.compose(composeBlobIds, targetBlobIdentifier);
        return;
    }

    // every further round re-adds one intermediate blob to the work list, so each compose
    // must consume at least two blobs for the list to shrink; fail fast rather than
    // recursing without bound
    if (composeMaxBlobs < 2) {
        throw new IllegalStateException(
                "composeMaxBlobs must be at least 2 to compose "
                        + sourceBlobIdentifiers.size()
                        + " blobs, but was "
                        + composeMaxBlobs);
    }

    // otherwise, we must create an intermediate blob id to hold the result of this compose
    // operation
    UUID temporaryObjectId = UUID.randomUUID();
    GSBlobIdentifier intermediateBlobId = BlobUtils.getTemporaryBlobIdentifier(recoverable.finalBlobIdentifier, temporaryObjectId, options);

    // compose the blobs
    storage.compose(composeBlobIds, intermediateBlobId);

    // add the composed blob id to the beginning of the list of remaining blob ids, and
    // recurse. a fresh list is built because remainingBlobIds is a view of the caller's list,
    // and inserting into it would modify (or fail on) the list we were handed. the class name
    // is fully qualified so this method does not rely on an ArrayList import being present
    List<GSBlobIdentifier> nextSourceBlobIds = new java.util.ArrayList<>(remainingBlobIds.size() + 1);
    nextSourceBlobIds.add(intermediateBlobId);
    nextSourceBlobIds.addAll(remainingBlobIds);
    composeBlobs(nextSourceBlobIds, targetBlobIdentifier);
}
use of org.apache.flink.fs.gs.storage.GSBlobIdentifier in project flink by apache.
the class GSRecoverableWriterCommitter method writeFinalBlob.
/**
 * Writes the final blob by composing the temporary blobs and copying, if necessary.
 *
 * <p>With no component blobs, an empty final blob is created. Otherwise the component blobs
 * are composed either directly into the final blob (when the temporary bucket is the final
 * blob's bucket) or into an intermediate temporary blob that is then copied to the final
 * blob location.
 */
private void writeFinalBlob() {
    // look up the component blob ids once and reuse the result for every branch below
    List<GSBlobIdentifier> blobIdentifiers = recoverable.getComponentBlobIds(options);

    // do we have any component blobs?
    if (blobIdentifiers.isEmpty()) {
        // we have no blob identifiers, so just create an empty target blob
        storage.createBlob(recoverable.finalBlobIdentifier);
        return;
    }

    // yes, we have component blobs. compose them into the final blob id. if the component
    // blob ids are in the same bucket as the final blob id, this can be done directly.
    // otherwise, we must compose to a new temporary blob id in the same bucket as the
    // component blob ids and then copy that blob to the final blob location
    String temporaryBucketName = BlobUtils.getTemporaryBucketName(recoverable.finalBlobIdentifier, options);
    if (recoverable.finalBlobIdentifier.bucketName.equals(temporaryBucketName)) {
        // compose directly to final blob
        composeBlobs(blobIdentifiers, recoverable.finalBlobIdentifier);
    } else {
        // compose to the intermediate blob, then copy
        UUID temporaryObjectId = UUID.randomUUID();
        GSBlobIdentifier intermediateBlobIdentifier = BlobUtils.getTemporaryBlobIdentifier(recoverable.finalBlobIdentifier, temporaryObjectId, options);
        composeBlobs(blobIdentifiers, intermediateBlobIdentifier);
        storage.copy(intermediateBlobIdentifier, recoverable.finalBlobIdentifier);
    }
}
use of org.apache.flink.fs.gs.storage.GSBlobIdentifier in project flink by apache.
the class GSChecksumWriteChannelTest method before.
/**
 * Prepares the fixture for each test: creates the blob identifier, fills one byte buffer per
 * entry in {@code bufferSizes} with seeded pseudo-random data, and records the concatenation
 * of the slices selected by {@code writeStarts} and {@code writeLengths} as the bytes the test
 * expects to be written.
 *
 * @throws IOException if closing the in-memory stream fails
 */
@Before
public void before() throws IOException {
    // a fixed seed keeps the generated buffer contents reproducible between runs
    final Random generator = new Random(RANDOM_SEED);
    blobIdentifier = new GSBlobIdentifier("foo", "bar");

    // build every buffer and, at the same time, accumulate the slice of each one that the
    // test will write
    final int bufferCount = bufferSizes.length;
    byteBuffers = new byte[bufferCount][];
    try (ByteArrayOutputStream expected = new ByteArrayOutputStream()) {
        for (int index = 0; index < bufferCount; index++) {
            final byte[] buffer = new byte[bufferSizes[index]];
            byteBuffers[index] = buffer;
            generator.nextBytes(buffer);
            expected.write(buffer, writeStarts[index], writeLengths[index]);
        }
        expected.flush();
        expectedWrittenBytes = expected.toByteArray();
    }
}
use of org.apache.flink.fs.gs.storage.GSBlobIdentifier in project flink by apache.
the class GSCommitRecoverableTest method data.
/**
 * Supplies the parameter rows for this parameterized test: every combination of an empty or a
 * populated list of component object ids with an absent ({@code null}) or an explicit
 * temporary bucket name.
 *
 * @return four rows, each of the form (componentObjectIds, temporaryBucketName)
 */
@Parameterized.Parameters(name = "componentObjectIds={0}, temporaryBucketName={1}")
public static Collection<Object[]> data() {
    ArrayList<UUID> emptyComponentObjectIds = new ArrayList<>();
    ArrayList<UUID> populatedComponentObjectIds = new ArrayList<>();
    for (int i = 0; i < 2; i++) {
        populatedComponentObjectIds.add(UUID.randomUUID());
    }
    return Arrays.asList(
            new Object[][] {
                // no component ids with no temporary bucket specified
                {emptyComponentObjectIds, null},
                // no component ids with a temporary bucket specified
                {emptyComponentObjectIds, "temporary-bucket"},
                // populated component ids with no temporary bucket specified
                {populatedComponentObjectIds, null},
                // populated component ids with temporary bucket specified
                {populatedComponentObjectIds, "temporary-bucket"}
            });
}
Aggregations