Use of com.azure.storage.blob.specialized.BlockBlobAsyncClient in project samza by apache.
The class TestAzureBlobAvroWriter, method testRecordLimitExceeded.
@Test
public void testRecordLimitExceeded() throws Exception {
  String blobUrlPrefix = "test";
  String blobNameRegex = "test/[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}-[0-9]{2}-.{8}.avro.gz";
  AzureBlobWriterMetrics mockMetrics = mock(AzureBlobWriterMetrics.class);
  long maxBlobSize = AzureBlobAvroWriter.DATAFILEWRITER_OVERHEAD + 1000;
  long maxRecordsPerBlob = 10;
  BlobContainerAsyncClient mockContainerClient = PowerMockito.mock(BlobContainerAsyncClient.class);
  azureBlobAvroWriter = spy(new AzureBlobAvroWriter(mockContainerClient, mockMetrics, threadPool, THRESHOLD,
      60000, blobUrlPrefix, null, null, null, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig,
      STREAM_NAME, maxBlobSize, maxRecordsPerBlob, mockCompression, true));

  // Mocks for the first blob: intercept construction of its DataFileWriter and AzureBlobOutputStream.
  DataFileWriter mockDataFileWriter1 = mock(DataFileWriter.class);
  PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(mockDataFileWriter1);
  BlobAsyncClient mockBlobAsyncClient1 = mock(BlobAsyncClient.class);
  doReturn(mockBlobAsyncClient1).when(mockContainerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
  BlockBlobAsyncClient mockBlockBlobAsyncClient1 = mock(BlockBlobAsyncClient.class);
  doReturn(mockBlockBlobAsyncClient1).when(mockBlobAsyncClient1).getBlockBlobAsyncClient();
  AzureBlobOutputStream mockAzureBlobOutputStream1 = mock(AzureBlobOutputStream.class);
  PowerMockito.whenNew(AzureBlobOutputStream.class)
      .withArguments(mockBlockBlobAsyncClient1, threadPool, mockMetrics, blobMetadataGeneratorFactory,
          blobMetadataGeneratorConfig, STREAM_NAME, 60000L, THRESHOLD, mockCompression)
      .thenReturn(mockAzureBlobOutputStream1);
  // Report a tiny size so only the record limit, not the size limit, can trigger a rollover.
  when(mockAzureBlobOutputStream1.getSize()).thenReturn(1L);

  // Fill the first blob to exactly maxRecordsPerBlob records.
  for (int i = 0; i < maxRecordsPerBlob; i++) {
    azureBlobAvroWriter.write(ome);
  }

  // Mocks for the second blob: the record limit is reached, so the next write must roll over.
  OutgoingMessageEnvelope ome2 = createOME("Topic2");
  DataFileWriter mockDataFileWriter2 = mock(DataFileWriter.class);
  PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(mockDataFileWriter2);
  BlobAsyncClient mockBlobAsyncClient2 = mock(BlobAsyncClient.class);
  doReturn(mockBlobAsyncClient2).when(mockContainerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
  BlockBlobAsyncClient mockBlockBlobAsyncClient2 = mock(BlockBlobAsyncClient.class);
  doReturn(mockBlockBlobAsyncClient2).when(mockBlobAsyncClient2).getBlockBlobAsyncClient();
  AzureBlobOutputStream mockAzureBlobOutputStream2 = mock(AzureBlobOutputStream.class);
  PowerMockito.whenNew(AzureBlobOutputStream.class)
      .withArguments(mockBlockBlobAsyncClient2, threadPool, mockMetrics, blobMetadataGeneratorFactory,
          blobMetadataGeneratorConfig, STREAM_NAME, 60000L, THRESHOLD, mockCompression)
      .thenReturn(mockAzureBlobOutputStream2);
  when(mockAzureBlobOutputStream2.getSize()).thenReturn(1L);
  azureBlobAvroWriter.write(ome2);

  // Two distinct blob names must have been requested, both under the blob URL prefix.
  ArgumentCaptor<String> argument = ArgumentCaptor.forClass(String.class);
  verify(mockContainerClient, times(2)).getBlobAsyncClient(argument.capture());
  argument.getAllValues().forEach(blobName -> Assert.assertTrue(blobName.contains(blobUrlPrefix)));
  List<String> allBlobNames = argument.getAllValues();
  Assert.assertNotEquals(allBlobNames.get(0), allBlobNames.get(1));

  // The first writer received all maxRecordsPerBlob records; the second received only ome2.
  verify(mockDataFileWriter1, times((int) maxRecordsPerBlob))
      .appendEncoded(ByteBuffer.wrap(encodeRecord((IndexedRecord) ome.getMessage())));
  verify(mockDataFileWriter2).appendEncoded(ByteBuffer.wrap(encodeRecord((IndexedRecord) ome2.getMessage())));
  verify(mockDataFileWriter1).create(((IndexedRecord) ome.getMessage()).getSchema(), mockAzureBlobOutputStream1);
  verify(mockDataFileWriter2).create(((IndexedRecord) ome2.getMessage()).getSchema(), mockAzureBlobOutputStream2);
}
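Note that PowerMockito.whenNew only intercepts construction when the test class runs under the PowerMock runner and the class performing the new calls is prepared for test. Those class-level annotations sit outside this excerpt; the following is a minimal sketch of what they typically look like for these tests, assuming AzureBlobAvroWriter is the class whose constructor calls are intercepted (not copied from the verified Samza source):

@RunWith(PowerMockRunner.class)
@PrepareForTest(AzureBlobAvroWriter.class) // class whose "new DataFileWriter(...)" / "new AzureBlobOutputStream(...)" calls are intercepted
public class TestAzureBlobAvroWriter {
  // test methods shown on this page ...
}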
Use of com.azure.storage.blob.specialized.BlockBlobAsyncClient in project samza by apache.
The class TestAzureBlobAvroWriter, method testMaxBlobSizeExceeded.
@Test
public void testMaxBlobSizeExceeded() throws Exception {
  String blobUrlPrefix = "test";
  String blobNameRegex = "test/[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}-[0-9]{2}-.{8}.avro.gz";
  long maxBlobSize = 1000;
  AzureBlobWriterMetrics mockMetrics = mock(AzureBlobWriterMetrics.class);
  BlobContainerAsyncClient mockContainerClient = PowerMockito.mock(BlobContainerAsyncClient.class);
  azureBlobAvroWriter = spy(new AzureBlobAvroWriter(mockContainerClient, mockMetrics, threadPool, THRESHOLD,
      60000, blobUrlPrefix, null, null, null, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig,
      STREAM_NAME, maxBlobSize, 10, mockCompression, true));

  // Mocks for the first blob; its stream reports a size just under the limit.
  DataFileWriter mockDataFileWriter1 = mock(DataFileWriter.class);
  PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(mockDataFileWriter1);
  BlobAsyncClient mockBlobAsyncClient1 = mock(BlobAsyncClient.class);
  doReturn(mockBlobAsyncClient1).when(mockContainerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
  BlockBlobAsyncClient mockBlockBlobAsyncClient1 = mock(BlockBlobAsyncClient.class);
  doReturn(mockBlockBlobAsyncClient1).when(mockBlobAsyncClient1).getBlockBlobAsyncClient();
  AzureBlobOutputStream mockAzureBlobOutputStream1 = mock(AzureBlobOutputStream.class);
  PowerMockito.whenNew(AzureBlobOutputStream.class)
      .withArguments(mockBlockBlobAsyncClient1, threadPool, mockMetrics, blobMetadataGeneratorFactory,
          blobMetadataGeneratorConfig, STREAM_NAME, 60000L, THRESHOLD, mockCompression)
      .thenReturn(mockAzureBlobOutputStream1);
  when(mockAzureBlobOutputStream1.getSize()).thenReturn(maxBlobSize - 1);

  // The first OME creates the first blob.
  azureBlobAvroWriter.write(ome);

  OutgoingMessageEnvelope ome2 = createOME("Topic2");
  DataFileWriter mockDataFileWriter2 = mock(DataFileWriter.class);
  PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(mockDataFileWriter2);
  BlobAsyncClient mockBlobAsyncClient2 = mock(BlobAsyncClient.class);
  doReturn(mockBlobAsyncClient2).when(mockContainerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
  BlockBlobAsyncClient mockBlockBlobAsyncClient2 = mock(BlockBlobAsyncClient.class);
  doReturn(mockBlockBlobAsyncClient2).when(mockBlobAsyncClient2).getBlockBlobAsyncClient();
  AzureBlobOutputStream mockAzureBlobOutputStream2 = mock(AzureBlobOutputStream.class);
  PowerMockito.whenNew(AzureBlobOutputStream.class)
      .withArguments(mockBlockBlobAsyncClient2, threadPool, mockMetrics, blobMetadataGeneratorFactory,
          blobMetadataGeneratorConfig, STREAM_NAME, 60000L, THRESHOLD, mockCompression)
      .thenReturn(mockAzureBlobOutputStream2);
  when(mockAzureBlobOutputStream2.getSize()).thenReturn(maxBlobSize - 1);

  // The second OME creates the second blob: maxBlobSize is 1000 and the mocked stream reports a size of 999.
  azureBlobAvroWriter.write(ome2);

  // Two distinct blob names must have been requested, both under the blob URL prefix.
  ArgumentCaptor<String> argument = ArgumentCaptor.forClass(String.class);
  verify(mockContainerClient, times(2)).getBlobAsyncClient(argument.capture());
  argument.getAllValues().forEach(blobName -> Assert.assertTrue(blobName.contains(blobUrlPrefix)));
  List<String> allBlobNames = argument.getAllValues();
  Assert.assertNotEquals(allBlobNames.get(0), allBlobNames.get(1));

  // Each record went to its own blob's writer, each opened against its own output stream.
  verify(mockDataFileWriter1).appendEncoded(ByteBuffer.wrap(encodeRecord((IndexedRecord) ome.getMessage())));
  verify(mockDataFileWriter2).appendEncoded(ByteBuffer.wrap(encodeRecord((IndexedRecord) ome2.getMessage())));
  verify(mockDataFileWriter1).create(((IndexedRecord) ome.getMessage()).getSchema(), mockAzureBlobOutputStream1);
  verify(mockDataFileWriter2).create(((IndexedRecord) ome2.getMessage()).getSchema(), mockAzureBlobOutputStream2);
}
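Both tests stub getBlobAsyncClient with Matchers.matches(blobNameRegex), so the stubs fire only for names that follow the writer's naming scheme. A self-contained check of that regex against a sample name (the sample is hypothetical, shaped like the names startNextBlob below generates):

import java.util.regex.Pattern;

public class BlobNameRegexCheck {
  public static void main(String[] args) {
    // Same pattern the tests pass to Matchers.matches(...).
    Pattern pattern = Pattern.compile(
        "test/[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}-[0-9]{2}-.{8}.avro.gz");
    // prefix/yyyy/MM/dd/HH/mm-ss-<8 random chars>.avro.gz
    String sample = "test/2021/07/15/21/45-05-a1b2c3d4.avro.gz";
    System.out.println(pattern.matcher(sample).matches()); // prints: true
  }
}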
Use of com.azure.storage.blob.specialized.BlockBlobAsyncClient in project ambry by linkedin.
The class StorageClient, method deleteFileAsync.
/**
 * Delete a file from blob storage, if it exists. The deletion is issued through the async
 * client, but this method blocks until the operation completes.
 * @param containerName name of the container holding the file to delete.
 * @param fileName name of the file to delete.
 * @return {@code true} if the file was deleted, {@code false} if it did not exist.
 * @throws BlobStorageException for any error on the ABS side.
 */
boolean deleteFileAsync(String containerName, String fileName) throws BlobStorageException {
  AtomicReference<Boolean> retValRef = new AtomicReference<>(false);
  doStorageClientOperation(() -> {
    BlockBlobAsyncClient blobClient = getBlockBlobAsyncClient(containerName, fileName, false);
    // Delete only if the blob exists; both calls block on the async client's future.
    if (blobClient.exists().toFuture().get()) {
      blobClient.delete().toFuture().get();
      retValRef.set(true);
    }
    return null;
  });
  return retValRef.get();
}
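A hypothetical call site, assuming an already-constructed StorageClient named storageClient (the container and blob names here are illustrative):

// Returns true only when the blob existed and was removed.
boolean deleted = storageClient.deleteFileAsync("ambry-container", "stale-blob");
if (!deleted) {
  // exists() returned false: nothing to delete, which is not treated as an error.
}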
Use of com.azure.storage.blob.specialized.BlockBlobAsyncClient in project samza by apache.
The class AzureBlobAvroWriter, method startNextBlob.
private void startNextBlob(Optional<IndexedRecord> optionalIndexedRecord) throws IOException {
  // Close out the current blob, if any, before starting a new one.
  if (currentBlobWriterComponents != null) {
    LOG.info("Starting new blob as current blob size is {} and max blob size is {} "
            + "or number of records is {} and max records in blob is {}",
        currentBlobWriterComponents.azureBlobOutputStream.getSize(), maxBlobSize,
        recordsInCurrentBlob, maxRecordsPerBlob);
    currentBlobWriterComponents.dataFileWriter.flush();
    currentBlobWriterComponents.azureBlobOutputStream.releaseBuffer();
    recordsInCurrentBlob = 0;
  }

  // A null datumWriter means optionalIndexedRecord is the first message, so derive the Avro schema from it.
  if (datumWriter == null) {
    if (optionalIndexedRecord.isPresent()) {
      IndexedRecord record = optionalIndexedRecord.get();
      schema = record.getSchema();
      if (record instanceof SpecificRecord) {
        datumWriter = new SpecificDatumWriter<>(schema);
      } else {
        datumWriter = new GenericDatumWriter<>(schema);
      }
    } else {
      throw new IllegalStateException("Writing without schema setup.");
    }
  }

  // Build the blob name from the prefix, a UTC timestamp, and optionally a random suffix.
  String blobURL;
  if (useRandomStringInBlobName) {
    blobURL = String.format(BLOB_NAME_RANDOM_STRING_AVRO, blobURLPrefix,
        UTC_FORMATTER.format(System.currentTimeMillis()),
        UUID.randomUUID().toString().substring(0, 8), compression.getFileExtension());
  } else {
    blobURL = String.format(BLOB_NAME_AVRO, blobURLPrefix,
        UTC_FORMATTER.format(System.currentTimeMillis()), compression.getFileExtension());
  }
  LOG.info("Creating new blob: {}", blobURL);

  BlockBlobAsyncClient blockBlobAsyncClient =
      containerAsyncClient.getBlobAsyncClient(blobURL).getBlockBlobAsyncClient();
  DataFileWriter<IndexedRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
  AzureBlobOutputStream azureBlobOutputStream;
  try {
    azureBlobOutputStream = new AzureBlobOutputStream(blockBlobAsyncClient, blobThreadPool, metrics,
        blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, streamName, flushTimeoutMs,
        maxBlockFlushThresholdSize, compression);
  } catch (Exception e) {
    throw new SamzaException("Unable to create AzureBlobOutputStream", e);
  }
  dataFileWriter.create(schema, azureBlobOutputStream);
  dataFileWriter.setFlushOnEveryBlock(false);
  this.currentBlobWriterComponents =
      new BlobWriterComponents(dataFileWriter, azureBlobOutputStream, blockBlobAsyncClient);
  allBlobWriterComponents.add(this.currentBlobWriterComponents);
  LOG.info("Created new blob: {}", blobURL);
}
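The BLOB_NAME_AVRO / BLOB_NAME_RANDOM_STRING_AVRO constants and UTC_FORMATTER are defined outside this excerpt. Below is a hedged sketch of the naming scheme they appear to implement, inferred from the blobNameRegex in the tests above rather than copied from the Samza source:

import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.UUID;

public class BlobNameSketch {
  // Inferred timestamp layout: yyyy/MM/dd/HH/mm-ss in UTC (an assumption, not verified source).
  private static final DateTimeFormatter UTC_FORMATTER =
      DateTimeFormatter.ofPattern("yyyy/MM/dd/HH/mm-ss").withZone(ZoneOffset.UTC);

  public static void main(String[] args) {
    String prefix = "test";         // plays the role of blobURLPrefix in the writer
    String extension = ".avro.gz";  // plays the role of compression.getFileExtension() for gzip
    String blobName = String.format("%s/%s-%s%s", prefix, UTC_FORMATTER.format(Instant.now()),
        UUID.randomUUID().toString().substring(0, 8), extension);
    System.out.println(blobName);   // e.g. test/2021/07/15/21/45-05-a1b2c3d4.avro.gz
  }
}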