Search in sources :

Example 1 with BlockBlobAsyncClient

Usage of com.azure.storage.blob.specialized.BlockBlobAsyncClient in the Apache Samza project.

From the class TestAzureBlobAvroWriter, method testRecordLimitExceeded:

/**
 * Verifies that exceeding maxRecordsPerBlob forces the writer to roll over to a new blob:
 * after maxRecordsPerBlob writes, the next write must create a second blob with a
 * different generated name, and each DataFileWriter must receive exactly the records
 * written while its blob was current.
 */
@Test
public void testRecordLimitExceeded() throws Exception {
    String blobUrlPrefix = "test";
    // Expected blob name: <prefix>/yyyy/MM/dd/HH/mm-ss-<8 random chars>.avro.gz.
    // The dots before "avro" and "gz" are literal file-extension separators and must be
    // escaped; unescaped, "." would match any character in those positions and the regex
    // would accept malformed names.
    String blobNameRegex = "test/[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}-[0-9]{2}-.{8}\\.avro\\.gz";
    AzureBlobWriterMetrics mockMetrics = mock(AzureBlobWriterMetrics.class);
    long maxBlobSize = AzureBlobAvroWriter.DATAFILEWRITER_OVERHEAD + 1000;
    long maxRecordsPerBlob = 10;
    BlobContainerAsyncClient mockContainerClient = PowerMockito.mock(BlobContainerAsyncClient.class);
    azureBlobAvroWriter = spy(new AzureBlobAvroWriter(mockContainerClient, mockMetrics, threadPool, THRESHOLD, 60000, blobUrlPrefix, null, null, null, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, STREAM_NAME, maxBlobSize, maxRecordsPerBlob, mockCompression, true));
    // --- Stub chain for the FIRST blob: container -> blob client -> block blob client -> output stream.
    DataFileWriter mockDataFileWriter1 = mock(DataFileWriter.class);
    PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(mockDataFileWriter1);
    BlobAsyncClient mockBlobAsyncClient1 = mock(BlobAsyncClient.class);
    doReturn(mockBlobAsyncClient1).when(mockContainerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
    BlockBlobAsyncClient mockBlockBlobAsyncClient1 = mock(BlockBlobAsyncClient.class);
    doReturn(mockBlockBlobAsyncClient1).when(mockBlobAsyncClient1).getBlockBlobAsyncClient();
    AzureBlobOutputStream mockAzureBlobOutputStream1 = mock(AzureBlobOutputStream.class);
    PowerMockito.whenNew(AzureBlobOutputStream.class).withArguments(mockBlockBlobAsyncClient1, threadPool, mockMetrics, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, STREAM_NAME, (long) 60000, THRESHOLD, mockCompression).thenReturn(mockAzureBlobOutputStream1);
    // Keep the reported size tiny so only the record count (not the byte size) triggers rollover.
    when(mockAzureBlobOutputStream1.getSize()).thenReturn((long) 1);
    // Fill the first blob to exactly the record limit.
    for (int i = 0; i < maxRecordsPerBlob; i++) {
        azureBlobAvroWriter.write(ome);
    }
    OutgoingMessageEnvelope ome2 = createOME("Topic2");
    // --- Re-stub the same chain for the SECOND blob; later stubbings override the earlier
    // ones, so the rollover write below is routed to the "2" mocks.
    DataFileWriter mockDataFileWriter2 = mock(DataFileWriter.class);
    PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(mockDataFileWriter2);
    BlobAsyncClient mockBlobAsyncClient2 = mock(BlobAsyncClient.class);
    doReturn(mockBlobAsyncClient2).when(mockContainerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
    BlockBlobAsyncClient mockBlockBlobAsyncClient2 = mock(BlockBlobAsyncClient.class);
    doReturn(mockBlockBlobAsyncClient2).when(mockBlobAsyncClient2).getBlockBlobAsyncClient();
    AzureBlobOutputStream mockAzureBlobOutputStream2 = mock(AzureBlobOutputStream.class);
    PowerMockito.whenNew(AzureBlobOutputStream.class).withArguments(mockBlockBlobAsyncClient2, threadPool, mockMetrics, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, STREAM_NAME, (long) 60000, THRESHOLD, mockCompression).thenReturn(mockAzureBlobOutputStream2);
    when(mockAzureBlobOutputStream2.getSize()).thenReturn((long) 1);
    // This write exceeds maxRecordsPerBlob and must start the second blob.
    azureBlobAvroWriter.write(ome2);
    // Two blobs were requested from the container, both under the prefix, with distinct names.
    ArgumentCaptor<String> argument = ArgumentCaptor.forClass(String.class);
    verify(mockContainerClient, times(2)).getBlobAsyncClient(argument.capture());
    argument.getAllValues().forEach(blobName -> {
        Assert.assertTrue(blobName.contains(blobUrlPrefix));
    });
    List<String> allBlobNames = argument.getAllValues();
    Assert.assertNotEquals(allBlobNames.get(0), allBlobNames.get(1));
    // First writer got exactly maxRecordsPerBlob records; second got exactly one.
    verify(mockDataFileWriter1, times((int) maxRecordsPerBlob)).appendEncoded(ByteBuffer.wrap(encodeRecord((IndexedRecord) ome.getMessage())));
    verify(mockDataFileWriter2).appendEncoded(ByteBuffer.wrap(encodeRecord((IndexedRecord) ome2.getMessage())));
    verify(mockDataFileWriter1).create(((IndexedRecord) ome.getMessage()).getSchema(), mockAzureBlobOutputStream1);
    verify(mockDataFileWriter2).create(((IndexedRecord) ome2.getMessage()).getSchema(), mockAzureBlobOutputStream2);
}
Also used : BlobContainerAsyncClient(com.azure.storage.blob.BlobContainerAsyncClient) DataFileWriter(org.apache.avro.file.DataFileWriter) Mockito.anyString(org.mockito.Mockito.anyString) AzureBlobWriterMetrics(org.apache.samza.system.azureblob.producer.AzureBlobWriterMetrics) BlobAsyncClient(com.azure.storage.blob.BlobAsyncClient) BlockBlobAsyncClient(com.azure.storage.blob.specialized.BlockBlobAsyncClient) BlockBlobAsyncClient(com.azure.storage.blob.specialized.BlockBlobAsyncClient) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 2 with BlockBlobAsyncClient

Usage of com.azure.storage.blob.specialized.BlockBlobAsyncClient in the Apache Samza project.

From the class TestAzureBlobAvroWriter, method testMaxBlobSizeExceeded:

/**
 * Verifies that exceeding maxBlobSize forces the writer to roll over to a new blob:
 * with the mocked output-stream size reported as maxBlobSize - 1, the second write must
 * create a second blob with a different generated name, and each DataFileWriter must
 * receive exactly one record.
 */
@Test
public void testMaxBlobSizeExceeded() throws Exception {
    String blobUrlPrefix = "test";
    // Expected blob name: <prefix>/yyyy/MM/dd/HH/mm-ss-<8 random chars>.avro.gz.
    // The dots before "avro" and "gz" are literal file-extension separators and must be
    // escaped; unescaped, "." would match any character in those positions and the regex
    // would accept malformed names.
    String blobNameRegex = "test/[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}-[0-9]{2}-.{8}\\.avro\\.gz";
    long maxBlobSize = 1000;
    AzureBlobWriterMetrics mockMetrics = mock(AzureBlobWriterMetrics.class);
    BlobContainerAsyncClient mockContainerClient = PowerMockito.mock(BlobContainerAsyncClient.class);
    azureBlobAvroWriter = spy(new AzureBlobAvroWriter(mockContainerClient, mockMetrics, threadPool, THRESHOLD, 60000, blobUrlPrefix, null, null, null, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, STREAM_NAME, maxBlobSize, 10, mockCompression, true));
    // --- Stub chain for the FIRST blob: container -> blob client -> block blob client -> output stream.
    DataFileWriter mockDataFileWriter1 = mock(DataFileWriter.class);
    PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(mockDataFileWriter1);
    BlobAsyncClient mockBlobAsyncClient1 = mock(BlobAsyncClient.class);
    doReturn(mockBlobAsyncClient1).when(mockContainerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
    BlockBlobAsyncClient mockBlockBlobAsyncClient1 = mock(BlockBlobAsyncClient.class);
    doReturn(mockBlockBlobAsyncClient1).when(mockBlobAsyncClient1).getBlockBlobAsyncClient();
    AzureBlobOutputStream mockAzureBlobOutputStream1 = mock(AzureBlobOutputStream.class);
    PowerMockito.whenNew(AzureBlobOutputStream.class).withArguments(mockBlockBlobAsyncClient1, threadPool, mockMetrics, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, STREAM_NAME, (long) 60000, THRESHOLD, mockCompression).thenReturn(mockAzureBlobOutputStream1);
    // Report the blob as one byte under the limit so the NEXT write triggers rollover.
    when(mockAzureBlobOutputStream1.getSize()).thenReturn((long) maxBlobSize - 1);
    // first OME creates the first blob
    azureBlobAvroWriter.write(ome);
    OutgoingMessageEnvelope ome2 = createOME("Topic2");
    // --- Re-stub the same chain for the SECOND blob; later stubbings override the earlier
    // ones, so the rollover write below is routed to the "2" mocks.
    DataFileWriter mockDataFileWriter2 = mock(DataFileWriter.class);
    PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(mockDataFileWriter2);
    BlobAsyncClient mockBlobAsyncClient2 = mock(BlobAsyncClient.class);
    doReturn(mockBlobAsyncClient2).when(mockContainerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
    BlockBlobAsyncClient mockBlockBlobAsyncClient2 = mock(BlockBlobAsyncClient.class);
    doReturn(mockBlockBlobAsyncClient2).when(mockBlobAsyncClient2).getBlockBlobAsyncClient();
    AzureBlobOutputStream mockAzureBlobOutputStream2 = mock(AzureBlobOutputStream.class);
    PowerMockito.whenNew(AzureBlobOutputStream.class).withArguments(mockBlockBlobAsyncClient2, threadPool, mockMetrics, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, STREAM_NAME, (long) 60000, THRESHOLD, mockCompression).thenReturn(mockAzureBlobOutputStream2);
    when(mockAzureBlobOutputStream2.getSize()).thenReturn((long) maxBlobSize - 1);
    // Second OME creates the second blob because maxBlobSize is 1000 and mockAzureBlobOutputStream.getSize is 999.
    azureBlobAvroWriter.write(ome2);
    // Two blobs were requested from the container, both under the prefix, with distinct names.
    ArgumentCaptor<String> argument = ArgumentCaptor.forClass(String.class);
    verify(mockContainerClient, times(2)).getBlobAsyncClient(argument.capture());
    argument.getAllValues().forEach(blobName -> {
        Assert.assertTrue(blobName.contains(blobUrlPrefix));
    });
    List<String> allBlobNames = argument.getAllValues();
    Assert.assertNotEquals(allBlobNames.get(0), allBlobNames.get(1));
    // Each writer received exactly its own single record.
    verify(mockDataFileWriter1).appendEncoded(ByteBuffer.wrap(encodeRecord((IndexedRecord) ome.getMessage())));
    verify(mockDataFileWriter2).appendEncoded(ByteBuffer.wrap(encodeRecord((IndexedRecord) ome2.getMessage())));
    verify(mockDataFileWriter1).create(((IndexedRecord) ome.getMessage()).getSchema(), mockAzureBlobOutputStream1);
    verify(mockDataFileWriter2).create(((IndexedRecord) ome2.getMessage()).getSchema(), mockAzureBlobOutputStream2);
}
Also used : BlobContainerAsyncClient(com.azure.storage.blob.BlobContainerAsyncClient) DataFileWriter(org.apache.avro.file.DataFileWriter) BlobAsyncClient(com.azure.storage.blob.BlobAsyncClient) BlockBlobAsyncClient(com.azure.storage.blob.specialized.BlockBlobAsyncClient) Mockito.anyString(org.mockito.Mockito.anyString) BlockBlobAsyncClient(com.azure.storage.blob.specialized.BlockBlobAsyncClient) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) AzureBlobWriterMetrics(org.apache.samza.system.azureblob.producer.AzureBlobWriterMetrics) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 3 with BlockBlobAsyncClient

Usage of com.azure.storage.blob.specialized.BlockBlobAsyncClient in the Ambry project by LinkedIn.

From the class StorageClient, method deleteFileAsync:

/**
 * Deletes a file from Azure Blob Storage if it exists.
 *
 * <p>Despite the "Async" suffix, this call blocks: the async client's reactive results are
 * converted to futures and awaited inside the storage-client operation wrapper.
 * NOTE(review): the exists-then-delete sequence is check-then-act; presumably concurrent
 * deletion of the same blob is not expected here — confirm with callers.
 *
 * @param containerName name of the container holding the file to delete.
 * @param fileName name of the file to delete.
 * @return {@code true} if the file existed and was deleted, {@code false} otherwise.
 * @throws BlobStorageException for any error on the ABS side.
 */
boolean deleteFileAsync(String containerName, String fileName) throws BlobStorageException {
    // AtomicReference lets the lambda report the outcome back to this method.
    AtomicReference<Boolean> deleted = new AtomicReference<>(false);
    doStorageClientOperation(() -> {
        BlockBlobAsyncClient client = getBlockBlobAsyncClient(containerName, fileName, false);
        boolean blobExists = client.exists().toFuture().get();
        if (blobExists) {
            client.delete().toFuture().get();
            deleted.set(true);
        }
        return null;
    });
    return deleted.get();
}
Also used : AtomicReference(java.util.concurrent.atomic.AtomicReference) BlockBlobAsyncClient(com.azure.storage.blob.specialized.BlockBlobAsyncClient)

Example 4 with BlockBlobAsyncClient

use of com.azure.storage.blob.specialized.BlockBlobAsyncClient in project samza by apache.

From the class AzureBlobAvroWriter, method startNextBlob:

/**
 * Closes out the current blob (if any) and starts a new one: flushes the current
 * DataFileWriter, releases the output-stream buffer, lazily initializes the Avro
 * datum writer from the first record's schema, generates a new blob name, and wires
 * up a fresh DataFileWriter/AzureBlobOutputStream pair as the current writer components.
 *
 * @param optionalIndexedRecord the first record to be written; must be present on the
 *        very first call so the Avro schema and datum writer can be derived from it.
 * @throws IOException if flushing the current DataFileWriter fails.
 * @throws IllegalStateException if no schema has been set up and no record is provided.
 * @throws SamzaException if the AzureBlobOutputStream cannot be created.
 */
private void startNextBlob(Optional<IndexedRecord> optionalIndexedRecord) throws IOException {
    if (currentBlobWriterComponents != null) {
        // Use SLF4J parameterized logging (consistent with the other LOG.info calls below)
        // instead of eager string concatenation.
        LOG.info("Starting new blob as current blob size is {} and max blob size is {} or number of records is {} and max records in blob is {}",
            currentBlobWriterComponents.azureBlobOutputStream.getSize(), maxBlobSize, recordsInCurrentBlob, maxRecordsPerBlob);
        currentBlobWriterComponents.dataFileWriter.flush();
        currentBlobWriterComponents.azureBlobOutputStream.releaseBuffer();
        recordsInCurrentBlob = 0;
    }
    // optionalIndexedRecord is the first message in this case.
    if (datumWriter == null) {
        if (optionalIndexedRecord.isPresent()) {
            IndexedRecord record = optionalIndexedRecord.get();
            schema = record.getSchema();
            // SpecificRecord instances carry generated classes; everything else is written generically.
            if (record instanceof SpecificRecord) {
                datumWriter = new SpecificDatumWriter<>(schema);
            } else {
                datumWriter = new GenericDatumWriter<>(schema);
            }
        } else {
            throw new IllegalStateException("Writing without schema setup.");
        }
    }
    // Blob name: timestamp-based, optionally with an 8-char random suffix to avoid collisions.
    String blobURL;
    if (useRandomStringInBlobName) {
        blobURL = String.format(BLOB_NAME_RANDOM_STRING_AVRO, blobURLPrefix, UTC_FORMATTER.format(System.currentTimeMillis()), UUID.randomUUID().toString().substring(0, 8), compression.getFileExtension());
    } else {
        blobURL = String.format(BLOB_NAME_AVRO, blobURLPrefix, UTC_FORMATTER.format(System.currentTimeMillis()), compression.getFileExtension());
    }
    LOG.info("Creating new blob: {}", blobURL);
    BlockBlobAsyncClient blockBlobAsyncClient = containerAsyncClient.getBlobAsyncClient(blobURL).getBlockBlobAsyncClient();
    DataFileWriter<IndexedRecord> dataFileWriter = new DataFileWriter<>(datumWriter);
    AzureBlobOutputStream azureBlobOutputStream;
    try {
        azureBlobOutputStream = new AzureBlobOutputStream(blockBlobAsyncClient, blobThreadPool, metrics, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, streamName, flushTimeoutMs, maxBlockFlushThresholdSize, compression);
    } catch (Exception e) {
        // Preserve the cause so the underlying client failure is not lost.
        throw new SamzaException("Unable to create AzureBlobOutputStream", e);
    }
    dataFileWriter.create(schema, azureBlobOutputStream);
    // Flushing is controlled explicitly by this writer, not per Avro block.
    dataFileWriter.setFlushOnEveryBlock(false);
    this.currentBlobWriterComponents = new BlobWriterComponents(dataFileWriter, azureBlobOutputStream, blockBlobAsyncClient);
    // Track every blob's components so they can all be closed/flushed later.
    allBlobWriterComponents.add(this.currentBlobWriterComponents);
    LOG.info("Created new blob: {}", blobURL);
}
Also used : IndexedRecord(org.apache.avro.generic.IndexedRecord) DataFileWriter(org.apache.avro.file.DataFileWriter) SamzaException(org.apache.samza.SamzaException) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) SpecificRecord(org.apache.avro.specific.SpecificRecord) BlockBlobAsyncClient(com.azure.storage.blob.specialized.BlockBlobAsyncClient)

Aggregations

BlockBlobAsyncClient (com.azure.storage.blob.specialized.BlockBlobAsyncClient)4 DataFileWriter (org.apache.avro.file.DataFileWriter)3 BlobAsyncClient (com.azure.storage.blob.BlobAsyncClient)2 BlobContainerAsyncClient (com.azure.storage.blob.BlobContainerAsyncClient)2 OutgoingMessageEnvelope (org.apache.samza.system.OutgoingMessageEnvelope)2 AzureBlobWriterMetrics (org.apache.samza.system.azureblob.producer.AzureBlobWriterMetrics)2 Test (org.junit.Test)2 Mockito.anyString (org.mockito.Mockito.anyString)2 PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest)2 IOException (java.io.IOException)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 IndexedRecord (org.apache.avro.generic.IndexedRecord)1 SpecificRecord (org.apache.avro.specific.SpecificRecord)1 SamzaException (org.apache.samza.SamzaException)1