Search in sources :

Example 1 with AzureBlobWriterMetrics

use of org.apache.samza.system.azureblob.producer.AzureBlobWriterMetrics in project samza by apache.

In the class TestAzureBlobAvroWriter, method testRecordLimitExceeded.

/**
 * Writes exactly {@code recordLimit} copies of the shared envelope followed by one extra envelope,
 * then checks that the container client was asked for two differently named blobs, that the first
 * DataFileWriter received {@code recordLimit} appends, and that the second DataFileWriter received
 * the extra record.
 */
@Test
public void testRecordLimitExceeded() throws Exception {
    String blobUrlPrefix = "test";
    String blobNameRegex = "test/[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}-[0-9]{2}-.{8}.avro.gz";
    long recordLimit = 10;
    AzureBlobWriterMetrics metrics = mock(AzureBlobWriterMetrics.class);
    // The mocked streams below report a size of 1, well under this limit.
    // NOTE(review): presumably this keeps the size check from triggering the rollover - confirm against the writer.
    long sizeLimit = AzureBlobAvroWriter.DATAFILEWRITER_OVERHEAD + 1000;
    BlobContainerAsyncClient containerClient = PowerMockito.mock(BlobContainerAsyncClient.class);
    azureBlobAvroWriter = spy(new AzureBlobAvroWriter(containerClient, metrics, threadPool, THRESHOLD, 60000,
        blobUrlPrefix, null, null, null, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, STREAM_NAME,
        sizeLimit, recordLimit, mockCompression, true));

    // Collaborators handed out for the first blob.
    DataFileWriter firstFileWriter = mock(DataFileWriter.class);
    PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(firstFileWriter);
    BlobAsyncClient firstBlobClient = mock(BlobAsyncClient.class);
    doReturn(firstBlobClient).when(containerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
    BlockBlobAsyncClient firstBlockBlobClient = mock(BlockBlobAsyncClient.class);
    doReturn(firstBlockBlobClient).when(firstBlobClient).getBlockBlobAsyncClient();
    AzureBlobOutputStream firstOutputStream = mock(AzureBlobOutputStream.class);
    PowerMockito.whenNew(AzureBlobOutputStream.class)
        .withArguments(firstBlockBlobClient, threadPool, metrics, blobMetadataGeneratorFactory,
            blobMetadataGeneratorConfig, STREAM_NAME, 60000L, THRESHOLD, mockCompression)
        .thenReturn(firstOutputStream);
    when(firstOutputStream.getSize()).thenReturn(1L);

    // Write exactly as many records as the per-blob limit allows.
    for (long written = 0; written < recordLimit; written++) {
        azureBlobAvroWriter.write(ome);
    }

    // Re-stub the same construction and lookup points so the next blob gets its own collaborators.
    OutgoingMessageEnvelope secondEnvelope = createOME("Topic2");
    DataFileWriter secondFileWriter = mock(DataFileWriter.class);
    PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(secondFileWriter);
    BlobAsyncClient secondBlobClient = mock(BlobAsyncClient.class);
    doReturn(secondBlobClient).when(containerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
    BlockBlobAsyncClient secondBlockBlobClient = mock(BlockBlobAsyncClient.class);
    doReturn(secondBlockBlobClient).when(secondBlobClient).getBlockBlobAsyncClient();
    AzureBlobOutputStream secondOutputStream = mock(AzureBlobOutputStream.class);
    PowerMockito.whenNew(AzureBlobOutputStream.class)
        .withArguments(secondBlockBlobClient, threadPool, metrics, blobMetadataGeneratorFactory,
            blobMetadataGeneratorConfig, STREAM_NAME, 60000L, THRESHOLD, mockCompression)
        .thenReturn(secondOutputStream);
    when(secondOutputStream.getSize()).thenReturn(1L);

    // One record past the limit.
    azureBlobAvroWriter.write(secondEnvelope);

    // Two blob lookups are expected, each carrying the prefix, with distinct names.
    ArgumentCaptor<String> blobNameCaptor = ArgumentCaptor.forClass(String.class);
    verify(containerClient, times(2)).getBlobAsyncClient(blobNameCaptor.capture());
    List<String> requestedBlobNames = blobNameCaptor.getAllValues();
    for (String requestedName : requestedBlobNames) {
        Assert.assertTrue(requestedName.contains(blobUrlPrefix));
    }
    Assert.assertNotEquals(requestedBlobNames.get(0), requestedBlobNames.get(1));

    // Each writer must have been created against its own stream and fed only its own records.
    IndexedRecord firstRecord = (IndexedRecord) ome.getMessage();
    IndexedRecord secondRecord = (IndexedRecord) secondEnvelope.getMessage();
    verify(firstFileWriter, times((int) recordLimit)).appendEncoded(ByteBuffer.wrap(encodeRecord(firstRecord)));
    verify(secondFileWriter).appendEncoded(ByteBuffer.wrap(encodeRecord(secondRecord)));
    verify(firstFileWriter).create(firstRecord.getSchema(), firstOutputStream);
    verify(secondFileWriter).create(secondRecord.getSchema(), secondOutputStream);
}
Also used : BlobContainerAsyncClient(com.azure.storage.blob.BlobContainerAsyncClient) DataFileWriter(org.apache.avro.file.DataFileWriter) Mockito.anyString(org.mockito.Mockito.anyString) AzureBlobWriterMetrics(org.apache.samza.system.azureblob.producer.AzureBlobWriterMetrics) BlobAsyncClient(com.azure.storage.blob.BlobAsyncClient) BlockBlobAsyncClient(com.azure.storage.blob.specialized.BlockBlobAsyncClient) BlockBlobAsyncClient(com.azure.storage.blob.specialized.BlockBlobAsyncClient) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Example 2 with AzureBlobWriterMetrics

use of org.apache.samza.system.azureblob.producer.AzureBlobWriterMetrics in project samza by apache.

In the class TestAzureBlobAvroWriter, method testMaxBlobSizeExceeded.

/**
 * Writes two envelopes while each mocked output stream reports a size one byte below the
 * configured maximum blob size, then checks that the container client was asked for two
 * differently named blobs and that each DataFileWriter received exactly its own record.
 */
@Test
public void testMaxBlobSizeExceeded() throws Exception {
    String blobUrlPrefix = "test";
    String blobNameRegex = "test/[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}-[0-9]{2}-.{8}.avro.gz";
    long maxBlobSize = 1000;
    // Size reported by both mocked streams: 999, one below the 1000 limit.
    long reportedStreamSize = maxBlobSize - 1;
    AzureBlobWriterMetrics metrics = mock(AzureBlobWriterMetrics.class);
    BlobContainerAsyncClient containerClient = PowerMockito.mock(BlobContainerAsyncClient.class);
    azureBlobAvroWriter = spy(new AzureBlobAvroWriter(containerClient, metrics, threadPool, THRESHOLD, 60000,
        blobUrlPrefix, null, null, null, blobMetadataGeneratorFactory, blobMetadataGeneratorConfig, STREAM_NAME,
        maxBlobSize, 10, mockCompression, true));

    // Collaborators handed out for the first blob.
    DataFileWriter firstFileWriter = mock(DataFileWriter.class);
    PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(firstFileWriter);
    BlobAsyncClient firstBlobClient = mock(BlobAsyncClient.class);
    doReturn(firstBlobClient).when(containerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
    BlockBlobAsyncClient firstBlockBlobClient = mock(BlockBlobAsyncClient.class);
    doReturn(firstBlockBlobClient).when(firstBlobClient).getBlockBlobAsyncClient();
    AzureBlobOutputStream firstOutputStream = mock(AzureBlobOutputStream.class);
    PowerMockito.whenNew(AzureBlobOutputStream.class)
        .withArguments(firstBlockBlobClient, threadPool, metrics, blobMetadataGeneratorFactory,
            blobMetadataGeneratorConfig, STREAM_NAME, 60000L, THRESHOLD, mockCompression)
        .thenReturn(firstOutputStream);
    when(firstOutputStream.getSize()).thenReturn(reportedStreamSize);

    // The first envelope causes the first blob to be created.
    azureBlobAvroWriter.write(ome);

    // Re-stub the same construction and lookup points so the next blob gets its own collaborators.
    OutgoingMessageEnvelope secondEnvelope = createOME("Topic2");
    DataFileWriter secondFileWriter = mock(DataFileWriter.class);
    PowerMockito.whenNew(DataFileWriter.class).withAnyArguments().thenReturn(secondFileWriter);
    BlobAsyncClient secondBlobClient = mock(BlobAsyncClient.class);
    doReturn(secondBlobClient).when(containerClient).getBlobAsyncClient(Matchers.matches(blobNameRegex));
    BlockBlobAsyncClient secondBlockBlobClient = mock(BlockBlobAsyncClient.class);
    doReturn(secondBlockBlobClient).when(secondBlobClient).getBlockBlobAsyncClient();
    AzureBlobOutputStream secondOutputStream = mock(AzureBlobOutputStream.class);
    PowerMockito.whenNew(AzureBlobOutputStream.class)
        .withArguments(secondBlockBlobClient, threadPool, metrics, blobMetadataGeneratorFactory,
            blobMetadataGeneratorConfig, STREAM_NAME, 60000L, THRESHOLD, mockCompression)
        .thenReturn(secondOutputStream);
    when(secondOutputStream.getSize()).thenReturn(reportedStreamSize);

    // With the limit at 1000 and the current stream already at 999, the second envelope
    // is expected to land in a newly created blob.
    azureBlobAvroWriter.write(secondEnvelope);

    // Two blob lookups are expected, each carrying the prefix, with distinct names.
    ArgumentCaptor<String> blobNameCaptor = ArgumentCaptor.forClass(String.class);
    verify(containerClient, times(2)).getBlobAsyncClient(blobNameCaptor.capture());
    List<String> requestedBlobNames = blobNameCaptor.getAllValues();
    for (String requestedName : requestedBlobNames) {
        Assert.assertTrue(requestedName.contains(blobUrlPrefix));
    }
    Assert.assertNotEquals(requestedBlobNames.get(0), requestedBlobNames.get(1));

    // Each writer must have been created against its own stream and fed only its own record.
    IndexedRecord firstRecord = (IndexedRecord) ome.getMessage();
    IndexedRecord secondRecord = (IndexedRecord) secondEnvelope.getMessage();
    verify(firstFileWriter).appendEncoded(ByteBuffer.wrap(encodeRecord(firstRecord)));
    verify(secondFileWriter).appendEncoded(ByteBuffer.wrap(encodeRecord(secondRecord)));
    verify(firstFileWriter).create(firstRecord.getSchema(), firstOutputStream);
    verify(secondFileWriter).create(secondRecord.getSchema(), secondOutputStream);
}
Also used : BlobContainerAsyncClient(com.azure.storage.blob.BlobContainerAsyncClient) DataFileWriter(org.apache.avro.file.DataFileWriter) BlobAsyncClient(com.azure.storage.blob.BlobAsyncClient) BlockBlobAsyncClient(com.azure.storage.blob.specialized.BlockBlobAsyncClient) Mockito.anyString(org.mockito.Mockito.anyString) BlockBlobAsyncClient(com.azure.storage.blob.specialized.BlockBlobAsyncClient) OutgoingMessageEnvelope(org.apache.samza.system.OutgoingMessageEnvelope) AzureBlobWriterMetrics(org.apache.samza.system.azureblob.producer.AzureBlobWriterMetrics) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Aggregations

BlobAsyncClient (com.azure.storage.blob.BlobAsyncClient): 2, BlobContainerAsyncClient (com.azure.storage.blob.BlobContainerAsyncClient): 2, BlockBlobAsyncClient (com.azure.storage.blob.specialized.BlockBlobAsyncClient): 2, DataFileWriter (org.apache.avro.file.DataFileWriter): 2, OutgoingMessageEnvelope (org.apache.samza.system.OutgoingMessageEnvelope): 2, AzureBlobWriterMetrics (org.apache.samza.system.azureblob.producer.AzureBlobWriterMetrics): 2, Test (org.junit.Test): 2, Mockito.anyString (org.mockito.Mockito.anyString): 2, PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest): 2