Search in sources :

Example 6 with DataSetFile

use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

the class S3FileSystemProviderTest method listFiles.

/**
 * Checks that an s3n path is listed as one data set file per bucket known to the S3 client.
 */
@Test
@SuppressWarnings("unchecked")
public void listFiles() {
    // Stub an S3 client that reports exactly two buckets
    final AmazonS3 s3 = Mockito.mock(AmazonS3.class);
    final Bucket firstBucket = createBucket("bucket1");
    final Bucket secondBucket = createBucket("bucket2");
    Mockito.when(s3.listBuckets()).thenReturn(Arrays.asList(firstBucket, secondBucket));

    // Provider under test returns the stub rather than building a real client
    final S3FileSystemProvider s3Provider = new S3FileSystemProvider() {

        @Override
        protected AmazonS3 createS3Client(@Nonnull final URI uri, @Nonnull final Configuration conf) {
            return s3;
        }
    };

    // List the root of the s3n file system with an empty (no defaults) configuration
    final Path root = new Path(S3N);
    final Configuration emptyConf = new Configuration(false);
    final List<DataSetFile> listing = s3Provider.listFiles(root, emptyConf);

    // Both buckets must be present, and nothing else
    Assert.assertThat(listing, CoreMatchers.hasItems(isDataSetFile(firstBucket), isDataSetFile(secondBucket)));
    Assert.assertEquals(2, listing.size());
}
Also used : Path(org.apache.hadoop.fs.Path) AmazonS3(com.amazonaws.services.s3.AmazonS3) Configuration(org.apache.hadoop.conf.Configuration) Bucket(com.amazonaws.services.s3.model.Bucket) Nonnull(javax.annotation.Nonnull) URI(java.net.URI) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) Test(org.junit.Test)

Example 7 with DataSetFile

use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

the class AzureNativeFileSystemProvider method listFiles.

/**
 * Lists the blob containers reachable through the given path as directory entries.
 *
 * <p>Each returned path has the form {@code <scheme>://<container>@<host>[:<port>]/}, where the scheme, host and
 * port are taken from {@code path}. The port is included only when the URI specifies one.</p>
 *
 * @param path location whose URI supplies the scheme, host and optional port
 * @param conf configuration passed to {@code createBlobClient}
 * @return one {@link DataSetFile} per container, each flagged as a directory and stamped with the container's
 *         last-modified time
 */
@Nonnull
@Override
@SuppressWarnings("squid:S1075")
public List<DataSetFile> listFiles(@Nonnull final Path path, @Nonnull final Configuration conf) {
    // Connect to the blob service addressed by the path
    final URI location = path.toUri();
    final CloudBlobClient blobClient = createBlobClient(location, conf);

    // Pieces of the container path that surround the container name
    final String schemePart = location.getScheme() + "://";
    final int port = location.getPort();
    final String authorityPart;
    if (port > -1) {
        authorityPart = "@" + location.getHost() + ":" + port + "/";
    } else {
        authorityPart = "@" + location.getHost() + "/";
    }

    // Convert every container into a data set file
    return StreamSupport.stream(listContainers(blobClient).spliterator(), false)
        .map(container -> {
            final String containerName = container.getName();
            final DataSetFile entry = new DataSetFile();
            entry.setName(containerName);
            entry.setDirectory(true);
            entry.setModificationTime(container.getProperties().getLastModified().getTime());
            entry.setPath(schemePart + containerName + authorityPart);
            return entry;
        })
        .collect(Collectors.toList());
}
Also used : StorageCredentialsAccountAndKey(com.microsoft.azure.storage.StorageCredentialsAccountAndKey) RetryExponentialRetry(com.microsoft.azure.storage.RetryExponentialRetry) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) KeyProviderException(org.apache.hadoop.fs.azure.KeyProviderException) CloudBlobClient(com.microsoft.azure.storage.blob.CloudBlobClient) AzureNativeFileSystemStore(org.apache.hadoop.fs.azure.AzureNativeFileSystemStore) FileSystemProvider(com.thinkbiganalytics.kylo.catalog.spi.FileSystemProvider) StringUtils(org.apache.commons.lang3.StringUtils) Collectors(java.util.stream.Collectors) StorageCredentials(com.microsoft.azure.storage.StorageCredentials) Component(org.springframework.stereotype.Component) List(java.util.List) RetryPolicyFactory(com.microsoft.azure.storage.RetryPolicyFactory) CloudBlobContainer(com.microsoft.azure.storage.blob.CloudBlobContainer) CatalogException(com.thinkbiganalytics.kylo.catalog.CatalogException) Configuration(org.apache.hadoop.conf.Configuration) Path(org.apache.hadoop.fs.Path) StreamSupport(java.util.stream.StreamSupport) VisibleForTesting(com.google.common.annotations.VisibleForTesting) URI(java.net.URI) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) CloudBlobClient(com.microsoft.azure.storage.blob.CloudBlobClient) URI(java.net.URI) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) Nonnull(javax.annotation.Nonnull)

Example 8 with DataSetFile

use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

the class S3FileSystemProviderTest method listFilesS3a.

/**
 * Checks that an s3a path is listed as one data set file per bucket when the S3 client is obtained from the
 * client factory named in the configuration.
 */
@Test
@SuppressWarnings("unchecked")
public void listFilesS3a() {
    // Point the S3 client factory setting at the mock factory
    final Configuration s3aConf = new Configuration(false);
    s3aConf.setClass(Constants.S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class, S3ClientFactory.class);

    // List the root of the s3a file system
    final S3FileSystemProvider s3Provider = new S3FileSystemProvider();
    final Path root = new Path(S3A);
    final List<DataSetFile> listing = s3Provider.listFiles(root, s3aConf);

    // Both of the mock factory's buckets must be present, and nothing else
    Assert.assertThat(listing, CoreMatchers.hasItems(isDataSetFile(MockS3ClientFactory.BUCKET1), isDataSetFile(MockS3ClientFactory.BUCKET2)));
    Assert.assertEquals(2, listing.size());
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) Test(org.junit.Test)

Example 9 with DataSetFile

use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

the class AzureNativeFileSystemProviderTest method listFiles.

/**
 * Checks that a wasb path is listed as one data set file per container returned by {@code listContainers}.
 */
@Test
@SuppressWarnings("unchecked")
public void listFiles() {
    // Provider under test reports two fixed containers instead of querying the blob service
    final AzureNativeFileSystemProvider wasbProvider = new AzureNativeFileSystemProvider() {

        @Nonnull
        @Override
        protected Iterable<CloudBlobContainer> listContainers(@Nonnull final CloudBlobClient client) {
            final CloudBlobContainer first = createContainer("container1", client);
            final CloudBlobContainer second = createContainer("container2", client);
            return Arrays.asList(first, second);
        }
    };

    // List the root of the wasb file system with an empty (no defaults) configuration
    final Path root = new Path(WASB);
    final Configuration emptyConf = new Configuration(false);
    final List<DataSetFile> listing = wasbProvider.listFiles(root, emptyConf);

    // Both containers must be present, and nothing else
    Assert.assertThat(listing, CoreMatchers.hasItems(isDataSetFile("container1"), isDataSetFile("container2")));
    Assert.assertEquals(2, listing.size());
}
Also used : Path(org.apache.hadoop.fs.Path) CloudBlobClient(com.microsoft.azure.storage.blob.CloudBlobClient) Configuration(org.apache.hadoop.conf.Configuration) Nonnull(javax.annotation.Nonnull) CloudBlobContainer(com.microsoft.azure.storage.blob.CloudBlobContainer) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) Test(org.junit.Test)

Example 10 with DataSetFile

use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

the class DataSetController method postUpload.

/**
 * Stores a single uploaded file for a data set.
 *
 * <p>The multipart form must contain exactly one body part. Its content-disposition file name and entity stream
 * are handed to {@code fileManager.createUpload}.</p>
 *
 * @param dataSetId identifier of the data set receiving the file
 * @param form      multipart form carrying the uploaded file
 * @return a 200 response whose entity is the created {@link DataSetFile}
 * @throws BadRequestException          if the form does not have exactly one body part, or the upload is rejected
 *                                      with an {@link IllegalArgumentException}
 * @throws WebApplicationException      with status 409 if the upload fails with a {@link FileAlreadyExistsException}
 * @throws InternalServerErrorException if the upload fails for any other reason
 */
@POST
@Path("{id}/uploads")
@Consumes(MediaType.MULTIPART_FORM_DATA)
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("Uploads a file for the data set.")
@ApiResponses({
    @ApiResponse(code = 200, message = "Uploaded file info", response = DataSetFile.class),
    @ApiResponse(code = 400, message = "Invalid filename", response = RestResponseStatus.class),
    @ApiResponse(code = 404, message = "Data set does not exist", response = RestResponseStatus.class),
    @ApiResponse(code = 409, message = "A file already exists with the same name", response = RestResponseStatus.class),
    @ApiResponse(code = 500, message = "Failed to upload file", response = RestResponseStatus.class)
})
public Response postUpload(@PathParam("id") @UUID final String dataSetId, @Nonnull final FormDataMultiPart form) {
    log.entry(dataSetId, form);

    // Exactly one form part is accepted
    final List<BodyPart> bodyParts = form.getBodyParts();
    final int partCount = bodyParts.size();
    if (partCount != 1) {
        log.debug("Wrong number of form parts for uploading to dataset {}: {}", dataSetId, partCount);
        throw new BadRequestException(getMessage("catalog.dataset.postUpload.missingBodyPart"));
    }

    // Resolve the target data set; presumably this is where the documented 404 originates - confirm in findDataSet
    final DataSet dataSet = findDataSet(dataSetId, true);

    // Hand the part's file name and content to the file manager, mapping failures to HTTP errors
    final DataSetFile file;
    try {
        final BodyPart part = bodyParts.get(0);
        final String fileName = part.getContentDisposition().getFileName();
        log.debug("Upload file [{}] for dataset {}", fileName, dataSetId);
        final BodyPartEntity entity = part.getEntityAs(BodyPartEntity.class);
        file = fileManager.createUpload(dataSet, fileName, entity.getInputStream());
    } catch (final FileAlreadyExistsException e) {
        final String conflictingName = bodyParts.get(0).getContentDisposition().getFileName();
        log.debug("Filename conflict for uploaded file [{}] for dataset {}: {}", conflictingName, dataSetId, e, e);
        throw new WebApplicationException(getMessage("catalog.dataset.postUpload.fileExists"), Response.Status.CONFLICT);
    } catch (final IllegalArgumentException e) {
        final String rejectedName = bodyParts.get(0).getContentDisposition().getFileName();
        log.debug("Invalid filename [{}] for uploaded file for dataset {}: {}", rejectedName, dataSetId, e, e);
        throw new BadRequestException(getMessage("catalog.dataset.invalidFileName"));
    } catch (final Exception e) {
        log.error("Failed to save file for dataset {}: {}", dataSetId, e, e);
        throw new InternalServerErrorException(getMessage("catalog.dataset.postUpload.error"));
    }

    final Response response = Response.ok(file).build();
    return log.exit(response);
}
Also used : BodyPart(org.glassfish.jersey.media.multipart.BodyPart) FileAlreadyExistsException(org.apache.hadoop.fs.FileAlreadyExistsException) WebApplicationException(javax.ws.rs.WebApplicationException) DataSet(com.thinkbiganalytics.kylo.catalog.rest.model.DataSet) BadRequestException(javax.ws.rs.BadRequestException) InternalServerErrorException(javax.ws.rs.InternalServerErrorException) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) CatalogException(com.thinkbiganalytics.kylo.catalog.CatalogException) BadRequestException(javax.ws.rs.BadRequestException) FileAlreadyExistsException(org.apache.hadoop.fs.FileAlreadyExistsException) InternalServerErrorException(javax.ws.rs.InternalServerErrorException) NotFoundException(javax.ws.rs.NotFoundException) WebApplicationException(javax.ws.rs.WebApplicationException) BodyPartEntity(org.glassfish.jersey.media.multipart.BodyPartEntity) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST) Consumes(javax.ws.rs.Consumes) Produces(javax.ws.rs.Produces) ApiOperation(io.swagger.annotations.ApiOperation) ApiResponses(io.swagger.annotations.ApiResponses)

Aggregations

DataSetFile (com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile): 15, Test (org.junit.Test): 9, Path (org.apache.hadoop.fs.Path): 8, Nonnull (javax.annotation.Nonnull): 6, Configuration (org.apache.hadoop.conf.Configuration): 6, CatalogException (com.thinkbiganalytics.kylo.catalog.CatalogException): 5, DataSet (com.thinkbiganalytics.kylo.catalog.rest.model.DataSet): 5, URI (java.net.URI): 4, AmazonS3 (com.amazonaws.services.s3.AmazonS3): 3, DataSource (com.thinkbiganalytics.kylo.catalog.rest.model.DataSource): 3, BadRequestException (javax.ws.rs.BadRequestException): 3, InternalServerErrorException (javax.ws.rs.InternalServerErrorException): 3, NotFoundException (javax.ws.rs.NotFoundException): 3, WebApplicationException (javax.ws.rs.WebApplicationException): 3, Bucket (com.amazonaws.services.s3.model.Bucket): 2, VisibleForTesting (com.google.common.annotations.VisibleForTesting): 2, CloudBlobClient (com.microsoft.azure.storage.blob.CloudBlobClient): 2, CloudBlobContainer (com.microsoft.azure.storage.blob.CloudBlobContainer): 2, Connector (com.thinkbiganalytics.kylo.catalog.rest.model.Connector): 2, DefaultDataSetTemplate (com.thinkbiganalytics.kylo.catalog.rest.model.DefaultDataSetTemplate): 2