Search in sources:

Example 11 with DataSetFile

use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

The method getUploads of the class DataSetController.

/**
 * Lists the files that have been uploaded for a data set.
 *
 * <p>The data set is resolved through {@code findDataSet} before the listing is attempted. Any exception raised by the file manager while listing is
 * logged and replaced by an {@link InternalServerErrorException} carrying the {@code catalog.dataset.getUploads.error} message.</p>
 *
 * @param dataSetId the identifier of the data set whose uploads are requested
 * @return an OK response whose entity is the list of uploaded files
 * @throws InternalServerErrorException if the uploads cannot be listed
 */
@GET
@Path("{id}/uploads")
@Produces(MediaType.APPLICATION_JSON)
@ApiOperation("Lists uploaded files for a data set.")
@ApiResponses({
    @ApiResponse(code = 200, message = "List of uploaded files", response = DataSetFile.class, responseContainer = "List"),
    @ApiResponse(code = 404, message = "Data set does not exist", response = RestResponseStatus.class),
    @ApiResponse(code = 500, message = "Failed to list uploaded files", response = RestResponseStatus.class)
})
public Response getUploads(@PathParam("id") @UUID final String dataSetId) {
    log.entry(dataSetId);

    // Resolved outside the try block so lookup failures are not converted into a 500
    // NOTE(review): presumably findDataSet raises the documented 404 when the data set is missing - confirm
    final DataSet target = findDataSet(dataSetId, true);

    final List<DataSetFile> uploads;
    try {
        log.debug("Listing uploaded files for dataset {}", dataSetId);
        uploads = fileManager.listUploads(target);
    } catch (final Exception cause) {
        log.error("Unable to retrieve dataset uploads: {}", cause, cause);
        throw new InternalServerErrorException(getMessage("catalog.dataset.getUploads.error"));
    }

    final Response response = Response.ok(uploads).build();
    return log.exit(response);
}
Also used: DataSet(com.thinkbiganalytics.kylo.catalog.rest.model.DataSet) InternalServerErrorException(javax.ws.rs.InternalServerErrorException) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) CatalogException(com.thinkbiganalytics.kylo.catalog.CatalogException) BadRequestException(javax.ws.rs.BadRequestException) FileAlreadyExistsException(org.apache.hadoop.fs.FileAlreadyExistsException) NotFoundException(javax.ws.rs.NotFoundException) WebApplicationException(javax.ws.rs.WebApplicationException) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET) ApiOperation(io.swagger.annotations.ApiOperation) ApiResponses(io.swagger.annotations.ApiResponses)

Example 12 with DataSetFile

use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

The method createUpload of the class DefaultCatalogFileManager.

/**
 * Stores the contents of the stream as an uploaded file of the data set and returns the entry describing it.
 *
 * <p>After the data is copied, the configured owner and permission (when set) are applied to the new file, and the upload path is listed to obtain
 * the entry that is returned.</p>
 *
 * @param dataSet  the data set receiving the upload
 * @param fileName the name of the file to create
 * @param in       the data to be written to the file
 * @return the entry for the newly created file
 * @throws IOException if listing the upload path does not yield exactly one file, or if an I/O error is raised while creating the file
 */
@Nonnull
@Override
public DataSetFile createUpload(@Nonnull final DataSet dataSet, @Nonnull final String fileName, @Nonnull final InputStream in) throws IOException {
    final Path target = getUploadPath(dataSet, fileName);

    final List<DataSetFile> created = isolatedFunction(dataSet, target, fs -> {
        log.debug("Creating file [{}] for dataset {}", fileName, dataSet.getId());
        // NOTE(review): the 'false' argument is presumably Hadoop's overwrite flag, i.e. an existing file is never replaced - confirm
        try (final FSDataOutputStream sink = fs.create(target, false)) {
            IOUtils.copyLarge(in, sink);
        }

        // Ownership is only touched when at least one of user or group is configured
        if (!(username == null && groupname == null)) {
            log.debug("Changing owner of [{}] to {}:{}", target, username, groupname);
            fs.setOwner(target, username, groupname);
        }

        if (permission != null) {
            log.debug("Setting permissions of [{}] to {}", target, permission);
            fs.setPermission(target, permission);
        }

        return listFiles(fs, target);
    });

    // Listing the path of a single uploaded file must produce exactly one entry
    if (created.size() != 1) {
        log.error("Failed to upload file for dataset {} at path: {}. Expected 1 file but found {} files.", dataSet.getId(), target, created.size());
        throw new IOException("Uploaded file not found");
    }
    return created.get(0);
}
Also used : Path(org.apache.hadoop.fs.Path) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) IOException(java.io.IOException) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) Nonnull(javax.annotation.Nonnull)

Example 13 with DataSetFile

use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

The method listUploads of the class CatalogFileManagerTest.

/**
 * Verify listing uploaded files.
 *
 * <p>Three small text files are written into a new data set folder; the listing must report each of them and nothing else.</p>
 */
@Test
public void listUploads() throws IOException {
    // Fixture table: names[i] is written with contents[i]
    final String[] names = {"file1.txt", "file2.txt", "file3.txt"};
    final String[] contents = {"data1", "data2", "data3"};

    // Create data set including files
    final DataSet dataSet = createDataSet();
    final File dataSetFolder = datasetsFolder.newFolder(dataSet.getId());
    for (int i = 0; i < names.length; ++i) {
        Files.write(contents[i], new File(dataSetFolder, names[i]), StandardCharsets.UTF_8);
    }

    // Test listing files
    final CatalogFileManager fileManager = new MockCatalogFileManager();
    final List<DataSetFile> listed = fileManager.listUploads(dataSet);
    for (int i = 0; i < names.length; ++i) {
        final String expectedPath = new Path(dataSetFolder.toPath().resolve(names[i]).toUri()).toString();
        // 5 is the length of every fixture content string
        Assert.assertThat(listed, CoreMatchers.hasItem(equalTo(names[i], expectedPath, false, 5, contents[i])));
    }
    Assert.assertEquals(3, listed.size());
}
Also used: Path(org.apache.hadoop.fs.Path) DataSet(com.thinkbiganalytics.kylo.catalog.rest.model.DataSet) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) File(java.io.File) Test(org.junit.Test)

Example 14 with DataSetFile

use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

The method testListFilesS3 of the class DataSourceIT.

/**
 * Verify listing files from the Amazon S3 connector.
 *
 * <p>The test is skipped unless both AWS credentials are available. It registers an S3 data source, then checks that the bucket listing contains
 * the {@code thinkbig.greg} bucket and that the bucket's listing contains {@code userdata1.csv}.</p>
 */
@Test
public void testListFilesS3() {
    Assume.assumeNotNull(awsAccessKeyId, awsSecretAccessKey);

    // Create an S3 data source
    final Connector s3Connector = new Connector();
    s3Connector.setId("amazon-s3");

    final DefaultDataSetTemplate s3Template = new DefaultDataSetTemplate();
    s3Template.setOptions(new HashMap<>());
    s3Template.getOptions().put("spark.hadoop.fs.s3a.access.key", awsAccessKeyId);
    s3Template.getOptions().put("spark.hadoop.fs.s3a.secret.key", awsSecretAccessKey);

    final DataSource request = new DataSource();
    request.setConnector(s3Connector);
    request.setTemplate(s3Template);
    request.setTitle("test list files s3");

    final DataSource dataSource = given(DataSourceController.BASE)
        .when().body(request).post()
        .then().statusCode(200)
        .extract().as(DataSource.class);

    // Test listing buckets
    final List<DataSetFile> buckets = given(DataSourceController.BASE)
        .when().pathParam("id", dataSource.getId()).queryParam("path", "s3a:/").get("{id}/files")
        .then().statusCode(200)
        .extract().as(DataSetFileList.class);
    final CustomMatcher<DataSetFile> isExpectedBucket = new CustomMatcher<DataSetFile>("DataSetFile name=thinkbig.greg directory=true") {

        @Override
        public boolean matches(final Object item) {
            if (!(item instanceof DataSetFile)) {
                return false;
            }
            final DataSetFile candidate = (DataSetFile) item;
            return Objects.equals("thinkbig.greg", candidate.getName())
                   && Objects.equals("s3a://thinkbig.greg/", candidate.getPath())
                   && candidate.isDirectory();
        }
    };
    Assert.assertThat(buckets, CoreMatchers.hasItem(isExpectedBucket));

    // Test listing files
    final List<DataSetFile> files = given(DataSourceController.BASE)
        .when().pathParam("id", dataSource.getId()).queryParam("path", "s3a://thinkbig.greg/").get("{id}/files")
        .then().statusCode(200)
        .extract().as(DataSetFileList.class);
    final CustomMatcher<DataSetFile> isExpectedFile = new CustomMatcher<DataSetFile>("DataSetFile name=userdata1.csv directory=false") {

        @Override
        public boolean matches(final Object item) {
            if (!(item instanceof DataSetFile)) {
                return false;
            }
            final DataSetFile candidate = (DataSetFile) item;
            return Objects.equals("userdata1.csv", candidate.getName())
                   && Objects.equals("s3a://thinkbig.greg/userdata1.csv", candidate.getPath())
                   && !candidate.isDirectory();
        }
    };
    Assert.assertThat(files, CoreMatchers.hasItem(isExpectedFile));
}
Also used : Connector(com.thinkbiganalytics.kylo.catalog.rest.model.Connector) CustomMatcher(org.hamcrest.CustomMatcher) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) DefaultDataSetTemplate(com.thinkbiganalytics.kylo.catalog.rest.model.DefaultDataSetTemplate) DataSource(com.thinkbiganalytics.kylo.catalog.rest.model.DataSource) Test(org.junit.Test)

Example 15 with DataSetFile

use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

The method testListFilesAzureNative of the class DataSourceIT.

/**
 * Verify listing files from the Azure Storage connector.
 *
 * <p>The test is skipped unless an Azure account key is available. It registers an Azure Storage data source, then checks that the container
 * listing contains the {@code blob123} container and that the container's listing contains the {@code books1.json} file.</p>
 */
@Test
public void testListFilesAzureNative() {
    Assume.assumeNotNull(azureAccountKey);
    // Create an Azure data source
    final Connector connector = new Connector();
    connector.setId("azure-storage");
    final DefaultDataSetTemplate template = new DefaultDataSetTemplate();
    template.setOptions(Collections.singletonMap("spark.hadoop.fs.azure.account.key.kylogreg1.blob.core.windows.net", azureAccountKey));
    final DataSource request = new DataSource();
    request.setConnector(connector);
    request.setTemplate(template);
    request.setTitle("test list files wasb");
    final DataSource dataSource = given(DataSourceController.BASE).when().body(request).post().then().statusCode(200).extract().as(DataSource.class);
    // Test listing containers
    final List<DataSetFile> containers = given(DataSourceController.BASE).when().pathParam("id", dataSource.getId()).queryParam("path", "wasb://kylogreg1.blob.core.windows.net/").get("{id}/files").then().statusCode(200).extract().as(DataSetFileList.class);
    Assert.assertThat(containers, CoreMatchers.hasItem(new CustomMatcher<DataSetFile>("DataSetFile name=blob123 directory=true") {

        @Override
        public boolean matches(final Object item) {
            return (item instanceof DataSetFile) && Objects.equals("blob123", ((DataSetFile) item).getName()) && Objects.equals("wasb://blob123@kylogreg1.blob.core.windows.net/", ((DataSetFile) item).getPath()) && ((DataSetFile) item).isDirectory();
        }
    }));
    // Test listing files
    final List<DataSetFile> files = given(DataSourceController.BASE).when().pathParam("id", dataSource.getId()).queryParam("path", "wasb://blob123@kylogreg1.blob.core.windows.net/").get("{id}/files").then().statusCode(200).extract().as(DataSetFileList.class);
    // The description is what an assertion failure reports, so it must mirror the predicate: a regular file, not a directory
    Assert.assertThat(files, CoreMatchers.hasItem(new CustomMatcher<DataSetFile>("DataSetFile name=books1.json directory=false") {

        @Override
        public boolean matches(final Object item) {
            return (item instanceof DataSetFile) && Objects.equals("books1.json", ((DataSetFile) item).getName()) && Objects.equals("wasb://blob123@kylogreg1.blob.core.windows.net/books1.json", ((DataSetFile) item).getPath()) && !((DataSetFile) item).isDirectory();
        }
    }));
}
Also used : Connector(com.thinkbiganalytics.kylo.catalog.rest.model.Connector) CustomMatcher(org.hamcrest.CustomMatcher) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) DefaultDataSetTemplate(com.thinkbiganalytics.kylo.catalog.rest.model.DefaultDataSetTemplate) DataSource(com.thinkbiganalytics.kylo.catalog.rest.model.DataSource) Test(org.junit.Test)

Aggregations

DataSetFile (com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile)15 Test (org.junit.Test)9 Path (org.apache.hadoop.fs.Path)8 Nonnull (javax.annotation.Nonnull)6 Configuration (org.apache.hadoop.conf.Configuration)6 CatalogException (com.thinkbiganalytics.kylo.catalog.CatalogException)5 DataSet (com.thinkbiganalytics.kylo.catalog.rest.model.DataSet)5 URI (java.net.URI)4 AmazonS3 (com.amazonaws.services.s3.AmazonS3)3 DataSource (com.thinkbiganalytics.kylo.catalog.rest.model.DataSource)3 BadRequestException (javax.ws.rs.BadRequestException)3 InternalServerErrorException (javax.ws.rs.InternalServerErrorException)3 NotFoundException (javax.ws.rs.NotFoundException)3 WebApplicationException (javax.ws.rs.WebApplicationException)3 Bucket (com.amazonaws.services.s3.model.Bucket)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 CloudBlobClient (com.microsoft.azure.storage.blob.CloudBlobClient)2 CloudBlobContainer (com.microsoft.azure.storage.blob.CloudBlobContainer)2 Connector (com.thinkbiganalytics.kylo.catalog.rest.model.Connector)2 DefaultDataSetTemplate (com.thinkbiganalytics.kylo.catalog.rest.model.DefaultDataSetTemplate)2