
Example 1 with DataSetFile

Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

From class DataSetIT, method testUpload.

/**
 * Verify uploading files.
 */
// TODO create local-files data source
@Ignore
@Test
public void testUpload() {
    // Create a data set
    final DataSet request = new DataSet();
    request.setDataSource(new DataSource());
    request.getDataSource().setId("local-files");
    final DataSet dataSet = given(DataSetController.BASE).when().body(request).post().then().statusCode(200).extract().as(DataSet.class);
    final String id = dataSet.getId();
    // Test empty data set
    List<DataSetFile> files = listUploads(id);
    Assert.assertEquals(0, files.size());
    // Upload sample files
    uploadFile(id, getSampleFile("userdata1.csv"));
    uploadFile(id, getSampleFile("userdata2.csv"));
    files = listUploads(id);
    Assert.assertThat(files, CoreMatchers.hasItem(nameEquals("userdata1.csv")));
    Assert.assertThat(files, CoreMatchers.hasItem(nameEquals("userdata2.csv")));
    Assert.assertEquals(2, files.size());
    // Delete a file
    given(DataSetController.BASE).when().delete(id + "/uploads/userdata1.csv").then().statusCode(204);
    files = listUploads(id);
    Assert.assertThat(files, CoreMatchers.hasItem(nameEquals("userdata2.csv")));
    Assert.assertEquals(1, files.size());
}
Also used : DataSet(com.thinkbiganalytics.kylo.catalog.rest.model.DataSet) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) DataSource(com.thinkbiganalytics.kylo.catalog.rest.model.DataSource) Ignore(org.junit.Ignore) Test(org.junit.Test)
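
The test above relies on several helpers defined elsewhere in DataSetIT: listUploads, uploadFile, getSampleFile, and the nameEquals matcher, plus a given(basePath) helper from the integration-test base class that appears to build a REST Assured request specification. A minimal sketch of what these helpers might look like follows; the endpoint paths, status codes, and resource locations are assumptions, not the actual kylo implementation.

// Hypothetical helper sketches; the real DataSetIT helpers may differ.
// Assumed types: java.util.Arrays, java.util.List, java.io.File,
// org.hamcrest.CustomMatcher, org.hamcrest.Matcher, and a REST Assured-style given() helper.

/**
 * Lists the files uploaded to the data set via GET {BASE}/{id}/uploads.
 */
private List<DataSetFile> listUploads(final String id) {
    final DataSetFile[] files = given(DataSetController.BASE)
        .when().get(id + "/uploads")
        .then().statusCode(200)
        .extract().as(DataSetFile[].class);
    return Arrays.asList(files);
}

/**
 * Uploads a file to the data set via multipart POST to {BASE}/{id}/uploads.
 */
private void uploadFile(final String id, final File file) {
    given(DataSetController.BASE)
        .when().multiPart(file).post(id + "/uploads")
        .then().statusCode(200);
}

/**
 * Resolves a sample data file from the test resources.
 */
private File getSampleFile(final String name) {
    return new File(getClass().getResource("/" + name).getFile());
}

/**
 * Hamcrest matcher that matches a DataSetFile by name.
 */
private static Matcher<DataSetFile> nameEquals(final String name) {
    return new CustomMatcher<DataSetFile>("DataSetFile named " + name) {
        @Override
        public boolean matches(final Object item) {
            return item instanceof DataSetFile && name.equals(((DataSetFile) item).getName());
        }
    };
}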

Example 2 with DataSetFile

Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

From class S3FileSystemProviderTest, method listFilesS3n.

/**
 * Verify listing buckets using the s3n scheme.
 */
@Test
@SuppressWarnings("unchecked")
public void listFilesS3n() {
    // Mock client
    final AmazonS3 client = Mockito.mock(AmazonS3.class);
    final Bucket bucket1 = AmazonS3Util.createBucket("bucket1");
    final Bucket bucket2 = AmazonS3Util.createBucket("bucket2");
    Mockito.when(client.listBuckets()).thenReturn(Arrays.asList(bucket1, bucket2));
    // Test listing buckets
    final S3FileSystemProvider provider = new S3FileSystemProvider() {

        @Override
        protected AmazonS3 createS3Client(@Nonnull final URI uri, @Nonnull final Configuration conf) {
            return client;
        }
    };
    final List<DataSetFile> files = provider.listFiles(new Path(S3N), new Configuration(false));
    Assert.assertThat(files, CoreMatchers.hasItems(isDataSetFile(bucket1), isDataSetFile(bucket2)));
    Assert.assertEquals(2, files.size());
}
Also used : Path(org.apache.hadoop.fs.Path) AmazonS3(com.amazonaws.services.s3.AmazonS3) Configuration(org.apache.hadoop.conf.Configuration) Bucket(com.amazonaws.services.s3.model.Bucket) Nonnull(javax.annotation.Nonnull) URI(java.net.URI) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) Test(org.junit.Test)
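
The S3N constant and the AmazonS3Util.createBucket(...) and isDataSetFile(...) helpers are defined outside this snippet. A possible sketch is shown below, assuming S3N is simply an s3n:// root path and the matcher compares a bucket against the DataSetFile produced for it; the actual test utilities in kylo may differ.

// Hypothetical sketches of the helpers used above; names and behavior are assumptions.
// Assumed types: java.util.Date, org.hamcrest.CustomMatcher, org.hamcrest.Matcher.
private static final String S3N = "s3n://";

/**
 * Builds an S3 Bucket with the given name and a creation date, suitable for mocking listBuckets().
 */
static Bucket createBucket(final String name) {
    final Bucket bucket = new Bucket(name);
    bucket.setCreationDate(new Date());
    return bucket;
}

/**
 * Hamcrest matcher that matches the DataSetFile describing the given bucket.
 */
static Matcher<DataSetFile> isDataSetFile(final Bucket bucket) {
    return new CustomMatcher<DataSetFile>("DataSetFile for bucket " + bucket.getName()) {
        @Override
        public boolean matches(final Object item) {
            if (!(item instanceof DataSetFile)) {
                return false;
            }
            final DataSetFile file = (DataSetFile) item;
            return bucket.getName().equals(file.getName()) && Boolean.TRUE.equals(file.isDirectory());
        }
    };
}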

Example 3 with DataSetFile

Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

From class CatalogFileManagerTest, method createUpload.

/**
 * Verify uploading a file.
 */
@Test
public void createUpload() throws IOException {
    final DataSet dataSet = createDataSet();
    final String src = "Hello world!";
    // Test uploading a file
    final CatalogFileManager fileManager = new MockCatalogFileManager();
    final DataSetFile upload = fileManager.createUpload(dataSet, "file-upload.txt", new ByteArrayInputStream(src.getBytes(StandardCharsets.UTF_8)));
    final File file = datasetsFolder.getRoot().toPath().resolve(dataSet.getId()).resolve("file-upload.txt").toFile();
    Assert.assertFalse("Expected uploaded file to not be a directory", upload.isDirectory());
    Assert.assertEquals(src.length(), upload.getLength().longValue());
    Assert.assertEquals("file-upload.txt", upload.getName());
    Assert.assertEquals(file.toURI(), URI.create(upload.getPath()));
    Assert.assertEquals(src, Files.toString(file, StandardCharsets.UTF_8));
}
Also used : DataSet(com.thinkbiganalytics.kylo.catalog.rest.model.DataSet) ByteArrayInputStream(java.io.ByteArrayInputStream) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) File(java.io.File) Test(org.junit.Test)
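
The createDataSet() helper, the datasetsFolder rule, and MockCatalogFileManager are fixtures defined elsewhere in CatalogFileManagerTest. A rough sketch of the first two, assuming a JUnit TemporaryFolder rule, is below; the actual fixtures may differ.

// Hypothetical fixture sketches; the real CatalogFileManagerTest fixtures may differ.
// Assumed types: java.util.UUID, org.junit.Rule, org.junit.rules.TemporaryFolder.
@Rule
public TemporaryFolder datasetsFolder = new TemporaryFolder();

/**
 * Builds a DataSet with a random id and an empty data source.
 */
private DataSet createDataSet() {
    final DataSet dataSet = new DataSet();
    dataSet.setId(UUID.randomUUID().toString());
    dataSet.setDataSource(new DataSource());
    return dataSet;
}

MockCatalogFileManager is presumably a CatalogFileManager subclass that resolves a data set's upload directory to datasetsFolder.getRoot()/<dataSetId>/, which is what the path assertion in the test relies on.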

Example 4 with DataSetFile

Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

From class DataSourceController, method doListFiles.

private List<DataSetFile> doListFiles(@QueryParam("path") String path, DataSource dataSource) {
    final List<DataSetFile> files;
    try {
        log.debug("Listing files at path: {}", path);
        files = fileManager.listFiles(path, dataSource);
    } catch (final AccessDeniedException e) {
        log.debug("Access denied accessing path: {}: {}", path, e, e);
        throw new ForbiddenException(getMessage("catalog.datasource.listFiles.forbidden", path));
    } catch (final CatalogException e) {
        log.debug("Catalog exception when accessing path: {}: {}", path, e, e);
        throw new BadRequestException(getMessage(e));
    } catch (final Exception e) {
        if (exceptionTransformer.causesInChain(e)) {
            throw new ThriftConnectionException(e);
        }
        if (log.isErrorEnabled()) {
            log.error("Failed to list data source files at path " + path + ": " + e, e);
        }
        final RestResponseStatus status = new RestResponseStatus.ResponseStatusBuilder().message(getMessage("catalog.datasource.listFiles.error", path)).url(request.getRequestURI()).setDeveloperMessage(e).buildError();
        throw new InternalServerErrorException(Response.serverError().entity(status).build());
    }
    return files;
}
Also used : AccessDeniedException(java.nio.file.AccessDeniedException) ForbiddenException(javax.ws.rs.ForbiddenException) CatalogException(com.thinkbiganalytics.kylo.catalog.CatalogException) BadRequestException(javax.ws.rs.BadRequestException) ThriftConnectionException(com.thinkbiganalytics.hive.exceptions.ThriftConnectionException) InternalServerErrorException(javax.ws.rs.InternalServerErrorException) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) TTransportException(org.apache.thrift.transport.TTransportException) NotFoundException(javax.ws.rs.NotFoundException) WebApplicationException(javax.ws.rs.WebApplicationException) SQLException(java.sql.SQLException) PotentialControllerServiceConflictException(com.thinkbiganalytics.kylo.catalog.datasource.PotentialControllerServiceConflictException) DataSourceAlreadyExistsException(com.thinkbiganalytics.metadata.api.catalog.DataSourceAlreadyExistsException) RestResponseStatus(com.thinkbiganalytics.rest.model.RestResponseStatus)
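
doListFiles(...) is a private helper that translates failures into HTTP errors; the controller's resource methods delegate to it. As an illustration only, a hypothetical JAX-RS endpoint wired to it might look like the following; the actual paths, annotations, and data-source lookup in kylo may differ, and findDataSource(...) is an assumed helper.

// Hypothetical resource method delegating to doListFiles(...); not the actual kylo endpoint.
// Assumed annotations/types from javax.ws.rs and javax.ws.rs.core.
@GET
@Path("{id}/files")
@Produces(MediaType.APPLICATION_JSON)
public Response listFiles(@PathParam("id") final String dataSourceId, @QueryParam("path") final String path) {
    // findDataSource(...) is an assumed lookup helper that resolves the data source or throws NotFoundException
    final DataSource dataSource = findDataSource(dataSourceId);
    final List<DataSetFile> files = doListFiles(path, dataSource);
    return Response.ok(files).build();
}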

Example 5 with DataSetFile

Use of com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile in project kylo by Teradata.

From class S3FileSystemProvider, method listFiles.

@Nonnull
@Override
public List<DataSetFile> listFiles(@Nonnull final Path path, @Nonnull final Configuration conf) {
    // Determine the credentials
    final AmazonS3 s3;
    final URI uri = path.toUri();
    if ("s3".equalsIgnoreCase(uri.getScheme()) || "s3bfs".equalsIgnoreCase(uri.getScheme()) || "s3n".equalsIgnoreCase(uri.getScheme())) {
        s3 = createS3Client(uri, conf);
    } else if ("s3a".equalsIgnoreCase(uri.getScheme())) {
        final Class<? extends S3ClientFactory> s3ClientFactoryClass = conf.getClass(Constants.S3_CLIENT_FACTORY_IMPL, Constants.DEFAULT_S3_CLIENT_FACTORY_IMPL, S3ClientFactory.class);
        try {
            s3 = ReflectionUtils.newInstance(s3ClientFactoryClass, conf).createS3Client(uri);
        } catch (final IOException e) {
            throw new IllegalArgumentException("Unable to create S3 client: " + e, e);
        }
    } else {
        log.debug("Scheme {} not supported for S3 path: {}", uri.getScheme(), path);
        throw new CatalogException("catalog.fs.s3.invalidScheme", uri.getScheme());
    }
    // Fetch the list of buckets
    try {
        return s3.listBuckets().stream().map(bucket -> {
            final DataSetFile file = new DataSetFile();
            file.setName(bucket.getName());
            file.setDirectory(true);
            file.setModificationTime(bucket.getCreationDate().getTime());
            file.setPath(uri.getScheme() + "://" + bucket.getName() + "/");
            return file;
        }).collect(Collectors.toList());
    } finally {
        s3.shutdown();
    }
}
Also used : InstanceProfileCredentialsProvider(com.amazonaws.auth.InstanceProfileCredentialsProvider) AmazonS3ClientBuilder(com.amazonaws.services.s3.AmazonS3ClientBuilder) AWSCredentialsProviderChain(com.amazonaws.auth.AWSCredentialsProviderChain) LoggerFactory(org.slf4j.LoggerFactory) FileSystemProvider(com.thinkbiganalytics.kylo.catalog.spi.FileSystemProvider) BasicAWSCredentialsProvider(org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider) S3AUtils(org.apache.hadoop.fs.s3a.S3AUtils) ArrayList(java.util.ArrayList) CatalogException(com.thinkbiganalytics.kylo.catalog.CatalogException) Configuration(org.apache.hadoop.conf.Configuration) AWSCredentialsProvider(com.amazonaws.auth.AWSCredentialsProvider) Path(org.apache.hadoop.fs.Path) AmazonS3(com.amazonaws.services.s3.AmazonS3) URI(java.net.URI) Nonnull(javax.annotation.Nonnull) S3ClientFactory(org.apache.hadoop.fs.s3a.S3ClientFactory) Logger(org.slf4j.Logger) DataSetFile(com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Constants(org.apache.hadoop.fs.s3a.Constants) Component(org.springframework.stereotype.Component) List(java.util.List) Optional(java.util.Optional) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ReflectionUtils(org.apache.hadoop.util.ReflectionUtils)
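
For context, listing buckets with this provider only requires a Path whose scheme selects the client type and a Configuration carrying credentials; each call creates its own client and shuts it down in the finally block. A brief usage sketch, assuming the standard fs.s3a.* Hadoop property names (the keys expected by a given deployment may differ):

// Hypothetical usage sketch; property names and the root path are assumptions.
final Configuration conf = new Configuration(false);
conf.set("fs.s3a.access.key", "<access-key>");
conf.set("fs.s3a.secret.key", "<secret-key>");

final S3FileSystemProvider provider = new S3FileSystemProvider();
// Every bucket visible to the credentials is returned as a directory-like DataSetFile
final List<DataSetFile> buckets = provider.listFiles(new Path("s3a://"), conf);
for (final DataSetFile bucket : buckets) {
    System.out.println(bucket.getPath());
}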

Aggregations

DataSetFile (com.thinkbiganalytics.kylo.catalog.rest.model.DataSetFile): 15 usages
Test (org.junit.Test): 9 usages
Path (org.apache.hadoop.fs.Path): 8 usages
Nonnull (javax.annotation.Nonnull): 6 usages
Configuration (org.apache.hadoop.conf.Configuration): 6 usages
CatalogException (com.thinkbiganalytics.kylo.catalog.CatalogException): 5 usages
DataSet (com.thinkbiganalytics.kylo.catalog.rest.model.DataSet): 5 usages
URI (java.net.URI): 4 usages
AmazonS3 (com.amazonaws.services.s3.AmazonS3): 3 usages
DataSource (com.thinkbiganalytics.kylo.catalog.rest.model.DataSource): 3 usages
BadRequestException (javax.ws.rs.BadRequestException): 3 usages
InternalServerErrorException (javax.ws.rs.InternalServerErrorException): 3 usages
NotFoundException (javax.ws.rs.NotFoundException): 3 usages
WebApplicationException (javax.ws.rs.WebApplicationException): 3 usages
Bucket (com.amazonaws.services.s3.model.Bucket): 2 usages
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 2 usages
CloudBlobClient (com.microsoft.azure.storage.blob.CloudBlobClient): 2 usages
CloudBlobContainer (com.microsoft.azure.storage.blob.CloudBlobContainer): 2 usages
Connector (com.thinkbiganalytics.kylo.catalog.rest.model.Connector): 2 usages
DefaultDataSetTemplate (com.thinkbiganalytics.kylo.catalog.rest.model.DefaultDataSetTemplate): 2 usages