Search in sources :

Example 6 with OwnerAndPermission

use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.

the class FileAwareInputStreamDataWriterTest method testWriteWithEncryption.

/**
 * Checks that {@link FileAwareInputStreamDataWriter} honours the writer encryption setting.
 *
 * <p>The writer is configured with the "insecure_shift" algorithm; after commit the test asserts that
 * the written file's name ends with the algorithm name and that its contents equal the input with
 * every byte incremented by one.
 */
@Test
public void testWriteWithEncryption() throws Exception {
    byte[] streamString = "testEncryptedContents".getBytes("UTF-8");
    // Expected payload: each input byte plus one, wrapped modulo 256.
    byte[] expectedContents = new byte[streamString.length];
    for (int i = 0; i < streamString.length; i++) {
        expectedContents[i] = (byte) ((streamString[i] + 1) % 256);
    }
    FileStatus status = fs.getFileStatus(testTempPath);
    OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    CopyableFile cf = CopyableFileUtils.getTestCopyableFile(ownerAndPermission);
    CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));
    WorkUnitState state = TestUtils.createTestWorkUnitState();
    state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
    state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
    state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
    state.setProp("writer.encrypt." + EncryptionConfigParser.ENCRYPTION_ALGORITHM_KEY, "insecure_shift");
    CopySource.serializeCopyEntity(state, cf);
    CopySource.serializeCopyableDataset(state, metadata);
    FileAwareInputStreamDataWriter dataWriter = new FileAwareInputStreamDataWriter(state, 1, 0);
    FileAwareInputStream fileAwareInputStream = new FileAwareInputStream(cf, StreamUtils.convertStream(new ByteArrayInputStream(streamString)));
    dataWriter.write(fileAwareInputStream);
    dataWriter.commit();
    Path writtenFilePath = new Path(new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), cf.getDatasetAndPartition(metadata).identifier()), cf.getDestination());
    Assert.assertTrue(writtenFilePath.getName().endsWith("insecure_shift"), "Expected encryption name to be appended to destination");
    // Read the committed file back inside try-with-resources so the handle is released even if reading fails.
    byte[] actualContents;
    try (FileInputStream writtenStream = new FileInputStream(writtenFilePath.toString())) {
        actualContents = IOUtils.toByteArray(writtenStream);
    }
    Assert.assertEquals(actualContents, expectedContents);
}
Also used : TestCopyableDataset(org.apache.gobblin.data.management.copy.TestCopyableDataset) Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) CopyableDatasetMetadata(org.apache.gobblin.data.management.copy.CopyableDatasetMetadata) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) FileInputStream(java.io.FileInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) OwnerAndPermission(org.apache.gobblin.data.management.copy.OwnerAndPermission) FileAwareInputStream(org.apache.gobblin.data.management.copy.FileAwareInputStream) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) FsPermission(org.apache.hadoop.fs.permission.FsPermission) Test(org.testng.annotations.Test)

Example 7 with OwnerAndPermission

use of org.apache.gobblin.data.management.copy.OwnerAndPermission in project incubator-gobblin by apache.

the class HiveCopyEntityHelper method getCopyableFilesFromPaths.

/**
 * Builds one {@link CopyableFile.Builder} for every file status in {@code paths}.
 *
 * <p>Each source file is first paired with a target path obtained from {@code getTargetPathHelper()}.
 * A builder is then created for each pair via {@link CopyableFile#fromOriginAndDestination} and given
 * the list returned by {@link CopyableFile#resolveReplicatedOwnerAndPermissionsRecursively}, which is
 * called with the source file's parent directory and the parent of the dataset's table root path.
 *
 * @param paths statuses of the source files
 * @param configuration copy configuration passed to the {@link CopyableFile} factory methods
 * @param partition passed through to the target path computation
 * @return the builders, in the iteration order of {@code paths}
 * @throws IOException if the dataset has no table root path (other calls made here may also raise it)
 */
List<CopyableFile.Builder> getCopyableFilesFromPaths(Collection<FileStatus> paths, CopyConfiguration configuration, Optional<Partition> partition) throws IOException {
    Configuration conf = new Configuration();

    // Pair every source file with its destination before any source file system is looked up.
    List<SourceAndDestination> pairs = Lists.newArrayList();
    for (FileStatus fileStatus : paths) {
        pairs.add(new SourceAndDestination(fileStatus, getTargetPathHelper().getTargetPath(fileStatus.getPath(), this.targetFs, partition, true)));
    }

    // The most recently resolved file system, plus the scheme/authority it was resolved for.
    FileSystem sourceFs = null;
    String cachedScheme = null;
    String cachedAuthority = null;

    List<CopyableFile.Builder> result = Lists.newArrayList();
    for (SourceAndDestination pair : pairs) {
        FileStatus source = pair.getSource();
        URI sourceUri = source.getPath().toUri();

        // Reuse the previous file system only while scheme and authority are unchanged.
        boolean canReuseFs = sourceFs != null
            && StringUtils.equals(cachedScheme, sourceUri.getScheme())
            && StringUtils.equals(cachedAuthority, sourceUri.getAuthority());
        if (!canReuseFs) {
            sourceFs = source.getPath().getFileSystem(conf);
            cachedScheme = sourceUri.getScheme();
            cachedAuthority = sourceUri.getAuthority();
        }

        // The ancestor owner/permission lookup below needs the parent of a concrete table root path,
        // so a dataset without one cannot be handled here.
        if (!this.dataset.getTableRootPath().isPresent()) {
            throw new IOException(String.format("Table %s does not have a concrete table root path.", this.dataset.getTable().getCompleteName()));
        }

        List<OwnerAndPermission> ancestors = CopyableFile.resolveReplicatedOwnerAndPermissionsRecursively(sourceFs, source.getPath().getParent(), this.dataset.getTableRootPath().get().getParent(), configuration);
        result.add(CopyableFile.fromOriginAndDestination(sourceFs, source, pair.getDestination(), configuration).ancestorsOwnerAndPermission(ancestors));
    }
    return result;
}
Also used : FileStatus(org.apache.hadoop.fs.FileStatus) Configuration(org.apache.hadoop.conf.Configuration) CopyConfiguration(org.apache.gobblin.data.management.copy.CopyConfiguration) Builder(lombok.Builder) FileSystem(org.apache.hadoop.fs.FileSystem) OwnerAndPermission(org.apache.gobblin.data.management.copy.OwnerAndPermission) ToString(lombok.ToString) IOException(java.io.IOException) URI(java.net.URI)

Aggregations

OwnerAndPermission (org.apache.gobblin.data.management.copy.OwnerAndPermission)7 CopyableFile (org.apache.gobblin.data.management.copy.CopyableFile)5 FileStatus (org.apache.hadoop.fs.FileStatus)5 Path (org.apache.hadoop.fs.Path)5 WorkUnitState (org.apache.gobblin.configuration.WorkUnitState)4 FsPermission (org.apache.hadoop.fs.permission.FsPermission)4 CopyableDatasetMetadata (org.apache.gobblin.data.management.copy.CopyableDatasetMetadata)3 FileAwareInputStream (org.apache.gobblin.data.management.copy.FileAwareInputStream)3 TestCopyableDataset (org.apache.gobblin.data.management.copy.TestCopyableDataset)3 Configuration (org.apache.hadoop.conf.Configuration)3 Test (org.testng.annotations.Test)3 FileInputStream (java.io.FileInputStream)2 IOException (java.io.IOException)2 CopyConfiguration (org.apache.gobblin.data.management.copy.CopyConfiguration)2 FileSystem (org.apache.hadoop.fs.FileSystem)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 File (java.io.File)1 URI (java.net.URI)1 Properties (java.util.Properties)1 Builder (lombok.Builder)1