Search in sources:

Example 6 with FileAwareInputStream

use of org.apache.gobblin.data.management.copy.FileAwareInputStream in project incubator-gobblin by apache.

the class InputStreamExtractorTest method testReadRecord.

/**
 * Checks that the extractor hands out one record for the test file: the record's origin
 * path matches the input file's origin path and its stream reads as "first". A second
 * read must return null.
 */
@Test
public void testReadRecord() throws Exception {
    CopyableFile sourceFile = getTestCopyableFile("inputStreamExtractorTest/first.txt");
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    FileAwareInputStreamExtractor extractor = new FileAwareInputStreamExtractor(localFs, sourceFile);

    // First read: the record for the file itself.
    FileAwareInputStream record = extractor.readRecord(null);
    Assert.assertEquals(record.getFile().getOrigin().getPath(), sourceFile.getOrigin().getPath());
    String contents = IOUtils.toString(record.getInputStream());
    Assert.assertEquals(contents, "first");

    // Second read: nothing left to extract.
    FileAwareInputStream afterFirst = extractor.readRecord(null);
    Assert.assertNull(afterFirst);
}
Also used : CopyConfiguration(org.apache.gobblin.data.management.copy.CopyConfiguration) Configuration(org.apache.hadoop.conf.Configuration) FileAwareInputStream(org.apache.gobblin.data.management.copy.FileAwareInputStream) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) Test(org.testng.annotations.Test)

Example 7 with FileAwareInputStream

use of org.apache.gobblin.data.management.copy.FileAwareInputStream in project incubator-gobblin by apache.

the class FileAwareInputStreamDataWriterTest method testWriteWithEncryption.

/**
 * Writes a byte stream through a {@link FileAwareInputStreamDataWriter} configured with the
 * "insecure_shift" encryption algorithm, commits it, and checks that the written file's name
 * ends with "insecure_shift" and that its bytes equal the input bytes each incremented by one.
 */
@Test
public void testWriteWithEncryption() throws Exception {
    byte[] streamString = "testEncryptedContents".getBytes("UTF-8");
    // Expected output: every input byte incremented by one.
    byte[] expectedContents = new byte[streamString.length];
    for (int i = 0; i < streamString.length; i++) {
        expectedContents[i] = (byte) ((streamString[i] + 1) % 256);
    }
    FileStatus status = fs.getFileStatus(testTempPath);
    OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(), new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
    CopyableFile cf = CopyableFileUtils.getTestCopyableFile(ownerAndPermission);
    CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));
    WorkUnitState state = TestUtils.createTestWorkUnitState();
    state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
    state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
    state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
    state.setProp("writer.encrypt." + EncryptionConfigParser.ENCRYPTION_ALGORITHM_KEY, "insecure_shift");
    CopySource.serializeCopyEntity(state, cf);
    CopySource.serializeCopyableDataset(state, metadata);
    FileAwareInputStreamDataWriter dataWriter = new FileAwareInputStreamDataWriter(state, 1, 0);
    FileAwareInputStream fileAwareInputStream = new FileAwareInputStream(cf, StreamUtils.convertStream(new ByteArrayInputStream(streamString)));
    dataWriter.write(fileAwareInputStream);
    dataWriter.commit();
    Path writtenFilePath = new Path(new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), cf.getDatasetAndPartition(metadata).identifier()), cf.getDestination());
    Assert.assertTrue(writtenFilePath.getName().endsWith("insecure_shift"), "Expected encryption name to be appended to destination");
    // Read the committed file back inside try-with-resources so the file handle is
    // released even when the content assertion fails.
    try (FileInputStream writtenFile = new FileInputStream(writtenFilePath.toString())) {
        Assert.assertEquals(IOUtils.toByteArray(writtenFile), expectedContents);
    }
}
Also used : TestCopyableDataset(org.apache.gobblin.data.management.copy.TestCopyableDataset) Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) CopyableDatasetMetadata(org.apache.gobblin.data.management.copy.CopyableDatasetMetadata) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) FileInputStream(java.io.FileInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) OwnerAndPermission(org.apache.gobblin.data.management.copy.OwnerAndPermission) FileAwareInputStream(org.apache.gobblin.data.management.copy.FileAwareInputStream) CopyableFile(org.apache.gobblin.data.management.copy.CopyableFile) FsPermission(org.apache.hadoop.fs.permission.FsPermission) Test(org.testng.annotations.Test)

Example 8 with FileAwareInputStream

use of org.apache.gobblin.data.management.copy.FileAwareInputStream in project incubator-gobblin by apache.

the class DecryptConverterTest method testConvertGpgRecord.

/**
 * Feeds a GPG-encrypted test resource through {@link DecryptConverter} and checks that the
 * first converted record's stream, read as UTF-8, equals the expected plain text. The test
 * is currently disabled via {@code enabled = false}.
 */
@Test(enabled = false)
public void testConvertGpgRecord() throws Exception {
    final String expectedFileContents = "123456789";
    final String passphrase = "12";
    DecryptConverter converter = new DecryptConverter();
    WorkUnitState workUnitState = new WorkUnitState();
    try {
        setEncryptedPassphrase(passphrase, workUnitState);
        converter.init(workUnitState);

        FileSystem localFs = FileSystem.getLocal(new Configuration());
        URL encryptedResource = getClass().getClassLoader().getResource("decryptConverterTest/decrypt-test.txt.gpg");
        Assert.assertNotNull(encryptedResource);
        Path encryptedPath = new Path(encryptedResource.getFile());

        try (FSDataInputStream encryptedInput = localFs.open(encryptedPath)) {
            FileAwareInputStream encryptedRecord = new FileAwareInputStream(CopyableFileUtils.getTestCopyableFile(), encryptedInput);
            Iterable<FileAwareInputStream> converted = converter.convertRecord("outputSchema", encryptedRecord, workUnitState);

            // Only the first converted record is inspected.
            FileAwareInputStream decryptedRecord = Iterables.getFirst(converted, null);
            Assert.assertNotNull(decryptedRecord);
            Assert.assertEquals(IOUtils.toString(decryptedRecord.getInputStream(), Charsets.UTF_8), expectedFileContents);
        }
    } finally {
        // Clean up the password file written for the test, then release the converter.
        deleteMasterPwdFile();
        converter.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) FileSystem(org.apache.hadoop.fs.FileSystem) FileAwareInputStream(org.apache.gobblin.data.management.copy.FileAwareInputStream) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) URL(java.net.URL) Test(org.testng.annotations.Test)

Example 9 with FileAwareInputStream

use of org.apache.gobblin.data.management.copy.FileAwareInputStream in project incubator-gobblin by apache.

the class UnGzipConverterTest method testExtensionStripping.

/**
 * For each gzip test resource ("helloworld.txt.gzip" and "helloworld.txt.gz"), runs the file
 * through {@link UnGzipConverter} and checks that the converted record's destination name is
 * "helloworld.txt" and that its stream, read as UTF-8, is "helloworld\n".
 */
@Test
public void testExtensionStripping() throws DataConversionException, IOException {
    List<String> helloWorldFiles = ImmutableList.of("helloworld.txt.gzip", "helloworld.txt.gz");
    UnGzipConverter converter = new UnGzipConverter();
    FileSystem fs = FileSystem.getLocal(new Configuration());
    for (String fileName : helloWorldFiles) {
        String filePath = "unGzipConverterTest/" + fileName;
        String fullPath = getClass().getClassLoader().getResource(filePath).getFile();
        // Open the resource in try-with-resources so each iteration releases its file handle,
        // including when one of the assertions below fails.
        try (org.apache.hadoop.fs.FSDataInputStream compressedInput = fs.open(new Path(fullPath))) {
            FileAwareInputStream fileAwareInputStream = new FileAwareInputStream(CopyableFileUtils.getTestCopyableFile(filePath, "/tmp/" + fileName, null, null), compressedInput);
            Iterable<FileAwareInputStream> iterable = converter.convertRecord("outputSchema", fileAwareInputStream, new WorkUnitState());
            FileAwareInputStream out = iterable.iterator().next();
            Assert.assertEquals(out.getFile().getDestination().getName(), "helloworld.txt");
            String contents = IOUtils.toString(out.getInputStream(), StandardCharsets.UTF_8);
            Assert.assertEquals(contents, "helloworld\n");
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) WorkUnitState(org.apache.gobblin.configuration.WorkUnitState) FileSystem(org.apache.hadoop.fs.FileSystem) FileAwareInputStream(org.apache.gobblin.data.management.copy.FileAwareInputStream) Test(org.testng.annotations.Test)

Example 10 with FileAwareInputStream

use of org.apache.gobblin.data.management.copy.FileAwareInputStream in project incubator-gobblin by apache.

the class FileAwareInputStreamExtractor method readRecord.

/**
 * Produces the record for this extractor's file. Once {@code recordRead} has been set,
 * every call returns {@code null}.
 *
 * <p>For a directory the record wraps {@code EmptyInputStream.instance}; otherwise it wraps
 * a {@code MeteredInputStream} over the file opened from the origin path's file system.
 *
 * @param reuse not read by this method
 * @return the record for the file, or {@code null} when the record was already read
 * @throws DataRecordException declared by the overridden method; not thrown directly here
 * @throws IOException if resolving the file system or opening the file fails
 */
@Override
public FileAwareInputStream readRecord(@Deprecated FileAwareInputStream reuse) throws DataRecordException, IOException {
    if (this.recordRead) {
        return null;
    }

    Configuration conf;
    if (this.state == null) {
        conf = HadoopUtils.newConfiguration();
    } else {
        conf = HadoopUtils.getConfFromState(this.state);
    }
    FileSystem originFs = this.file.getOrigin().getPath().getFileSystem(conf);

    // Flag is set only after the file system has been resolved, before any stream is opened.
    this.recordRead = true;

    boolean isDirectory = this.file.getFileStatus().isDirectory();
    if (isDirectory) {
        return new FileAwareInputStream(this.file, EmptyInputStream.instance);
    }
    MeteredInputStream meteredInput = MeteredInputStream.builder().in(originFs.open(this.file.getFileStatus().getPath())).build();
    return new FileAwareInputStream(this.file, meteredInput);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) FileAwareInputStream(org.apache.gobblin.data.management.copy.FileAwareInputStream)

Aggregations

FileAwareInputStream (org.apache.gobblin.data.management.copy.FileAwareInputStream): 10; WorkUnitState (org.apache.gobblin.configuration.WorkUnitState): 8; Path (org.apache.hadoop.fs.Path): 8; Test (org.testng.annotations.Test): 8; Configuration (org.apache.hadoop.conf.Configuration): 7; FileSystem (org.apache.hadoop.fs.FileSystem): 6; CopyableFile (org.apache.gobblin.data.management.copy.CopyableFile): 4; FileInputStream (java.io.FileInputStream): 3; CopyableDatasetMetadata (org.apache.gobblin.data.management.copy.CopyableDatasetMetadata): 3; OwnerAndPermission (org.apache.gobblin.data.management.copy.OwnerAndPermission): 3; TestCopyableDataset (org.apache.gobblin.data.management.copy.TestCopyableDataset): 3; FileStatus (org.apache.hadoop.fs.FileStatus): 3; FsPermission (org.apache.hadoop.fs.permission.FsPermission): 3; URL (java.net.URL): 2; FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 2; ByteArrayInputStream (java.io.ByteArrayInputStream): 1; CopyConfiguration (org.apache.gobblin.data.management.copy.CopyConfiguration): 1; UnGzipConverter (org.apache.gobblin.data.management.copy.converter.UnGzipConverter): 1