Use of org.apache.gobblin.data.management.copy.FileAwareInputStream in the Apache incubator-gobblin project.
Class InputStreamExtractorTest, method testReadRecord.
/**
 * Checks that the extractor emits exactly one record for a regular file:
 * the first {@code readRecord} call returns a stream over the file's contents
 * and the second call returns {@code null}.
 */
@Test
public void testReadRecord() throws Exception {
  CopyableFile file = getTestCopyableFile("inputStreamExtractorTest/first.txt");
  FileAwareInputStreamExtractor extractor =
      new FileAwareInputStreamExtractor(FileSystem.getLocal(new Configuration()), file);

  FileAwareInputStream fileAwareInputStream = extractor.readRecord(null);

  // The emitted record must point at the same origin path as the input file.
  Assert.assertEquals(fileAwareInputStream.getFile().getOrigin().getPath(), file.getOrigin().getPath());
  // Decode with an explicit charset so the result does not depend on the platform default.
  Assert.assertEquals(
      IOUtils.toString(fileAwareInputStream.getInputStream(), java.nio.charset.StandardCharsets.UTF_8),
      "first");

  // A second read signals that the extractor is exhausted.
  Assert.assertNull(extractor.readRecord(null));
}
Use of org.apache.gobblin.data.management.copy.FileAwareInputStream in the Apache incubator-gobblin project.
Class FileAwareInputStreamDataWriterTest, method testWriteWithEncryption.
/**
 * Writes a small payload through {@code FileAwareInputStreamDataWriter} with the
 * {@code insecure_shift} encryption algorithm configured, then checks that the
 * committed file name ends with the algorithm name and that every written byte
 * equals the corresponding input byte incremented by one (mod 256).
 */
@Test
public void testWriteWithEncryption() throws Exception {
  byte[] streamString = "testEncryptedContents".getBytes(java.nio.charset.StandardCharsets.UTF_8);

  // Expected output: each input byte shifted up by one.
  byte[] expectedContents = new byte[streamString.length];
  for (int i = 0; i < streamString.length; i++) {
    expectedContents[i] = (byte) ((streamString[i] + 1) % 256);
  }

  // Reuse the temp directory's owner/group and grant all permissions to the copied file.
  FileStatus status = fs.getFileStatus(testTempPath);
  OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(),
      new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
  CopyableFile cf = CopyableFileUtils.getTestCopyableFile(ownerAndPermission);
  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));

  WorkUnitState state = TestUtils.createTestWorkUnitState();
  state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
  state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
  state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
  state.setProp("writer.encrypt." + EncryptionConfigParser.ENCRYPTION_ALGORITHM_KEY, "insecure_shift");
  CopySource.serializeCopyEntity(state, cf);
  CopySource.serializeCopyableDataset(state, metadata);

  FileAwareInputStreamDataWriter dataWriter = new FileAwareInputStreamDataWriter(state, 1, 0);
  FileAwareInputStream fileAwareInputStream =
      new FileAwareInputStream(cf, StreamUtils.convertStream(new ByteArrayInputStream(streamString)));
  dataWriter.write(fileAwareInputStream);
  dataWriter.commit();

  Path writtenFilePath = new Path(
      new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), cf.getDatasetAndPartition(metadata).identifier()),
      cf.getDestination());
  Assert.assertTrue(writtenFilePath.getName().endsWith("insecure_shift"),
      "Expected encryption name to be appended to destination");

  // Close the file handle deterministically instead of leaking it until GC.
  try (FileInputStream writtenFile = new FileInputStream(writtenFilePath.toString())) {
    Assert.assertEquals(IOUtils.toByteArray(writtenFile), expectedContents);
  }
}
Use of org.apache.gobblin.data.management.copy.FileAwareInputStream in the Apache incubator-gobblin project.
Class DecryptConverterTest, method testConvertGpgRecord.
/**
 * Runs a GPG-encrypted classpath resource through {@code DecryptConverter} and
 * checks that the first converted record decodes to the expected plain text.
 * The test is currently disabled via {@code enabled = false}.
 */
@Test(enabled = false)
public void testConvertGpgRecord() throws Exception {
  final String expectedFileContents = "123456789";
  final String passphrase = "12";

  DecryptConverter converter = new DecryptConverter();
  WorkUnitState workUnitState = new WorkUnitState();
  try {
    setEncryptedPassphrase(passphrase, workUnitState);
    converter.init(workUnitState);

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    URL encryptedResource = getClass().getClassLoader().getResource("decryptConverterTest/decrypt-test.txt.gpg");
    Assert.assertNotNull(encryptedResource);

    try (FSDataInputStream encryptedInput = localFs.open(new Path(encryptedResource.getFile()))) {
      FileAwareInputStream encryptedRecord =
          new FileAwareInputStream(CopyableFileUtils.getTestCopyableFile(), encryptedInput);

      // Only the first converted record is inspected.
      FileAwareInputStream decryptedRecord = Iterables.getFirst(
          converter.convertRecord("outputSchema", encryptedRecord, workUnitState), null);
      Assert.assertNotNull(decryptedRecord);

      Assert.assertEquals(IOUtils.toString(decryptedRecord.getInputStream(), Charsets.UTF_8), expectedFileContents);
    }
  } finally {
    // Clean up the password file first, then release the converter.
    deleteMasterPwdFile();
    converter.close();
  }
}
Use of org.apache.gobblin.data.management.copy.FileAwareInputStream in the Apache incubator-gobblin project.
Class UnGzipConverterTest, method testExtensionStripping.
/**
 * For each gzipped fixture ({@code .gzip} and {@code .gz}), converts it with
 * {@code UnGzipConverter} and checks that the destination file name is
 * {@code helloworld.txt} and that the converted contents are {@code "helloworld\n"}.
 */
@Test
public void testExtensionStripping() throws DataConversionException, IOException {
  List<String> gzippedNames = ImmutableList.of("helloworld.txt.gzip", "helloworld.txt.gz");
  UnGzipConverter unGzipper = new UnGzipConverter();
  FileSystem localFs = FileSystem.getLocal(new Configuration());

  for (String gzippedName : gzippedNames) {
    String resourceName = "unGzipConverterTest/" + gzippedName;
    String resourceLocation = getClass().getClassLoader().getResource(resourceName).getFile();

    FileAwareInputStream compressed = new FileAwareInputStream(
        CopyableFileUtils.getTestCopyableFile(resourceName, "/tmp/" + gzippedName, null, null),
        localFs.open(new Path(resourceLocation)));

    // Take the first converted record.
    FileAwareInputStream out =
        unGzipper.convertRecord("outputSchema", compressed, new WorkUnitState()).iterator().next();

    Assert.assertEquals(out.getFile().getDestination().getName(), "helloworld.txt");
    Assert.assertEquals(IOUtils.toString(out.getInputStream(), StandardCharsets.UTF_8), "helloworld\n");
  }
}
Use of org.apache.gobblin.data.management.copy.FileAwareInputStream in the Apache incubator-gobblin project.
Class FileAwareInputStreamExtractor, method readRecord.
/**
 * Emits the single record for this extractor's file.
 *
 * <p>The first call returns a {@link FileAwareInputStream} for {@code this.file}:
 * an empty stream when the file status is a directory, otherwise a metered
 * stream opened on the file's path. Every later call returns {@code null}.
 *
 * @param reuse not used by this implementation
 * @return the record on the first call, {@code null} once it has been read
 */
@Override
public FileAwareInputStream readRecord(@Deprecated FileAwareInputStream reuse) throws DataRecordException, IOException {
  if (this.recordRead) {
    return null;
  }

  Configuration conf;
  if (this.state == null) {
    conf = HadoopUtils.newConfiguration();
  } else {
    conf = HadoopUtils.getConfFromState(this.state);
  }
  FileSystem originFs = this.file.getOrigin().getPath().getFileSystem(conf);

  // Mark the record as consumed only after the file system lookup succeeded.
  this.recordRead = true;

  if (this.file.getFileStatus().isDirectory()) {
    // Directories carry no content; hand back an empty stream.
    return new FileAwareInputStream(this.file, EmptyInputStream.instance);
  }

  return new FileAwareInputStream(
      this.file,
      MeteredInputStream.builder()
          .in(originFs.open(this.file.getFileStatus().getPath()))
          .build());
}
Aggregations