use of org.apache.gobblin.data.management.copy.FileAwareInputStream in project incubator-gobblin by apache.
the class FileAwareInputStreamDataWriterTest method testWrite.
/**
 * Writes a short in-memory string through a {@link FileAwareInputStreamDataWriter}, commits it, and
 * asserts that the file found at
 * {@code <writer output dir>/<dataset-and-partition identifier>/<copyable file destination>}
 * contains exactly the string that was written.
 *
 * @throws Exception if setting up the work unit, writing, committing or reading back the file fails
 */
@Test
public void testWrite() throws Exception {
  String streamString = "testContents";

  // Owner and group are taken from the temp dir's status; permissions are ALL for user, group and other.
  FileStatus status = fs.getFileStatus(testTempPath);
  OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(),
      new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
  CopyableFile cf = CopyableFileUtils.getTestCopyableFile(ownerAndPermission);
  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));

  // Staging and output directories both live under the test temp path.
  WorkUnitState state = TestUtils.createTestWorkUnitState();
  state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
  state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
  state.setProp(ConfigurationKeys.WRITER_FILE_PATH, RandomStringUtils.randomAlphabetic(5));
  CopySource.serializeCopyEntity(state, cf);
  CopySource.serializeCopyableDataset(state, metadata);

  // NOTE(review): the trailing (1, 0) are presumably numBranches and branchId - confirm against the constructor.
  FileAwareInputStreamDataWriter dataWriter = new FileAwareInputStreamDataWriter(state, 1, 0);

  // Encode with an explicit charset so the round trip does not depend on the platform default.
  FileAwareInputStream fileAwareInputStream = new FileAwareInputStream(cf,
      StreamUtils.convertStream(IOUtils.toInputStream(streamString, java.nio.charset.StandardCharsets.UTF_8)));
  dataWriter.write(fileAwareInputStream);
  dataWriter.commit();

  Path writtenFilePath = new Path(
      new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR), cf.getDatasetAndPartition(metadata).identifier()),
      cf.getDestination());

  // Close the read-back stream even when the assertion fails, so no file handle is leaked.
  try (FileInputStream writtenFile = new FileInputStream(writtenFilePath.toString())) {
    Assert.assertEquals(IOUtils.toString(writtenFile, java.nio.charset.StandardCharsets.UTF_8), streamString);
  }
}
use of org.apache.gobblin.data.management.copy.FileAwareInputStream in project incubator-gobblin by apache.
the class TarArchiveInputStreamDataWriterTest method testWrite.
/**
 * Feeds an un-gzipped tar archive (obtained via {@code getCompressedInputStream}) to a
 * {@link TarArchiveInputStreamDataWriter}, commits it, and asserts that the entry {@code text.txt}
 * found under the writer output directory contains {@code "text"} once trimmed.
 *
 * <p>NOTE(review): {@code expectedText} is supplied by the data provider but is not used; the
 * assertion compares against a hard-coded value. Confirm whether that is intentional.
 *
 * @param filePath classpath location of the compressed test archive
 * @param newFileName file name used when building the copyable file for the archive
 * @param expectedText currently unused (see note above)
 * @throws Exception if preparing the input, writing, committing or reading back the file fails
 */
@Test(dataProvider = "testFileDataProvider")
public void testWrite(final String filePath, final String newFileName, final String expectedText) throws Exception {
  String expectedFileContents = "text";
  String fileNameInArchive = "text.txt";

  // Staging and output directories both live under the test temp path.
  WorkUnitState state = TestUtils.createTestWorkUnitState();
  state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, new Path(testTempPath, "staging").toString());
  state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, new Path(testTempPath, "output").toString());
  state.setProp(ConfigurationKeys.WRITER_FILE_PATH, "writer_file_path_" + RandomStringUtils.randomAlphabetic(5));

  CopyableDatasetMetadata metadata = new CopyableDatasetMetadata(new TestCopyableDataset(new Path("/source")));
  CopySource.serializeCopyableDataset(state, metadata);

  FileAwareInputStream fileAwareInputStream = getCompressedInputStream(filePath, newFileName);
  CopySource.serializeCopyEntity(state, fileAwareInputStream.getFile());

  // NOTE(review): the trailing (1, 0) are presumably numBranches and branchId - confirm against the constructor.
  TarArchiveInputStreamDataWriter dataWriter = new TarArchiveInputStreamDataWriter(state, 1, 0);
  dataWriter.write(fileAwareInputStream);
  dataWriter.commit();

  // The archive file contains the file text.txt
  Path unArchivedFilePath = new Path(fileAwareInputStream.getFile().getDestination(), fileNameInArchive);

  // Path at which the writer writes text.txt
  Path taskOutputFilePath = new Path(
      new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR),
          fileAwareInputStream.getFile().getDatasetAndPartition(metadata).identifier()),
      PathUtils.withoutLeadingSeparator(unArchivedFilePath));

  // Close the read-back stream even when the assertion fails, so no file handle is leaked.
  try (FileInputStream writtenFile = new FileInputStream(taskOutputFilePath.toString())) {
    Assert.assertEquals(IOUtils.toString(writtenFile, java.nio.charset.StandardCharsets.UTF_8).trim(),
        expectedFileContents);
  }
}
use of org.apache.gobblin.data.management.copy.FileAwareInputStream in project incubator-gobblin by apache.
the class TarArchiveInputStreamDataWriterTest method getCompressedInputStream.
/**
 * Finds the test compressed file <code>filePath</code> on the classpath and reads it as a
 * {@link FileAwareInputStream}.
 *
 * <p>The file is opened through the local {@link FileSystem}, wrapped together with a test
 * {@link CopyableFile}, and passed through an {@link UnGzipConverter}; the first converted record
 * is returned.
 *
 * @param filePath classpath-relative location of the compressed test file
 * @param newFileName name passed to the test {@link CopyableFile} and used to build a path under
 *        the test temp directory
 * @return the first record produced by the converter, or {@code null} if it produced none
 * @throws Exception if the file cannot be opened or converted
 */
private FileAwareInputStream getCompressedInputStream(final String filePath, final String newFileName) throws Exception {
  UnGzipConverter converter = new UnGzipConverter();
  FileSystem fs = FileSystem.getLocal(new Configuration());

  // Fail with a descriptive message instead of a bare NullPointerException when the fixture is missing.
  java.net.URL resource = getClass().getClassLoader().getResource(filePath);
  Assert.assertNotNull(resource, "Test resource not found on classpath: " + filePath);
  String fullPath = resource.getFile();

  // Owner and group are taken from the temp dir's status; permissions are ALL for user, group and other.
  FileStatus status = fs.getFileStatus(testTempPath);
  OwnerAndPermission ownerAndPermission = new OwnerAndPermission(status.getOwner(), status.getGroup(),
      new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
  CopyableFile cf = CopyableFileUtils.getTestCopyableFile(filePath, new Path(testTempPath, newFileName).toString(),
      newFileName, ownerAndPermission);

  // The opened stream is handed to the returned FileAwareInputStream, so it is intentionally not closed here.
  FileAwareInputStream fileAwareInputStream = new FileAwareInputStream(cf, fs.open(new Path(fullPath)));
  Iterable<FileAwareInputStream> iterable =
      converter.convertRecord("outputSchema", fileAwareInputStream, new WorkUnitState());
  return Iterables.getFirst(iterable, null);
}
use of org.apache.gobblin.data.management.copy.FileAwareInputStream in project incubator-gobblin by apache.
the class DecryptConverterTest method testConvertDifferentEncryption.
/**
 * Disabled test. Configures the job state with the {@code insecure_shift} encryption algorithm,
 * runs the classpath fixture {@code decryptConverterTest/decrypt-test.txt.insecure_shift} through a
 * {@link DecryptConverter}, and asserts that the converted stream reads {@code "2345678"} and that
 * the destination file name of the converted record is {@code decrypt-test.txt}.
 *
 * @throws IOException if the fixture cannot be opened or read
 * @throws DataConversionException if the converter fails on the record
 */
@Test(enabled = false)
public void testConvertDifferentEncryption() throws IOException, DataConversionException {
  final String plaintext = "2345678";
  final String algorithmProperty = "converter.encrypt." + EncryptionConfigParser.ENCRYPTION_ALGORITHM_KEY;

  WorkUnitState state = new WorkUnitState();
  state.getJobState().setProp(algorithmProperty, "insecure_shift");

  try (DecryptConverter decryptConverter = new DecryptConverter()) {
    decryptConverter.init(state);

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    URL fixture = getClass().getClassLoader().getResource("decryptConverterTest/decrypt-test.txt.insecure_shift");
    Assert.assertNotNull(fixture);
    Path fixturePath = new Path(fixture.getFile());

    try (FSDataInputStream encryptedInput = localFs.open(fixturePath)) {
      // Wrap the encrypted bytes, then point the record at a destination carrying the algorithm suffix.
      FileAwareInputStream encryptedRecord =
          new FileAwareInputStream(CopyableFileUtils.getTestCopyableFile(), encryptedInput);
      encryptedRecord.getFile().setDestination(new Path("file:///tmp/decrypt-test.txt.insecure_shift"));

      Iterable<FileAwareInputStream> converted =
          decryptConverter.convertRecord("outputSchema", encryptedRecord, state);
      FileAwareInputStream decryptedRecord = Iterables.getFirst(converted, null);
      Assert.assertNotNull(decryptedRecord);

      // Check the decrypted content first, then the destination name of the converted record.
      String decryptedText = IOUtils.toString(decryptedRecord.getInputStream(), Charsets.UTF_8);
      Assert.assertEquals(decryptedText, plaintext);

      String destinationName = decryptedRecord.getFile().getDestination().getName();
      Assert.assertEquals(destinationName, "decrypt-test.txt");
    }
  }
}
use of org.apache.gobblin.data.management.copy.FileAwareInputStream in project incubator-gobblin by apache.
the class UnGzipConverterTest method testGz.
/**
 * Opens the classpath resource {@code filePath} through the local file system, passes it through an
 * {@link UnGzipConverter}, reads the first converted record with {@code readGzipStreamAsString},
 * and asserts that the trimmed result equals {@code expectedText}.
 *
 * @param filePath classpath-relative location of the gzipped test file
 * @param expectedText text the converted stream is expected to contain, ignoring surrounding whitespace
 * @throws Exception if the file cannot be opened, converted or read
 */
@Test(dataProvider = "fileDataProvider")
public void testGz(final String filePath, final String expectedText) throws Exception {
  UnGzipConverter unGzipConverter = new UnGzipConverter();
  FileSystem localFs = FileSystem.getLocal(new Configuration());

  // Resolve the fixture on the classpath to a local file path.
  String resourceLocation = getClass().getClassLoader().getResource(filePath).getFile();
  Path resourcePath = new Path(resourceLocation);

  FileAwareInputStream gzippedRecord =
      new FileAwareInputStream(CopyableFileUtils.getTestCopyableFile(filePath), localFs.open(resourcePath));

  Iterable<FileAwareInputStream> convertedRecords =
      unGzipConverter.convertRecord("outputSchema", gzippedRecord, new WorkUnitState());
  FileAwareInputStream firstRecord = Iterables.getFirst(convertedRecords, null);

  String contents = readGzipStreamAsString(firstRecord.getInputStream());
  Assert.assertEquals(contents.trim(), expectedText);
}
Aggregations