Use of org.apache.beam.sdk.io.fs.ResourceId in project beam by apache.
The class FileBasedSinkTest, method testCopyToOutputFiles.
/** Output files are copied to the destination location with the correct names and contents. */
@Test
public void testCopyToOutputFiles() throws Exception {
  SimpleSink.SimpleWriteOperation writeOp = buildWriteOperation();
  ResourceId outputDirectory = writeOp.getSink().getBaseOutputDirectoryProvider().get();
  List<String> inputFilenames = Arrays.asList("input-1", "input-2", "input-3");
  List<String> inputContents = Arrays.asList("1", "2", "3");
  List<String> expectedOutputFilenames =
      Arrays.asList("file-00-of-03.test", "file-01-of-03.test", "file-02-of-03.test");
  Map<ResourceId, ResourceId> inputFilePaths = new HashMap<>();
  List<ResourceId> expectedOutputPaths = new ArrayList<>();
  for (int i = 0; i < inputFilenames.size(); i++) {
    // Generate output paths.
    expectedOutputPaths.add(
        getBaseOutputDirectory()
            .resolve(expectedOutputFilenames.get(i), StandardResolveOptions.RESOLVE_FILE));
    // Generate and write to input paths.
    File inputTmpFile = tmpFolder.newFile(inputFilenames.get(i));
    List<String> lines = Collections.singletonList(inputContents.get(i));
    writeFile(lines, inputTmpFile);
    inputFilePaths.put(
        LocalResources.fromFile(inputTmpFile, false),
        writeOp
            .getSink()
            .getFilenamePolicy()
            .unwindowedFilename(outputDirectory, new Context(i, inputFilenames.size()), ""));
  }
  // Copy input files to output files.
  writeOp.copyToOutputFiles(inputFilePaths);
  // Assert that the contents were copied.
  for (int i = 0; i < expectedOutputPaths.size(); i++) {
    assertFileContains(
        Collections.singletonList(inputContents.get(i)), expectedOutputPaths.get(i));
  }
}
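For context, the copy step in this test amounts to a batch copy from temporary files to output paths built with ResourceId.resolve. The sketch below illustrates that pattern in isolation; the /tmp paths and file names are made up for the example, and it only mirrors the kind of batch FileSystems.copy the write operation performs rather than reproducing FileBasedSink internals.

import java.util.Arrays;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.io.fs.MoveOptions.StandardMoveOptions;
import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
import org.apache.beam.sdk.io.fs.ResourceId;

public class CopyToOutputSketch {
  public static void main(String[] args) throws Exception {
    // Build a directory-typed ResourceId, then resolve an output file name beneath it.
    ResourceId outputDir = FileSystems.matchNewResource("/tmp/output/", true /* isDirectory */);
    ResourceId src = FileSystems.matchNewResource("/tmp/staging/input-1", false /* isDirectory */);
    ResourceId dst = outputDir.resolve("file-00-of-03.test", StandardResolveOptions.RESOLVE_FILE);
    // Copy sources to destinations in one batch; IGNORE_MISSING_FILES skips absent sources.
    FileSystems.copy(
        Arrays.asList(src), Arrays.asList(dst), StandardMoveOptions.IGNORE_MISSING_FILES);
  }
}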
Use of org.apache.beam.sdk.io.fs.ResourceId in project beam by apache.
The class LocalResourceIdTest, method testResolveInvalidNotDirectory.
@Test
public void testResolveInvalidNotDirectory() throws Exception {
  ResourceId tmp =
      toResourceIdentifier("/root/").resolve("tmp", StandardResolveOptions.RESOLVE_FILE);
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("Expected the path is a directory, but had [/root/tmp].");
  tmp.resolve("aa", StandardResolveOptions.RESOLVE_FILE);
}
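The failure asserted above comes from resolving further against a file-typed ResourceId. A minimal sketch of the intended usage, with illustrative paths: resolve intermediate segments with RESOLVE_DIRECTORY and only the final segment with RESOLVE_FILE.

import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
import org.apache.beam.sdk.io.fs.ResourceId;

public class ResolveOrderSketch {
  public static void main(String[] args) {
    // matchNewResource only constructs the ResourceId; it does not touch the filesystem.
    ResourceId root = FileSystems.matchNewResource("/root/", true /* isDirectory */);
    // Intermediate segment resolved as a directory, final segment as a file.
    ResourceId tmpDir = root.resolve("tmp", StandardResolveOptions.RESOLVE_DIRECTORY);
    ResourceId file = tmpDir.resolve("aa", StandardResolveOptions.RESOLVE_FILE);
    System.out.println(file); // e.g. /root/tmp/aa
  }
}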
Use of org.apache.beam.sdk.io.fs.ResourceId in project beam by apache.
The class GcsResourceIdTest, method testResolveInvalidNotDirectory.
@Test
public void testResolveInvalidNotDirectory() throws Exception {
  ResourceId tmpDir =
      toResourceIdentifier("gs://my_bucket/").resolve("tmp dir", StandardResolveOptions.RESOLVE_FILE);
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("Expected the gcsPath is a directory, but had [gs://my_bucket/tmp dir].");
  tmpDir.resolve("aa", StandardResolveOptions.RESOLVE_FILE);
}
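The same resolve rule applies to GCS paths. The sketch below assumes the GCP filesystem extension is on the classpath so that gs:// resources can be constructed through FileSystems; the bucket and object names are illustrative.

import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
import org.apache.beam.sdk.io.fs.ResourceId;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class GcsResolveSketch {
  public static void main(String[] args) {
    // Registers available filesystems; GCS is picked up if the GCP extension is present.
    FileSystems.setDefaultPipelineOptions(PipelineOptionsFactory.create());
    ResourceId bucket = FileSystems.matchNewResource("gs://my_bucket/", true /* isDirectory */);
    // Resolve the intermediate segment as a directory so the final RESOLVE_FILE succeeds.
    ResourceId tmpDir = bucket.resolve("tmp dir", StandardResolveOptions.RESOLVE_DIRECTORY);
    ResourceId file = tmpDir.resolve("aa", StandardResolveOptions.RESOLVE_FILE);
    System.out.println(file); // e.g. gs://my_bucket/tmp dir/aa
  }
}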
Use of org.apache.beam.sdk.io.fs.ResourceId in project beam by apache.
The class FakeJobService, method runLoadJob.
private JobStatus runLoadJob(JobReference jobRef, JobConfigurationLoad load) throws InterruptedException, IOException {
  TableReference destination = load.getDestinationTable();
  TableSchema schema = load.getSchema();
  List<ResourceId> sourceFiles = filesForLoadJobs.get(jobRef.getProjectId(), jobRef.getJobId());
  WriteDisposition writeDisposition = WriteDisposition.valueOf(load.getWriteDisposition());
  CreateDisposition createDisposition = CreateDisposition.valueOf(load.getCreateDisposition());
  checkArgument(load.getSourceFormat().equals("NEWLINE_DELIMITED_JSON"));
  Table existingTable = datasetService.getTable(destination);
  if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
    return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
  }
  datasetService.createTable(new Table().setTableReference(destination).setSchema(schema));
  List<TableRow> rows = Lists.newArrayList();
  for (ResourceId filename : sourceFiles) {
    rows.addAll(readRows(filename.toString()));
  }
  datasetService.insertAll(destination, rows, null);
  return new JobStatus().setState("DONE");
}
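The readRows helper called above is not shown in this excerpt. A hypothetical stand-in, assuming the source files are local newline-delimited JSON files as written by the tests, could decode each line with TableRowJsonCoder; the class and method names here are assumptions, not the fake service's actual code.

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.coders.Coder.Context;
import org.apache.beam.sdk.io.gcp.bigquery.TableRowJsonCoder;

class ReadRowsSketch {
  // Reads a NEWLINE_DELIMITED_JSON file and decodes each line into a TableRow.
  static List<TableRow> readRows(String filename) throws IOException {
    List<TableRow> rows = new ArrayList<>();
    try (BufferedReader reader =
        Files.newBufferedReader(Paths.get(filename), StandardCharsets.UTF_8)) {
      String line;
      while ((line = reader.readLine()) != null) {
        // Each line is one JSON-serialized TableRow.
        rows.add(
            TableRowJsonCoder.of()
                .decode(new ByteArrayInputStream(line.getBytes(StandardCharsets.UTF_8)), Context.OUTER));
      }
    }
    return rows;
  }
}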
Use of org.apache.beam.sdk.io.fs.ResourceId in project beam by apache.
The class BigQueryIOTest, method testWriteTables.
@Test
public void testWriteTables() throws Exception {
  p.enableAbandonedNodeEnforcement(false);
  FakeDatasetService datasetService = new FakeDatasetService();
  FakeBigQueryServices fakeBqServices =
      new FakeBigQueryServices()
          .withJobService(new FakeJobService())
          .withDatasetService(datasetService);
  datasetService.createDataset("project-id", "dataset-id", "", "");
  long numTables = 3;
  long numPartitions = 3;
  long numFilesPerPartition = 10;
  String jobIdToken = "jobIdToken";
  String stepUuid = "stepUuid";
  Map<TableDestination, List<String>> expectedTempTables = Maps.newHashMap();
  Path baseDir = Files.createTempDirectory(tempFolder, "testWriteTables");
  List<KV<ShardedKey<String>, List<String>>> partitions = Lists.newArrayList();
  for (int i = 0; i < numTables; ++i) {
    String tableName = String.format("project-id:dataset-id.table%05d", i);
    TableDestination tableDestination = new TableDestination(tableName, tableName);
    for (int j = 0; j < numPartitions; ++j) {
      String tempTableId = BigQueryHelpers.createJobId(jobIdToken, tableDestination, j);
      List<String> filesPerPartition = Lists.newArrayList();
      for (int k = 0; k < numFilesPerPartition; ++k) {
        String filename =
            Paths.get(
                    baseDir.toString(),
                    String.format("files0x%08x_%05d", tempTableId.hashCode(), k))
                .toString();
        ResourceId fileResource = FileSystems.matchNewResource(filename, false);
        try (WritableByteChannel channel = FileSystems.create(fileResource, MimeTypes.TEXT)) {
          try (OutputStream output = Channels.newOutputStream(channel)) {
            TableRow tableRow = new TableRow().set("name", tableName);
            TableRowJsonCoder.of().encode(tableRow, output, Context.OUTER);
            output.write("\n".getBytes(StandardCharsets.UTF_8));
          }
        }
        filesPerPartition.add(filename);
      }
      partitions.add(KV.of(ShardedKey.of(tableDestination.getTableSpec(), j), filesPerPartition));
      List<String> expectedTables = expectedTempTables.get(tableDestination);
      if (expectedTables == null) {
        expectedTables = Lists.newArrayList();
        expectedTempTables.put(tableDestination, expectedTables);
      }
      String json =
          String.format(
              "{\"datasetId\":\"dataset-id\",\"projectId\":\"project-id\",\"tableId\":\"%s\"}",
              tempTableId);
      expectedTables.add(json);
    }
  }
  PCollectionView<String> jobIdTokenView =
      p.apply("CreateJobId", Create.of("jobId")).apply(View.<String>asSingleton());
  PCollectionView<Map<String, String>> schemaMapView =
      p.apply(
              "CreateEmptySchema",
              Create.empty(new TypeDescriptor<KV<String, String>>() {}))
          .apply(View.<String, String>asMap());
  WriteTables<String> writeTables =
      new WriteTables<>(
          false,
          fakeBqServices,
          jobIdTokenView,
          schemaMapView,
          WriteDisposition.WRITE_EMPTY,
          CreateDisposition.CREATE_IF_NEEDED,
          new IdentityDynamicTables());
  DoFnTester<KV<ShardedKey<String>, List<String>>, KV<TableDestination, String>> tester =
      DoFnTester.of(writeTables);
  tester.setSideInput(jobIdTokenView, GlobalWindow.INSTANCE, jobIdToken);
  tester.setSideInput(schemaMapView, GlobalWindow.INSTANCE, ImmutableMap.<String, String>of());
  tester.getPipelineOptions().setTempLocation("tempLocation");
  for (KV<ShardedKey<String>, List<String>> partition : partitions) {
    tester.processElement(partition);
  }
  Map<TableDestination, List<String>> tempTablesResult = Maps.newHashMap();
  for (KV<TableDestination, String> element : tester.takeOutputElements()) {
    List<String> tables = tempTablesResult.get(element.getKey());
    if (tables == null) {
      tables = Lists.newArrayList();
      tempTablesResult.put(element.getKey(), tables);
    }
    tables.add(element.getValue());
  }
  assertEquals(expectedTempTables, tempTablesResult);
}
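The innermost loop above writes each partition file as newline-delimited JSON through FileSystems.create on a new ResourceId. Pulled out as a self-contained helper for reference (the class and method names are illustrative, not part of the test), the pattern looks like this:

import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
import java.nio.charset.StandardCharsets;
import java.util.List;
import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.coders.Coder.Context;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.io.fs.ResourceId;
import org.apache.beam.sdk.io.gcp.bigquery.TableRowJsonCoder;
import org.apache.beam.sdk.util.MimeTypes;

class WriteRowsSketch {
  // Writes rows to a new file as newline-delimited JSON, mirroring the loop in the test.
  static void writeRows(String filename, List<TableRow> rows) throws IOException {
    ResourceId resource = FileSystems.matchNewResource(filename, false /* isDirectory */);
    try (WritableByteChannel channel = FileSystems.create(resource, MimeTypes.TEXT);
        OutputStream output = Channels.newOutputStream(channel)) {
      for (TableRow row : rows) {
        TableRowJsonCoder.of().encode(row, output, Context.OUTER);
        output.write("\n".getBytes(StandardCharsets.UTF_8));
      }
    }
  }
}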