Search in sources:

Example 1 with ReadImportManifest

use of com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest in project DataflowTemplates by GoogleCloudPlatform.

the class TextImportTransformTest method readImportManifestInvalidTable.

/**
 * Checks that a manifest naming the table {@code NON_EXIST_TABLE} makes the pipeline fail with
 * the "not found in the database" error message.
 *
 * @throws Exception if the temporary files cannot be created or the manifest cannot be written
 */
@Test
public void readImportManifestInvalidTable() throws Exception {
    Path f11 = Files.createTempFile("table1-file", "1");
    Path manifestFile = Files.createTempFile("import-manifest", ".json");
    Charset charset = Charset.forName("UTF-8");
    // No catch here on purpose: if the manifest cannot be written the test setup is broken and
    // the test must fail right away instead of running against an incomplete manifest.
    try (BufferedWriter writer = Files.newBufferedWriter(manifestFile, charset)) {
        String jsonString = String.format("{\"tables\": [" + "{\"table_name\": \"NON_EXIST_TABLE\"," + "\"file_patterns\":[\"%s\"]}" + "]}", f11.toString());
        writer.write(jsonString, 0, jsonString.length());
    }
    ValueProvider<String> importManifest = ValueProvider.StaticValueProvider.of(manifestFile.toString());
    PCollectionView<Ddl> ddlView = pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());
    pipeline.apply("Read manifest file", new ReadImportManifest(importManifest)).apply("Resolve data files", new ResolveDataFiles(importManifest, ddlView));
    boolean failedAsExpected = false;
    try {
        pipeline.run();
    } catch (PipelineExecutionException e) {
        assertThat(e.getMessage(), equalTo("java.lang.RuntimeException: Table NON_EXIST_TABLE not found in the database. " + "Table must be pre-created in database"));
        failedAsExpected = true;
    }
    // Without this check the test would silently pass when the pipeline does not fail at all.
    if (!failedAsExpected) {
        throw new AssertionError("Expected PipelineExecutionException for table NON_EXIST_TABLE, but the pipeline ran successfully");
    }
}
Also used : Path(java.nio.file.Path) Charset(java.nio.charset.Charset) IOException(java.io.IOException) KV(org.apache.beam.sdk.values.KV) Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) BufferedWriter(java.io.BufferedWriter) ResolveDataFiles(com.google.cloud.teleport.spanner.TextImportTransform.ResolveDataFiles) PipelineExecutionException(org.apache.beam.sdk.Pipeline.PipelineExecutionException) ReadImportManifest(com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest) Test(org.junit.Test)

Example 2 with ReadImportManifest

use of com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest in project DataflowTemplates by GoogleCloudPlatform.

the class TextImportTransformTest method readImportManifest.

/**
 * Checks that reading a manifest listing two files for {@code table1} and one file for
 * {@code table2} yields exactly those three (table, file) pairs.
 *
 * @throws Exception if the temporary files cannot be created or the manifest cannot be written
 */
@Test
public void readImportManifest() throws Exception {
    Path f11 = Files.createTempFile("table1-file", "1");
    Path f12 = Files.createTempFile("table1-file", "2");
    // f13 and f22 are created on disk but deliberately left out of the manifest (see the
    // assertion below); the variables themselves are not referenced again.
    Path f13 = Files.createTempFile("table1-file", "3");
    Path f21 = Files.createTempFile("table2-file", "1");
    Path f22 = Files.createTempFile("table2-file", "2");
    Path manifestFile = Files.createTempFile("import-manifest", ".json");
    Charset charset = Charset.forName("UTF-8");
    // No catch here on purpose: a failed manifest write must fail the test immediately rather
    // than let the pipeline run against an incomplete manifest.
    try (BufferedWriter writer = Files.newBufferedWriter(manifestFile, charset)) {
        String jsonString = String.format("{\"tables\": [" + "{\"table_name\": \"table1\"," + "\"file_patterns\":[\"%s\",\"%s\"]}," + "{\"table_name\": \"table2\"," + "\"file_patterns\":[\"%s\"]}" + "]}", f11.toString(), f12.toString(), f21.toString());
        writer.write(jsonString, 0, jsonString.length());
    }
    ValueProvider<String> importManifest = ValueProvider.StaticValueProvider.of(manifestFile.toString());
    PCollectionView<Ddl> ddlView = pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());
    PCollection<KV<String, String>> tableAndFiles = pipeline.apply("Read manifest file", new ReadImportManifest(importManifest)).apply("Resolve data files", new ResolveDataFiles(importManifest, ddlView));
    // Validates that only the file patterns specified in manifest will be returned.
    // E.g., f13 and f22 are not in the tableAndFiles result.
    PAssert.that(tableAndFiles).containsInAnyOrder(KV.of("table1", f11.toString()), KV.of("table1", f12.toString()), KV.of("table2", f21.toString()));
    pipeline.run();
}
Also used : Path(java.nio.file.Path) ResolveDataFiles(com.google.cloud.teleport.spanner.TextImportTransform.ResolveDataFiles) ReadImportManifest(com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest) Charset(java.nio.charset.Charset) IOException(java.io.IOException) KV(org.apache.beam.sdk.values.KV) Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) BufferedWriter(java.io.BufferedWriter) Test(org.junit.Test)

Example 3 with ReadImportManifest

use of com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest in project DataflowTemplates by GoogleCloudPlatform.

the class TextImportTransformTest method readImportManifestInvalidManifestFormat.

/**
 * Checks that a manifest containing malformed JSON (the closing brace is missing) makes the
 * pipeline run fail with a {@code PipelineExecutionException}.
 *
 * @throws Exception if the temporary files cannot be created or the manifest cannot be written
 */
@Test(expected = PipelineExecutionException.class)
public void readImportManifestInvalidManifestFormat() throws Exception {
    Path f11 = Files.createTempFile("table1-file", "1");
    Path f12 = Files.createTempFile("table1-file", "2");
    // Created on disk but not listed by name in the manifest; the variable is not used again.
    Path f13 = Files.createTempFile("table1-file", "3");
    Path manifestFile = Files.createTempFile("import-manifest", ".json");
    Charset charset = Charset.forName("UTF-8");
    // No catch here on purpose: if the write failed silently, the pipeline would run against an
    // incomplete manifest and the expected exception could be raised for the wrong reason.
    try (BufferedWriter writer = Files.newBufferedWriter(manifestFile, charset)) {
        // An invalid json string (missing the ending close "}").
        String jsonString = String.format("{\"tables\": [" + "{\"table_name\": \"table1\"," + "\"file_patterns\":[\"%s\",\"%s\"]}," + "{\"table_name\": \"table2\"," + "\"file_patterns\":[\"*\"]}" + "]", f11.toString(), f12.toString());
        writer.write(jsonString, 0, jsonString.length());
    }
    ValueProvider<String> importManifest = ValueProvider.StaticValueProvider.of(manifestFile.toString());
    PCollectionView<Ddl> ddlView = pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());
    pipeline.apply("Read manifest file", new ReadImportManifest(importManifest)).apply("Resolve data files", new ResolveDataFiles(importManifest, ddlView));
    pipeline.run();
}
Also used : Path(java.nio.file.Path) ResolveDataFiles(com.google.cloud.teleport.spanner.TextImportTransform.ResolveDataFiles) ReadImportManifest(com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest) Charset(java.nio.charset.Charset) IOException(java.io.IOException) KV(org.apache.beam.sdk.values.KV) Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) BufferedWriter(java.io.BufferedWriter) Test(org.junit.Test)

Example 4 with ReadImportManifest

use of com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest in project DataflowTemplates by GoogleCloudPlatform.

the class TextImportTransformTest method readImportManifestGeneratedColumn.

/**
 * Runs the manifest-reading transforms against a manifest for {@code table3} that declares a
 * single column {@code int_col} of type {@code INT64}.
 *
 * <p>NOTE(review): this test makes no assertion on the pipeline output or on a failure message;
 * it only passes if {@code pipeline.run()} completes without throwing. Confirm whether an
 * explicit expectation is intended.
 *
 * @throws Exception if the temporary files cannot be created or the manifest cannot be written
 */
@Test
public void readImportManifestGeneratedColumn() throws Exception {
    Path f31 = Files.createTempFile("table3-file", "1");
    Path manifestFile = Files.createTempFile("import-manifest", ".json");
    Charset charset = Charset.forName("UTF-8");
    // No catch here on purpose: a failed manifest write must fail the test immediately rather
    // than let the pipeline run against an incomplete manifest.
    try (BufferedWriter writer = Files.newBufferedWriter(manifestFile, charset)) {
        String jsonString = String.format("{\"tables\": [" + "{\"table_name\": \"table3\"," + "\"file_patterns\": [\"%s\"]," + "\"columns\": [{\"column_name\": \"int_col\", \"type_name\": \"INT64\"}]}" + "]}", f31.toString());
        writer.write(jsonString, 0, jsonString.length());
    }
    ValueProvider<String> importManifest = ValueProvider.StaticValueProvider.of(manifestFile.toString());
    PCollectionView<Ddl> ddlView = pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());
    pipeline.apply("Read manifest file", new ReadImportManifest(importManifest)).apply("Resolve data files", new ResolveDataFiles(importManifest, ddlView));
    pipeline.run();
}
Also used : Path(java.nio.file.Path) ResolveDataFiles(com.google.cloud.teleport.spanner.TextImportTransform.ResolveDataFiles) ReadImportManifest(com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest) Charset(java.nio.charset.Charset) IOException(java.io.IOException) KV(org.apache.beam.sdk.values.KV) Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) BufferedWriter(java.io.BufferedWriter) Test(org.junit.Test)

Example 5 with ReadImportManifest

use of com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest in project DataflowTemplates by GoogleCloudPlatform.

the class TextImportTransformTest method readImportManifestUtfWithBOM.

/**
 * Checks that a UTF-8 manifest whose content starts with a byte-order mark ({@code U+FEFF}) is
 * still read, yielding the single (table1, file) pair it lists.
 *
 * @throws Exception if the temporary files cannot be created or the manifest cannot be written
 */
@Test
public void readImportManifestUtfWithBOM() throws Exception {
    Path f11 = Files.createTempFile("table1-file", "1");
    Path manifestFile = Files.createTempFile("import-manifest", ".json");
    Charset charset = Charset.forName("UTF-8");
    // No catch here on purpose: a failed manifest write must fail the test immediately rather
    // than let the pipeline run against an incomplete manifest.
    try (BufferedWriter writer = Files.newBufferedWriter(manifestFile, charset)) {
        // The leading \uFEFF is the byte-order mark this test is about.
        String jsonString = String.format("\uFEFF{\"tables\": [" + "{\"table_name\": \"table1\"," + "\"file_patterns\":[\"%s\"]}" + "]}", f11.toString());
        writer.write(jsonString, 0, jsonString.length());
    }
    ValueProvider<String> importManifest = ValueProvider.StaticValueProvider.of(manifestFile.toString());
    PCollectionView<Ddl> ddlView = pipeline.apply("ddl", Create.of(getTestDdl())).apply(View.asSingleton());
    PCollection<KV<String, String>> tableAndFiles = pipeline.apply("Read manifest file", new ReadImportManifest(importManifest)).apply("Resolve data files", new ResolveDataFiles(importManifest, ddlView));
    PAssert.that(tableAndFiles).containsInAnyOrder(KV.of("table1", f11.toString()));
    pipeline.run();
}
Also used : Path(java.nio.file.Path) ResolveDataFiles(com.google.cloud.teleport.spanner.TextImportTransform.ResolveDataFiles) ReadImportManifest(com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest) Charset(java.nio.charset.Charset) IOException(java.io.IOException) KV(org.apache.beam.sdk.values.KV) Ddl(com.google.cloud.teleport.spanner.ddl.Ddl) BufferedWriter(java.io.BufferedWriter) Test(org.junit.Test)

Aggregations

ReadImportManifest (com.google.cloud.teleport.spanner.TextImportTransform.ReadImportManifest): 10, ResolveDataFiles (com.google.cloud.teleport.spanner.TextImportTransform.ResolveDataFiles): 10, Ddl (com.google.cloud.teleport.spanner.ddl.Ddl): 10, BufferedWriter (java.io.BufferedWriter): 10, Charset (java.nio.charset.Charset): 10, Path (java.nio.file.Path): 10, KV (org.apache.beam.sdk.values.KV): 10, Test (org.junit.Test): 10, IOException (java.io.IOException): 8, PipelineExecutionException (org.apache.beam.sdk.Pipeline.PipelineExecutionException): 5