Search in sources :

Example 1 with FileItemProgress

Use of com.baidu.hugegraph.loader.progress.FileItemProgress in the project incubator-hugegraph-toolchain by Apache.

From the class FileLoadTest, method testLoadIncrementalModeAndLoadFailure.

/**
 * Runs the loader three times against the same struct file and checks the
 * vertex count, the recorded per-input progress and the generated failure
 * files after each run:
 * <ol>
 *   <li>a plain load with {@code --max-parse-errors 1};</li>
 *   <li>a load with {@code --incremental-mode true};</li>
 *   <li>a load with {@code --failure-mode true}, after the bad lines in the
 *       failure files have been rewritten with valid values.</li>
 * </ol>
 *
 * @throws IOException if reading or rewriting a failure file fails
 * @throws InterruptedException if the sleep before listing the failure
 *         files is interrupted
 */
@Test
public void testLoadIncrementalModeAndLoadFailure() throws IOException, InterruptedException {
    // Each input file contains exactly one row whose numeric column holds text
    ioUtil.write("vertex_person.csv", "name,age,city", "marko,应该是数字,Beijing", "vadas,27,Hongkong", "josh,32,Beijing", "peter,35,Shanghai", "\"li,nary\",26,\"Wu,han\"");
    ioUtil.write("vertex_software.csv", GBK, "name,lang,price", "office,C#,999", "lop,java,应该是数字", "ripple,java,199");

    // 1st time
    String[] args = new String[] { "-f", structPath("incremental_mode_and_load_failure/struct.json"), "-s", configPath("incremental_mode_and_load_failure/schema.groovy"), "-g", GRAPH, "-h", SERVER, "--batch-insert-threads", "2", "--max-parse-errors", "1", "--test-mode", "false" };
    HugeGraphLoader loader = new HugeGraphLoader(args);
    loader.load();
    LoadContext context = Whitebox.getInternalState(loader, "context");
    List<Vertex> vertices = CLIENT.graph().listVertices();
    Assert.assertEquals(4, vertices.size());
    Map<String, InputProgress> inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(1, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_person.csv", fileItem.name());
            // Reached last line: "li,nary",26,"Wu,han"
            Assert.assertEquals(6, fileItem.offset());
        }
    });

    File structDir = FileUtils.getFile(structPath("incremental_mode_and_load_failure/struct"));
    File failureDataDir = FileUtils.getFile(structPath("incremental_mode_and_load_failure/struct/failure-data/"));
    File[] files = failureDataDir.listFiles();
    // listFiles() returns null for a missing/unreadable directory, so check
    // before sorting; otherwise Arrays.sort would throw an NPE first
    Assert.assertNotNull(files);
    // Sort by name so the index-based lookups below are deterministic
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(2, files.length);
    File personFailureFile = files[0];
    List<String> personFailureLines = FileUtils.readLines(personFailureFile, Constants.CHARSET);
    Assert.assertEquals(2, personFailureLines.size());
    Assert.assertEquals("marko,应该是数字,Beijing", personFailureLines.get(1));

    // 2nd time, incremental-mode
    args = new String[] { "-f", structPath("incremental_mode_and_load_failure/struct.json"), "-g", GRAPH, "-h", SERVER, "--incremental-mode", "true", "--failure-mode", "false", "--batch-insert-threads", "2", "--max-parse-errors", "2", "--test-mode", "false" };
    loader = new HugeGraphLoader(args);
    loader.load();
    context = Whitebox.getInternalState(loader, "context");
    vertices = CLIENT.graph().listVertices();
    // ripple,java,199 has been loaded
    Assert.assertEquals(6, vertices.size());
    inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_person.csv", fileItem.name());
            // Reached last line: "li,nary",26,"Wu,han"
            Assert.assertEquals(6, fileItem.offset());
        } else if (id.equals("2")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_software.csv", fileItem.name());
            // Reached last line: "ripple,java,199"
            Assert.assertEquals(4, fileItem.offset());
        }
    });

    // NOTE(review): presumably waits for the failure files to be flushed to
    // disk before they are listed - confirm against the loader implementation
    Thread.sleep(1000);
    files = failureDataDir.listFiles();
    // Null check must precede the sort (see the first listing above)
    Assert.assertNotNull(files);
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(4, files.length);
    personFailureFile = files[0];
    personFailureLines = FileUtils.readLines(personFailureFile, Constants.CHARSET);
    Assert.assertEquals(2, personFailureLines.size());
    Assert.assertEquals("marko,应该是数字,Beijing", personFailureLines.get(1));
    File softwareFailureFile = files[2];
    List<String> softwareFailureLines = FileUtils.readLines(softwareFailureFile, GBK);
    Assert.assertEquals(2, softwareFailureLines.size());
    Assert.assertEquals("lop,java,应该是数字", softwareFailureLines.get(1));

    // TODO: Change only one line first, and make the second line go wrong
    // modify person failure file: replace the bad row with a valid one
    personFailureLines.remove(1);
    personFailureLines.add("marko,29,Beijing");
    FileUtils.writeLines(personFailureFile, personFailureLines, false);
    // modify software failure file: replace the bad row with a valid one
    softwareFailureLines.remove(1);
    softwareFailureLines.add("lop,java,328");
    FileUtils.writeLines(softwareFailureFile, softwareFailureLines, false);

    // 3rd time, --failure-mode
    args = new String[] { "-f", structPath("incremental_mode_and_load_failure/struct.json"), "-g", GRAPH, "-h", SERVER, "--incremental-mode", "false", "--failure-mode", "true", "--batch-insert-threads", "2", "--max-parse-errors", "2", "--test-mode", "false" };
    loader = new HugeGraphLoader(args);
    loader.load();
    context = Whitebox.getInternalState(loader, "context");
    vertices = CLIENT.graph().listVertices();
    // marko,29,Beijing has been loaded
    Assert.assertEquals(8, vertices.size());
    inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            // The rewritten failure file has two lines
            Assert.assertEquals(2, fileItem.offset());
        } else if (id.equals("2")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            // The rewritten failure file has two lines
            Assert.assertEquals(2, fileItem.offset());
        }
    });

    FileUtils.forceDeleteOnExit(structDir);
}
Also used : HugeGraphLoader(com.baidu.hugegraph.loader.HugeGraphLoader) Vertex(com.baidu.hugegraph.structure.graph.Vertex) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) FileItemProgress(com.baidu.hugegraph.loader.progress.FileItemProgress) InputItemProgress(com.baidu.hugegraph.loader.progress.InputItemProgress) InputProgress(com.baidu.hugegraph.loader.progress.InputProgress) LoadContext(com.baidu.hugegraph.loader.executor.LoadContext) File(java.io.File) Test(org.junit.Test)

Example 2 with FileItemProgress

Use of com.baidu.hugegraph.loader.progress.FileItemProgress in the project incubator-hugegraph-toolchain by Apache.

From the class FileLoadTest, method testReloadJsonFailureFiles.

/**
 * Loads a JSON edge file whose second line references vertices that were
 * not written to the vertex file, then reloads the resulting failure file
 * twice with {@code --failure-mode true}: once unmodified and once after
 * the failing line has been rewritten to reference existing vertices.
 * Expects one edge after the first load and two edges after the final
 * reload.
 *
 * @throws IOException if reading or rewriting the failure file fails
 * @throws InterruptedException if the sleep before listing the failure
 *         files is interrupted
 */
@Test
public void testReloadJsonFailureFiles() throws IOException, InterruptedException {
    ioUtil.write("vertex_person.csv", "name,age,city", "marko,29,Beijing", "vadas,27,Hongkong", "tom,28,Wuhan");
    // The second edge line uses a nonexistent source and target vertex
    ioUtil.write("edge_knows.json", "{\"source_name\": \"marko\", \"target_name\": " + "\"vadas\", \"date\": \"2016-01-10 12:00:00\"," + "\"weight\": 0.5}",
    "{\"source_name\": \"marko1\", \"target_name\": " + "\"vadas1\", \"date\": \"2013-02-20 13:00:00\"," + "\"weight\": 1.0}");
    String[] args = new String[] { "-f", structPath("reload_json_failure_files/struct.json"), "-s", configPath("reload_json_failure_files/schema.groovy"), "-g", GRAPH, "-h", SERVER, "--check-vertex", "true", "--batch-insert-threads", "2", "--test-mode", "false" };
    HugeGraphLoader loader = new HugeGraphLoader(args);
    loader.load();
    LoadContext context = Whitebox.getInternalState(loader, "context");
    List<Edge> edges = CLIENT.graph().listEdges();
    Assert.assertEquals(1, edges.size());
    Map<String, InputProgress> inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    Assert.assertEquals(ImmutableSet.of("1", "2"), inputProgressMap.keySet());
    inputProgressMap.forEach((id, value) -> {
        if (id.equals("2")) {
            // The error line is exactly last line
            Set<InputItemProgress> loadedItems = value.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            // Fail with an assertion rather than a ClassCastException
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("edge_knows.json", fileItem.name());
            Assert.assertEquals(2, fileItem.offset());
        }
    });

    // Load failure data without modification
    args = new String[] { "-f", structPath("reload_json_failure_files/struct.json"), "-g", GRAPH, "-h", SERVER, "--failure-mode", "true", "--check-vertex", "true", "--batch-insert-threads", "2", "--test-mode", "false" };
    // No exception throw, but error line still exist
    HugeGraphLoader.main(args);
    // NOTE(review): presumably waits for the failure file to be flushed to
    // disk before it is listed - confirm against the loader implementation
    Thread.sleep(1000);

    // Reload with modification
    File structDir = FileUtils.getFile(structPath("reload_json_failure_files/struct"));
    File failureDir = FileUtils.getFile(structPath("reload_json_failure_files/struct/failure-data/"));
    File[] files = failureDir.listFiles();
    // listFiles() returns null for a missing/unreadable directory, so check
    // before sorting; otherwise Arrays.sort would throw an NPE first
    Assert.assertNotNull(files);
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(1, files.length);
    File knowsFailureFile = files[0];
    List<String> failureLines = FileUtils.readLines(knowsFailureFile, Constants.CHARSET);
    Assert.assertEquals(2, failureLines.size());
    Assert.assertEquals("{\"source_name\": \"marko1\", \"target_name\": " + "\"vadas1\", \"date\": \"2013-02-20 13:00:00\"," + "\"weight\": 1.0}", failureLines.get(1));
    // Replace the failing line with an edge between two vertices written above
    failureLines.remove(1);
    failureLines.add("{\"source_name\": \"marko\", \"target_name\": " + "\"tom\", \"date\": \"2013-02-20 13:00:00\"," + "\"weight\": 1.0}");
    FileUtils.writeLines(knowsFailureFile, failureLines, false);
    // No exception throw, and error line doesn't exist
    HugeGraphLoader.main(args);
    edges = CLIENT.graph().listEdges();
    Assert.assertEquals(2, edges.size());

    FileUtils.forceDeleteOnExit(structDir);
}
Also used : HugeGraphLoader(com.baidu.hugegraph.loader.HugeGraphLoader) FileItemProgress(com.baidu.hugegraph.loader.progress.FileItemProgress) InputItemProgress(com.baidu.hugegraph.loader.progress.InputItemProgress) InputProgress(com.baidu.hugegraph.loader.progress.InputProgress) LoadContext(com.baidu.hugegraph.loader.executor.LoadContext) Edge(com.baidu.hugegraph.structure.graph.Edge) File(java.io.File) Test(org.junit.Test)

Aggregations

HugeGraphLoader (com.baidu.hugegraph.loader.HugeGraphLoader): 2, LoadContext (com.baidu.hugegraph.loader.executor.LoadContext): 2, FileItemProgress (com.baidu.hugegraph.loader.progress.FileItemProgress): 2, InputItemProgress (com.baidu.hugegraph.loader.progress.InputItemProgress): 2, InputProgress (com.baidu.hugegraph.loader.progress.InputProgress): 2, File (java.io.File): 2, Test (org.junit.Test): 2, Edge (com.baidu.hugegraph.structure.graph.Edge): 1, Vertex (com.baidu.hugegraph.structure.graph.Vertex): 1, ImmutableSet (com.google.common.collect.ImmutableSet): 1, Set (java.util.Set): 1