Search in sources :

Example 1 with InputItemProgress

use of com.baidu.hugegraph.loader.progress.InputItemProgress in project incubator-hugegraph-toolchain by apache.

the class InputProgressDeser method readInputProgress.

/**
 * Builds an {@link InputProgress} from its JSON representation.
 *
 * <p>The node is expected to carry three fields: the source type (a string),
 * the loaded items (an array) and the loading item (an object or JSON null).
 * Only the FILE and HDFS source types are handled; both deserialize their
 * items as {@link FileItemProgress}.
 *
 * @param node the JSON node describing one input progress
 * @return the progress built from the source type, loaded items and loading item
 * @throws AssertionError if the source type is JDBC or any other unhandled type
 */
@SuppressWarnings("unchecked")
private static InputProgress readInputProgress(JsonNode node) {
    JsonNode typeNode = getNode(node, FIELD_TYPE, JsonNodeType.STRING);
    // Upper-case with a fixed locale: the default-locale variant can produce
    // non-ASCII letters (e.g. dotted capital I under a Turkish locale), which
    // would make the constant lookup below fail for an otherwise valid type.
    String type = typeNode.asText().toUpperCase(java.util.Locale.ROOT);
    SourceType sourceType = SourceType.valueOf(type);
    JsonNode loadedItemsNode = getNode(node, FIELD_LOADED_ITEMS, JsonNodeType.ARRAY);
    // The loading item may legitimately be JSON null as well as an object
    JsonNode loadingItemNode = getNode(node, FIELD_LOADING_ITEM, JsonNodeType.OBJECT, JsonNodeType.NULL);
    Set<InputItemProgress> loadedItems;
    InputItemProgress loadingItem;
    switch(sourceType) {
        case FILE:
        case HDFS:
            // Cast through Object: a Set<FileItemProgress> cannot be cast to
            // Set<InputItemProgress> directly because generics are invariant.
            loadedItems = (Set<InputItemProgress>) (Object) JsonUtil.convertSet(loadedItemsNode, FileItemProgress.class);
            loadingItem = JsonUtil.convert(loadingItemNode, FileItemProgress.class);
            break;
        case JDBC:
        default:
            throw new AssertionError(String.format("Unsupported input source '%s'", type));
    }
    return new InputProgress(sourceType, loadedItems, loadingItem);
}
Also used : FileItemProgress(com.baidu.hugegraph.loader.progress.FileItemProgress) InputProgress(com.baidu.hugegraph.loader.progress.InputProgress) SourceType(com.baidu.hugegraph.loader.source.SourceType) InputItemProgress(com.baidu.hugegraph.loader.progress.InputItemProgress) JsonNode(com.fasterxml.jackson.databind.JsonNode)

Example 2 with InputItemProgress

use of com.baidu.hugegraph.loader.progress.InputItemProgress in project incubator-hugegraph-toolchain by apache.

the class FileReader method checkLastLoadStatus.

/**
 * Determines how far the given readable was loaded in the previous run and
 * records the corresponding item in the new progress.
 *
 * @param readable the input whose previous load state is checked
 * @return {@code LOADED} if the old progress has a matching loaded item,
 *         {@code LOADED_HALF} if it has a matching loading item,
 *         otherwise {@code NOT_LOADED}
 */
private LoadStatus checkLastLoadStatus(Readable readable) {
    // NOTE (from the original author): calculating the check sum is a bit time consuming
    InputItemProgress current = readable.inputItemProgress();

    // Matched as fully loaded in the old progress: carry the old record over
    InputItemProgress finished = this.oldProgress.matchLoadedItem(current);
    if (finished != null) {
        this.newProgress.addLoadedItem(finished);
        return LoadStatus.LOADED;
    }

    // Otherwise either resume from the old partial record or start fresh
    // with the item just obtained from the readable
    InputItemProgress partial = this.oldProgress.matchLoadingItem(current);
    boolean resumable = partial != null;
    this.newProgress.addLoadingItem(resumable ? partial : current);
    return resumable ? LoadStatus.LOADED_HALF : LoadStatus.NOT_LOADED;
}
Also used : InputItemProgress(com.baidu.hugegraph.loader.progress.InputItemProgress)

Example 3 with InputItemProgress

use of com.baidu.hugegraph.loader.progress.InputItemProgress in project incubator-hugegraph-toolchain by apache.

the class FileLoadTest method testLoadIncrementalModeAndLoadFailure.

/**
 * Runs the loader three times against two CSV files that each contain one
 * unparsable line, checking vertex counts, recorded progress and the
 * failure-data files after each run:
 * <ol>
 *   <li>plain load with {@code --max-parse-errors 1};</li>
 *   <li>{@code --incremental-mode true} with {@code --max-parse-errors 2};</li>
 *   <li>{@code --failure-mode true} after the failure files were corrected.</li>
 * </ol>
 */
@Test
public void testLoadIncrementalModeAndLoadFailure() throws IOException, InterruptedException {
    ioUtil.write("vertex_person.csv", "name,age,city", "marko,应该是数字,Beijing", "vadas,27,Hongkong", "josh,32,Beijing", "peter,35,Shanghai", "\"li,nary\",26,\"Wu,han\"");
    ioUtil.write("vertex_software.csv", GBK, "name,lang,price", "office,C#,999", "lop,java,应该是数字", "ripple,java,199");
    // 1st time
    String[] args = new String[] { "-f", structPath("incremental_mode_and_load_failure/struct.json"), "-s", configPath("incremental_mode_and_load_failure/schema.groovy"), "-g", GRAPH, "-h", SERVER, "--batch-insert-threads", "2", "--max-parse-errors", "1", "--test-mode", "false" };
    HugeGraphLoader loader = new HugeGraphLoader(args);
    loader.load();
    LoadContext context = Whitebox.getInternalState(loader, "context");
    List<Vertex> vertices = CLIENT.graph().listVertices();
    Assert.assertEquals(4, vertices.size());
    Map<String, InputProgress> inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(1, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_person.csv", fileItem.name());
            // Reached last line: "li,nary",26,"Wu,han"
            Assert.assertEquals(6, fileItem.offset());
        }
    });
    File structDir = FileUtils.getFile(structPath("incremental_mode_and_load_failure/struct"));
    File failureDataDir = FileUtils.getFile(structPath("incremental_mode_and_load_failure/struct/failure-data/"));
    File[] files = failureDataDir.listFiles();
    // Check for null before sorting: listFiles() returns null when the path is
    // not a directory or an I/O error occurs, and sorting null would throw a
    // NullPointerException instead of a readable assertion failure.
    Assert.assertNotNull(files);
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(2, files.length);
    File personFailureFile = files[0];
    List<String> personFailureLines = FileUtils.readLines(personFailureFile, Constants.CHARSET);
    Assert.assertEquals(2, personFailureLines.size());
    Assert.assertEquals("marko,应该是数字,Beijing", personFailureLines.get(1));
    // 2nd time, incremental-mode
    args = new String[] { "-f", structPath("incremental_mode_and_load_failure/struct.json"), "-g", GRAPH, "-h", SERVER, "--incremental-mode", "true", "--failure-mode", "false", "--batch-insert-threads", "2", "--max-parse-errors", "2", "--test-mode", "false" };
    loader = new HugeGraphLoader(args);
    loader.load();
    context = Whitebox.getInternalState(loader, "context");
    vertices = CLIENT.graph().listVertices();
    // ripple,java,199 has been loaded
    Assert.assertEquals(6, vertices.size());
    inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_person.csv", fileItem.name());
            // Reached last line: "li,nary",26,"Wu,han"
            Assert.assertEquals(6, fileItem.offset());
        } else if (id.equals("2")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_software.csv", fileItem.name());
            // Reached last line: "ripple,java,199"
            Assert.assertEquals(4, fileItem.offset());
        }
    });
    Thread.sleep(1000);
    files = failureDataDir.listFiles();
    // Same ordering as above: verify the listing succeeded before sorting it
    Assert.assertNotNull(files);
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(4, files.length);
    personFailureFile = files[0];
    personFailureLines = FileUtils.readLines(personFailureFile, Constants.CHARSET);
    Assert.assertEquals(2, personFailureLines.size());
    Assert.assertEquals("marko,应该是数字,Beijing", personFailureLines.get(1));
    File softwareFailureFile = files[2];
    List<String> softwareFailureLines = FileUtils.readLines(softwareFailureFile, GBK);
    Assert.assertEquals(2, softwareFailureLines.size());
    Assert.assertEquals("lop,java,应该是数字", softwareFailureLines.get(1));
    // TODO: Change only one line first, and make the second line go wrong
    // Replace the bad line in the person failure file with a valid one
    personFailureLines.remove(1);
    personFailureLines.add("marko,29,Beijing");
    FileUtils.writeLines(personFailureFile, personFailureLines, false);
    // Replace the bad line in the software failure file with a valid one
    softwareFailureLines.remove(1);
    softwareFailureLines.add("lop,java,328");
    FileUtils.writeLines(softwareFailureFile, softwareFailureLines, false);
    // 3rd time, --failure-mode
    args = new String[] { "-f", structPath("incremental_mode_and_load_failure/struct.json"), "-g", GRAPH, "-h", SERVER, "--incremental-mode", "false", "--failure-mode", "true", "--batch-insert-threads", "2", "--max-parse-errors", "2", "--test-mode", "false" };
    loader = new HugeGraphLoader(args);
    loader.load();
    context = Whitebox.getInternalState(loader, "context");
    vertices = CLIENT.graph().listVertices();
    // marko,29,Beijing has been loaded
    Assert.assertEquals(8, vertices.size());
    inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals(2, fileItem.offset());
        } else if (id.equals("2")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals(2, fileItem.offset());
        }
    });
    FileUtils.forceDeleteOnExit(structDir);
}
Also used : HugeGraphLoader(com.baidu.hugegraph.loader.HugeGraphLoader) Vertex(com.baidu.hugegraph.structure.graph.Vertex) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) FileItemProgress(com.baidu.hugegraph.loader.progress.FileItemProgress) InputItemProgress(com.baidu.hugegraph.loader.progress.InputItemProgress) InputProgress(com.baidu.hugegraph.loader.progress.InputProgress) LoadContext(com.baidu.hugegraph.loader.executor.LoadContext) File(java.io.File) Test(org.junit.Test)

Example 4 with InputItemProgress

use of com.baidu.hugegraph.loader.progress.InputItemProgress in project incubator-hugegraph-toolchain by apache.

the class FileLoadTest method testReloadJsonFailureFiles.

/**
 * Loads a JSON edge file whose second line references vertices that were
 * never written, then reloads in {@code --failure-mode} twice: once with the
 * failure file untouched, and once after rewriting the bad line to reference
 * existing vertices, at which point two edges are expected.
 */
@Test
public void testReloadJsonFailureFiles() throws IOException, InterruptedException {
    ioUtil.write("vertex_person.csv", "name,age,city", "marko,29,Beijing", "vadas,27,Hongkong", "tom,28,Wuhan");
    ioUtil.write("edge_knows.json",
                 "{\"source_name\": \"marko\", \"target_name\": " + "\"vadas\", \"date\": \"2016-01-10 12:00:00\"," + "\"weight\": 0.5}",
                 // Source and target vertices of the next line do not exist
                 "{\"source_name\": \"marko1\", \"target_name\": " + "\"vadas1\", \"date\": \"2013-02-20 13:00:00\"," + "\"weight\": 1.0}");
    String[] args = new String[] { "-f", structPath("reload_json_failure_files/struct.json"), "-s", configPath("reload_json_failure_files/schema.groovy"), "-g", GRAPH, "-h", SERVER, "--check-vertex", "true", "--batch-insert-threads", "2", "--test-mode", "false" };
    HugeGraphLoader loader = new HugeGraphLoader(args);
    loader.load();
    LoadContext context = Whitebox.getInternalState(loader, "context");
    List<Edge> edges = CLIENT.graph().listEdges();
    Assert.assertEquals(1, edges.size());
    Map<String, InputProgress> inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    Assert.assertEquals(ImmutableSet.of("1", "2"), inputProgressMap.keySet());
    inputProgressMap.forEach((id, value) -> {
        if (id.equals("2")) {
            // The error line is exactly last line
            Set<InputItemProgress> loadedItems = value.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("edge_knows.json", fileItem.name());
            Assert.assertEquals(2, fileItem.offset());
        }
    });
    // Load failure data without modification
    args = new String[] { "-f", structPath("reload_json_failure_files/struct.json"), "-g", GRAPH, "-h", SERVER, "--failure-mode", "true", "--check-vertex", "true", "--batch-insert-threads", "2", "--test-mode", "false" };
    // No exception thrown, but the error line still exists
    HugeGraphLoader.main(args);
    Thread.sleep(1000);
    // Reload with modification
    File structDir = FileUtils.getFile(structPath("reload_json_failure_files/struct"));
    File failureDir = FileUtils.getFile(structPath("reload_json_failure_files/struct/failure-data/"));
    File[] files = failureDir.listFiles();
    // Check for null before sorting: listFiles() returns null when the path is
    // not a directory or an I/O error occurs, and sorting null would throw a
    // NullPointerException instead of a readable assertion failure.
    Assert.assertNotNull(files);
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(1, files.length);
    File knowsFailureFile = files[0];
    List<String> failureLines = FileUtils.readLines(knowsFailureFile, Constants.CHARSET);
    Assert.assertEquals(2, failureLines.size());
    Assert.assertEquals("{\"source_name\": \"marko1\", \"target_name\": " + "\"vadas1\", \"date\": \"2013-02-20 13:00:00\"," + "\"weight\": 1.0}", failureLines.get(1));
    // Swap the failing line for one whose endpoints were both written above
    failureLines.remove(1);
    failureLines.add("{\"source_name\": \"marko\", \"target_name\": " + "\"tom\", \"date\": \"2013-02-20 13:00:00\"," + "\"weight\": 1.0}");
    FileUtils.writeLines(knowsFailureFile, failureLines, false);
    // No exception thrown, and the error line no longer exists
    HugeGraphLoader.main(args);
    edges = CLIENT.graph().listEdges();
    Assert.assertEquals(2, edges.size());
    FileUtils.forceDeleteOnExit(structDir);
}
Also used : HugeGraphLoader(com.baidu.hugegraph.loader.HugeGraphLoader) FileItemProgress(com.baidu.hugegraph.loader.progress.FileItemProgress) InputItemProgress(com.baidu.hugegraph.loader.progress.InputItemProgress) InputProgress(com.baidu.hugegraph.loader.progress.InputProgress) LoadContext(com.baidu.hugegraph.loader.executor.LoadContext) Edge(com.baidu.hugegraph.structure.graph.Edge) File(java.io.File) Test(org.junit.Test)

Aggregations

InputItemProgress (com.baidu.hugegraph.loader.progress.InputItemProgress)4 FileItemProgress (com.baidu.hugegraph.loader.progress.FileItemProgress)3 InputProgress (com.baidu.hugegraph.loader.progress.InputProgress)3 HugeGraphLoader (com.baidu.hugegraph.loader.HugeGraphLoader)2 LoadContext (com.baidu.hugegraph.loader.executor.LoadContext)2 File (java.io.File)2 Test (org.junit.Test)2 SourceType (com.baidu.hugegraph.loader.source.SourceType)1 Edge (com.baidu.hugegraph.structure.graph.Edge)1 Vertex (com.baidu.hugegraph.structure.graph.Vertex)1 JsonNode (com.fasterxml.jackson.databind.JsonNode)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Set (java.util.Set)1