Use of com.baidu.hugegraph.loader.progress.FileItemProgress in project incubator-hugegraph-toolchain by Apache.
Class FileLoadTest, method testLoadIncrementalModeAndLoadFailure.
/**
 * Runs the loader three times against the same struct file and checks the
 * recorded progress and the failure-data files after each run:
 * <ol>
 *   <li>a first load with a schema and {@code --max-parse-errors 1};</li>
 *   <li>a second load with {@code --incremental-mode true};</li>
 *   <li>a third load with {@code --failure-mode true}, after the bad lines
 *       in the failure files have been corrected by hand.</li>
 * </ol>
 *
 * @throws IOException          if reading or rewriting a failure file fails
 * @throws InterruptedException if the pause before inspecting the failure
 *                              directory is interrupted
 */
@Test
public void testLoadIncrementalModeAndLoadFailure() throws IOException, InterruptedException {
    // Each input file contains exactly one row whose numeric column holds text.
    ioUtil.write("vertex_person.csv", "name,age,city", "marko,应该是数字,Beijing", "vadas,27,Hongkong", "josh,32,Beijing", "peter,35,Shanghai", "\"li,nary\",26,\"Wu,han\"");
    ioUtil.write("vertex_software.csv", GBK, "name,lang,price", "office,C#,999", "lop,java,应该是数字", "ripple,java,199");

    // 1st time
    String[] args = new String[] {
        "-f", structPath("incremental_mode_and_load_failure/struct.json"),
        "-s", configPath("incremental_mode_and_load_failure/schema.groovy"),
        "-g", GRAPH,
        "-h", SERVER,
        "--batch-insert-threads", "2",
        "--max-parse-errors", "1",
        "--test-mode", "false"
    };
    HugeGraphLoader loader = new HugeGraphLoader(args);
    loader.load();
    LoadContext context = Whitebox.getInternalState(loader, "context");

    List<Vertex> vertices = CLIENT.graph().listVertices();
    Assert.assertEquals(4, vertices.size());

    // Only one input has recorded progress after the first run.
    Map<String, InputProgress> inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(1, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_person.csv", fileItem.name());
            // Reached last line: "li,nary",26,"Wu,han"
            Assert.assertEquals(6, fileItem.offset());
        }
    });

    File structDir = FileUtils.getFile(structPath("incremental_mode_and_load_failure/struct"));
    File failureDataDir = FileUtils.getFile(structPath("incremental_mode_and_load_failure/struct/failure-data/"));
    File[] files = failureDataDir.listFiles();
    // listFiles() returns null when the directory is missing or unreadable,
    // so check for null before sorting to get a clear assertion failure
    // instead of a NullPointerException from Arrays.sort.
    Assert.assertNotNull(files);
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(2, files.length);
    // NOTE(review): index 0 (by name) is assumed to be the person failure
    // data file; the layout of the other entries is not visible here.
    File personFailureFile = files[0];
    List<String> personFailureLines = FileUtils.readLines(personFailureFile, Constants.CHARSET);
    Assert.assertEquals(2, personFailureLines.size());
    Assert.assertEquals("marko,应该是数字,Beijing", personFailureLines.get(1));

    // 2nd time, incremental-mode
    args = new String[] {
        "-f", structPath("incremental_mode_and_load_failure/struct.json"),
        "-g", GRAPH,
        "-h", SERVER,
        "--incremental-mode", "true",
        "--failure-mode", "false",
        "--batch-insert-threads", "2",
        "--max-parse-errors", "2",
        "--test-mode", "false"
    };
    loader = new HugeGraphLoader(args);
    loader.load();
    context = Whitebox.getInternalState(loader, "context");

    vertices = CLIENT.graph().listVertices();
    // ripple,java,199 has been loaded
    Assert.assertEquals(6, vertices.size());

    inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_person.csv", fileItem.name());
            // Reached last line: "li,nary",26,"Wu,han"
            Assert.assertEquals(6, fileItem.offset());
        } else if (id.equals("2")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_software.csv", fileItem.name());
            // Reached last line: "ripple,java,199"
            Assert.assertEquals(4, fileItem.offset());
        }
    });

    // NOTE(review): presumably waits for the failure files to be written
    // out before the directory is inspected - confirm.
    Thread.sleep(1000);
    files = failureDataDir.listFiles();
    // Null check must precede the sort (see above).
    Assert.assertNotNull(files);
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(4, files.length);
    personFailureFile = files[0];
    personFailureLines = FileUtils.readLines(personFailureFile, Constants.CHARSET);
    Assert.assertEquals(2, personFailureLines.size());
    Assert.assertEquals("marko,应该是数字,Beijing", personFailureLines.get(1));
    // NOTE(review): index 2 (by name) is assumed to be the software failure
    // data file - confirm against the loader's failure-file naming.
    File softwareFailureFile = files[2];
    // The software input was written with GBK, so read it back the same way.
    List<String> softwareFailureLines = FileUtils.readLines(softwareFailureFile, GBK);
    Assert.assertEquals(2, softwareFailureLines.size());
    Assert.assertEquals("lop,java,应该是数字", softwareFailureLines.get(1));

    // TODO: Change only one line first, and make the second line go wrong
    // Replace the bad row in the person failure file with a valid one
    personFailureLines.remove(1);
    personFailureLines.add("marko,29,Beijing");
    FileUtils.writeLines(personFailureFile, personFailureLines, false);
    // Replace the bad row in the software failure file with a valid one
    softwareFailureLines.remove(1);
    softwareFailureLines.add("lop,java,328");
    FileUtils.writeLines(softwareFailureFile, softwareFailureLines, false);

    // 3rd time, --failure-mode
    args = new String[] {
        "-f", structPath("incremental_mode_and_load_failure/struct.json"),
        "-g", GRAPH,
        "-h", SERVER,
        "--incremental-mode", "false",
        "--failure-mode", "true",
        "--batch-insert-threads", "2",
        "--max-parse-errors", "2",
        "--test-mode", "false"
    };
    loader = new HugeGraphLoader(args);
    loader.load();
    context = Whitebox.getInternalState(loader, "context");

    vertices = CLIENT.graph().listVertices();
    // marko,29,Beijing has been loaded
    Assert.assertEquals(8, vertices.size());

    inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            // Each failure file read in this run holds two lines.
            Assert.assertEquals(2, fileItem.offset());
        } else if (id.equals("2")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals(2, fileItem.offset());
        }
    });

    FileUtils.forceDeleteOnExit(structDir);
}
Use of com.baidu.hugegraph.loader.progress.FileItemProgress in project incubator-hugegraph-toolchain by Apache.
Class FileLoadTest, method testReloadJsonFailureFiles.
/**
 * Loads a JSON edge file in which one edge references vertices that are
 * not in the vertex file, then reloads the resulting failure file twice
 * with {@code --failure-mode true}: once unchanged and once after the bad
 * line has been replaced with an edge between vertices that are in the
 * vertex file.
 *
 * @throws IOException          if reading or rewriting the failure file fails
 * @throws InterruptedException if the pause before inspecting the failure
 *                              directory is interrupted
 */
@Test
public void testReloadJsonFailureFiles() throws IOException, InterruptedException {
    ioUtil.write("vertex_person.csv", "name,age,city", "marko,29,Beijing", "vadas,27,Hongkong", "tom,28,Wuhan");
    ioUtil.write("edge_knows.json",
                 "{\"source_name\": \"marko\", \"target_name\": " + "\"vadas\", \"date\": \"2016-01-10 12:00:00\"," + "\"weight\": 0.5}",
                 // Source and target vertices of this edge are not in vertex_person.csv
                 "{\"source_name\": \"marko1\", \"target_name\": " + "\"vadas1\", \"date\": \"2013-02-20 13:00:00\"," + "\"weight\": 1.0}");

    String[] args = new String[] {
        "-f", structPath("reload_json_failure_files/struct.json"),
        "-s", configPath("reload_json_failure_files/schema.groovy"),
        "-g", GRAPH,
        "-h", SERVER,
        "--check-vertex", "true",
        "--batch-insert-threads", "2",
        "--test-mode", "false"
    };
    HugeGraphLoader loader = new HugeGraphLoader(args);
    loader.load();
    LoadContext context = Whitebox.getInternalState(loader, "context");

    // Only the first edge is expected to have been inserted.
    List<Edge> edges = CLIENT.graph().listEdges();
    Assert.assertEquals(1, edges.size());

    Map<String, InputProgress> inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    Assert.assertEquals(ImmutableSet.of("1", "2"), inputProgressMap.keySet());
    inputProgressMap.forEach((id, value) -> {
        if (id.equals("2")) {
            // The error line is exactly last line
            Set<InputItemProgress> loadedItems = value.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            // Fail with an assertion rather than a ClassCastException.
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("edge_knows.json", fileItem.name());
            Assert.assertEquals(2, fileItem.offset());
        }
    });

    // Load failure data without modification
    args = new String[] {
        "-f", structPath("reload_json_failure_files/struct.json"),
        "-g", GRAPH,
        "-h", SERVER,
        "--failure-mode", "true",
        "--check-vertex", "true",
        "--batch-insert-threads", "2",
        "--test-mode", "false"
    };
    // No exception is thrown, but the error line still exists
    HugeGraphLoader.main(args);

    // NOTE(review): presumably waits for the failure file to be written
    // out before the directory is inspected - confirm.
    Thread.sleep(1000);

    // Reload with modification
    File structDir = FileUtils.getFile(structPath("reload_json_failure_files/struct"));
    File failureDir = FileUtils.getFile(structPath("reload_json_failure_files/struct/failure-data/"));
    File[] files = failureDir.listFiles();
    // listFiles() returns null when the directory is missing or unreadable,
    // so check for null before sorting to get a clear assertion failure
    // instead of a NullPointerException from Arrays.sort.
    Assert.assertNotNull(files);
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(1, files.length);
    File knowsFailureFile = files[0];
    List<String> failureLines = FileUtils.readLines(knowsFailureFile, Constants.CHARSET);
    Assert.assertEquals(2, failureLines.size());
    Assert.assertEquals("{\"source_name\": \"marko1\", \"target_name\": " + "\"vadas1\", \"date\": \"2013-02-20 13:00:00\"," + "\"weight\": 1.0}", failureLines.get(1));

    // Replace the failing edge with one between vertices from vertex_person.csv.
    failureLines.remove(1);
    failureLines.add("{\"source_name\": \"marko\", \"target_name\": " + "\"tom\", \"date\": \"2013-02-20 13:00:00\"," + "\"weight\": 1.0}");
    FileUtils.writeLines(knowsFailureFile, failureLines, false);

    // No exception is thrown, and the error line no longer exists
    HugeGraphLoader.main(args);
    edges = CLIENT.graph().listEdges();
    Assert.assertEquals(2, edges.size());

    FileUtils.forceDeleteOnExit(structDir);
}
Aggregations