Example usage of com.baidu.hugegraph.loader.HugeGraphLoader in the apache/incubator-hugegraph-toolchain project: the run method of the LoadTask class.
/**
 * Runs the load job: executes the loader, then records the final status
 * and progress metrics under the task lock.
 */
@Override
public void run() {
    Ex.check(this.options != null, "The load options shouldn't be null");
    log.info("LoadTask is start running : {}", this.id);
    this.loader = new HugeGraphLoader(this.options);

    boolean loadSucceeded;
    try {
        loadSucceeded = this.loader.load();
    } catch (Throwable e) {
        // Any failure (including Errors) marks this run as failed
        log.error("Run task {} failed", this.id, e);
        loadSucceeded = false;
    }

    this.lock.lock();
    try {
        // Only transition the status when the task is still considered
        // running, i.e. the program stops by itself rather than having
        // been moved to another state externally
        if (this.status.inRunning()) {
            this.status = loadSucceeded ? LoadStatus.SUCCEED :
                                          LoadStatus.FAILED;
        }
        this.fileReadLines = this.context().newProgress().totalInputReaded();
        this.lastDuration += this.context().summary().totalTime();
        this.currDuration = 0L;
    } finally {
        this.finished = true;
        this.lock.unlock();
    }
}
Example usage of com.baidu.hugegraph.loader.HugeGraphLoader in the apache/incubator-hugegraph-toolchain project: the testLoadIncrementalModeAndLoadFailure method of the FileLoadTest class.
/**
 * Covers three consecutive loader runs against the same struct:
 * 1) a normal run that hits parse errors and writes failure-data files,
 * 2) an incremental-mode run that resumes from the recorded progress,
 * 3) a failure-mode run that reloads the (manually fixed) failure files.
 *
 * Fix: Assert.assertNotNull(files) is now checked BEFORE Arrays.sort(files),
 * since File.listFiles() returns null for a missing/unreadable directory and
 * the old order died with a NullPointerException instead of a clear assertion.
 */
@Test
public void testLoadIncrementalModeAndLoadFailure()
       throws IOException, InterruptedException {
    // "应该是数字" means "should be a number" -- deliberately invalid values
    // that trigger parse failures
    ioUtil.write("vertex_person.csv",
                 "name,age,city",
                 "marko,应该是数字,Beijing",
                 "vadas,27,Hongkong",
                 "josh,32,Beijing",
                 "peter,35,Shanghai",
                 "\"li,nary\",26,\"Wu,han\"");
    ioUtil.write("vertex_software.csv", GBK,
                 "name,lang,price",
                 "office,C#,999",
                 "lop,java,应该是数字",
                 "ripple,java,199");

    // 1st time: max-parse-errors=1, so the software input stops early
    String[] args = new String[]{
            "-f", structPath("incremental_mode_and_load_failure/struct.json"),
            "-s", configPath("incremental_mode_and_load_failure/schema.groovy"),
            "-g", GRAPH,
            "-h", SERVER,
            "--batch-insert-threads", "2",
            "--max-parse-errors", "1",
            "--test-mode", "false"
    };
    HugeGraphLoader loader = new HugeGraphLoader(args);
    loader.load();
    LoadContext context = Whitebox.getInternalState(loader, "context");

    List<Vertex> vertices = CLIENT.graph().listVertices();
    Assert.assertEquals(4, vertices.size());

    Map<String, InputProgress> inputProgressMap =
            context.newProgress().inputProgress();
    Assert.assertEquals(1, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_person.csv", fileItem.name());
            // Reached last line: "li,nary",26,"Wu,han"
            Assert.assertEquals(6, fileItem.offset());
        }
    });

    File structDir = FileUtils.getFile(structPath(
                     "incremental_mode_and_load_failure/struct"));
    File failureDataDir = FileUtils.getFile(structPath(
                          "incremental_mode_and_load_failure/struct/failure-data/"));
    File[] files = failureDataDir.listFiles();
    // Check for null BEFORE sorting: listFiles() returns null if the
    // directory doesn't exist, and sorting null would raise an NPE
    Assert.assertNotNull(files);
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(2, files.length);
    File personFailureFile = files[0];
    List<String> personFailureLines = FileUtils.readLines(personFailureFile,
                                                          Constants.CHARSET);
    Assert.assertEquals(2, personFailureLines.size());
    Assert.assertEquals("marko,应该是数字,Beijing", personFailureLines.get(1));

    // 2nd time, incremental-mode: resumes from the saved progress
    args = new String[]{
            "-f", structPath("incremental_mode_and_load_failure/struct.json"),
            "-g", GRAPH,
            "-h", SERVER,
            "--incremental-mode", "true",
            "--failure-mode", "false",
            "--batch-insert-threads", "2",
            "--max-parse-errors", "2",
            "--test-mode", "false"
    };
    loader = new HugeGraphLoader(args);
    loader.load();
    context = Whitebox.getInternalState(loader, "context");

    vertices = CLIENT.graph().listVertices();
    // ripple,java,199 has been loaded
    Assert.assertEquals(6, vertices.size());

    inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_person.csv", fileItem.name());
            // Reached last line: "li,nary",26,"Wu,han"
            Assert.assertEquals(6, fileItem.offset());
        } else if (id.equals("2")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("vertex_software.csv", fileItem.name());
            // Reached last line: "ripple,java,199"
            Assert.assertEquals(4, fileItem.offset());
        }
    });

    // Give the loader time to flush the new failure files to disk
    Thread.sleep(1000);
    files = failureDataDir.listFiles();
    // Same fix as above: null-check before sorting
    Assert.assertNotNull(files);
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(4, files.length);
    personFailureFile = files[0];
    personFailureLines = FileUtils.readLines(personFailureFile,
                                             Constants.CHARSET);
    Assert.assertEquals(2, personFailureLines.size());
    Assert.assertEquals("marko,应该是数字,Beijing", personFailureLines.get(1));
    File softwareFailureFile = files[2];
    List<String> softwareFailureLines = FileUtils.readLines(softwareFailureFile,
                                                            GBK);
    Assert.assertEquals(2, softwareFailureLines.size());
    Assert.assertEquals("lop,java,应该是数字", softwareFailureLines.get(1));

    // TODO: Change only one line first, and make the second line go wrong
    // modify person and software failure file
    personFailureLines.remove(1);
    personFailureLines.add("marko,29,Beijing");
    FileUtils.writeLines(personFailureFile, personFailureLines, false);
    // modify software failure file
    softwareFailureLines.remove(1);
    softwareFailureLines.add("lop,java,328");
    FileUtils.writeLines(softwareFailureFile, softwareFailureLines, false);

    // 3rd time, --failure-mode: reload only the corrected failure files
    args = new String[]{
            "-f", structPath("incremental_mode_and_load_failure/struct.json"),
            "-g", GRAPH,
            "-h", SERVER,
            "--incremental-mode", "false",
            "--failure-mode", "true",
            "--batch-insert-threads", "2",
            "--max-parse-errors", "2",
            "--test-mode", "false"
    };
    loader = new HugeGraphLoader(args);
    loader.load();
    context = Whitebox.getInternalState(loader, "context");

    vertices = CLIENT.graph().listVertices();
    // marko,29,Beijing has been loaded
    Assert.assertEquals(8, vertices.size());

    inputProgressMap = context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    inputProgressMap.forEach((id, inputProgress) -> {
        if (id.equals("1")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals(2, fileItem.offset());
        } else if (id.equals("2")) {
            Set<InputItemProgress> loadedItems = inputProgress.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            Assert.assertTrue(loadedItem instanceof FileItemProgress);
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals(2, fileItem.offset());
        }
    });

    FileUtils.forceDeleteOnExit(structDir);
}
Example usage of com.baidu.hugegraph.loader.HugeGraphLoader in the apache/incubator-hugegraph-toolchain project: the testReloadJsonFailureFiles method of the FileLoadTest class.
/**
 * The first run loads a JSON edge file whose last line references vertices
 * that don't exist; that line is written to a failure file. Reloading in
 * failure-mode without fixing it leaves the error in place; after fixing
 * the line, the reload succeeds and the edge is inserted.
 *
 * Fix: Assert.assertNotNull(files) is now checked BEFORE Arrays.sort(files),
 * since File.listFiles() returns null for a missing/unreadable directory and
 * the old order died with a NullPointerException instead of a clear assertion.
 */
@Test
public void testReloadJsonFailureFiles()
       throws IOException, InterruptedException {
    ioUtil.write("vertex_person.csv",
                 "name,age,city",
                 "marko,29,Beijing",
                 "vadas,27,Hongkong",
                 "tom,28,Wuhan");
    ioUtil.write("edge_knows.json",
                 "{\"source_name\": \"marko\", \"target_name\": " +
                 "\"vadas\", \"date\": \"2016-01-10 12:00:00\"," +
                 "\"weight\": 0.5}",
                 // unexisted source and target vertex
                 "{\"source_name\": \"marko1\", \"target_name\": " +
                 "\"vadas1\", \"date\": \"2013-02-20 13:00:00\"," +
                 "\"weight\": 1.0}");
    String[] args = new String[]{
            "-f", structPath("reload_json_failure_files/struct.json"),
            "-s", configPath("reload_json_failure_files/schema.groovy"),
            "-g", GRAPH,
            "-h", SERVER,
            "--check-vertex", "true",
            "--batch-insert-threads", "2",
            "--test-mode", "false"
    };
    HugeGraphLoader loader = new HugeGraphLoader(args);
    loader.load();
    LoadContext context = Whitebox.getInternalState(loader, "context");

    List<Edge> edges = CLIENT.graph().listEdges();
    Assert.assertEquals(1, edges.size());

    Map<String, InputProgress> inputProgressMap =
            context.newProgress().inputProgress();
    Assert.assertEquals(2, inputProgressMap.size());
    Assert.assertEquals(ImmutableSet.of("1", "2"), inputProgressMap.keySet());
    inputProgressMap.forEach((id, value) -> {
        if (id.equals("2")) {
            // The error line is exactly last line
            Set<InputItemProgress> loadedItems = value.loadedItems();
            Assert.assertEquals(1, loadedItems.size());
            InputItemProgress loadedItem = loadedItems.iterator().next();
            FileItemProgress fileItem = (FileItemProgress) loadedItem;
            Assert.assertEquals("edge_knows.json", fileItem.name());
            Assert.assertEquals(2, fileItem.offset());
        }
    });

    // Load failure data without modification
    args = new String[]{
            "-f", structPath("reload_json_failure_files/struct.json"),
            "-g", GRAPH,
            "-h", SERVER,
            "--failure-mode", "true",
            "--check-vertex", "true",
            "--batch-insert-threads", "2",
            "--test-mode", "false"
    };
    // No exception throw, but error line still exist
    HugeGraphLoader.main(args);
    Thread.sleep(1000);

    // Reload with modification
    File structDir = FileUtils.getFile(structPath(
                     "reload_json_failure_files/struct"));
    File failureDir = FileUtils.getFile(structPath(
                      "reload_json_failure_files/struct/failure-data/"));
    File[] files = failureDir.listFiles();
    // Check for null BEFORE sorting: listFiles() returns null if the
    // directory doesn't exist, and sorting null would raise an NPE
    Assert.assertNotNull(files);
    Arrays.sort(files, Comparator.comparing(File::getName));
    Assert.assertEquals(1, files.length);
    File knowsFailureFile = files[0];
    List<String> failureLines = FileUtils.readLines(knowsFailureFile,
                                                    Constants.CHARSET);
    Assert.assertEquals(2, failureLines.size());
    Assert.assertEquals("{\"source_name\": \"marko1\", \"target_name\": " +
                        "\"vadas1\", \"date\": \"2013-02-20 13:00:00\"," +
                        "\"weight\": 1.0}", failureLines.get(1));
    failureLines.remove(1);
    failureLines.add("{\"source_name\": \"marko\", \"target_name\": " +
                     "\"tom\", \"date\": \"2013-02-20 13:00:00\"," +
                     "\"weight\": 1.0}");
    FileUtils.writeLines(knowsFailureFile, failureLines, false);

    // No exception throw, and error line doesn't exist
    HugeGraphLoader.main(args);
    edges = CLIENT.graph().listEdges();
    Assert.assertEquals(2, edges.size());
    FileUtils.forceDeleteOnExit(structDir);
}
Example usage of com.baidu.hugegraph.loader.HugeGraphLoader in the apache/incubator-hugegraph-toolchain project: the testHDFSWithUnexistCoreSitePath method of the HDFSLoadTest class.
/**
 * Loading from HDFS must fail fast with a LoadException (caused by an
 * IllegalArgumentException) when the configured core-site file does not
 * exist on disk.
 */
@Test
public void testHDFSWithUnexistCoreSitePath() {
    ioUtil.write("vertex_person.csv",
                 "name,age,city",
                 "marko,29,Beijing",
                 "vadas,27,Hongkong",
                 "josh,32,Beijing",
                 "peter,35,Shanghai",
                 "\"li,nary\",26,\"Wu,han\"");
    String[] loadArgs = new String[]{
            "-f", structPath("hdfs_with_unexist_core_site_path/struct.json"),
            "-s", configPath("hdfs_with_unexist_core_site_path/schema.groovy"),
            "-g", GRAPH,
            "-h", SERVER,
            "--batch-insert-threads", "2",
            "--test-mode", "true"
    };
    Assert.assertThrows(LoadException.class, () -> {
        new HugeGraphLoader(loadArgs).load();
    }, e -> {
        // The root cause carries the human-readable description of the
        // missing core-site file
        Throwable cause = e.getCause();
        Assert.assertEquals(IllegalArgumentException.class, cause.getClass());
        String message = cause.getMessage();
        Assert.assertTrue(message.startsWith("The core site file"));
        Assert.assertTrue(message.endsWith("is not an existing file"));
    });
}
Example usage of com.baidu.hugegraph.loader.HugeGraphLoader in the apache/incubator-hugegraph-toolchain project: the testHDFSWithCoreSitePathEmpty method of the HDFSLoadTest class.
/**
 * Loading from HDFS must fail with a LoadException when the core-site
 * path option is configured but empty.
 */
@Test
public void testHDFSWithCoreSitePathEmpty() {
    ioUtil.write("vertex_person.csv",
                 "name,age,city",
                 "marko,29,Beijing",
                 "vadas,27,Hongkong",
                 "josh,32,Beijing",
                 "peter,35,Shanghai",
                 "\"li,nary\",26,\"Wu,han\"");
    String[] loadArgs = new String[]{
            "-f", structPath("hdfs_with_empty_core_site_path/struct.json"),
            "-s", configPath("hdfs_with_empty_core_site_path/schema.groovy"),
            "-g", GRAPH,
            "-h", SERVER,
            "--batch-insert-threads", "2",
            "--test-mode", "true"
    };
    Assert.assertThrows(LoadException.class,
                        () -> new HugeGraphLoader(loadArgs).load());
}
Aggregations