use of org.neo4j.unsafe.impl.batchimport.input.Input in project neo4j by neo4j.
the class ImportTool method main.
/**
 * Entry point of the (deprecated) import tool: parses the supplied arguments, builds a
 * {@link CsvInput} from the node and relationship files they point out and hands it over
 * to {@code doImport}.
 * <p>
 * A deprecation warning is always written to {@code System.err} first. If no arguments are
 * given, or if the arguments ask for usage, the usage text is printed to {@code System.out}
 * and the method returns without importing anything.
 *
 * @param incomingArguments arguments for specifying input and configuration for the import.
 * @param defaultSettingsSuitableForTests default configuration geared towards unit/integration
 * test environments, for example lower default buffer sizes.
 * @throws IOException declared by this method; note that {@link IOException} and
 * {@link IllegalArgumentException} raised inside the import section are caught and rethrown
 * through {@code andPrintError}.
 */
public static void main(String[] incomingArguments, boolean defaultSettingsSuitableForTests) throws IOException {
    System.err.println("WARNING: neo4j-import is deprecated and support for it will be removed in a future\n" + "version of Neo4j; please use neo4j-admin import instead.\n");
    PrintStream stdOut = System.out;
    PrintStream stdErr = System.err;
    Args args = Args.parse(incomingArguments);
    if (ArrayUtil.isEmpty(incomingArguments) || asksForUsage(args)) {
        printUsage(stdOut);
        return;
    }

    // Only these two need to outlive the try block, since the finally clause inspects them.
    OutputStream badOutput = null;
    boolean success = false;
    try (FileSystemAbstraction fs = new DefaultFileSystemAbstraction()) {
        // Target store directory, validated while the option is interpreted.
        File storeDir = args.interpretOption(Options.STORE_DIR.key(), Converters.<File>mandatory(), Converters.toFile(), Validators.DIRECTORY_IS_WRITABLE, Validators.CONTAINS_NO_EXISTING_DATABASE);

        // Resolve the logs directory relative to a neo4j home pointing at the store directory.
        Config config = Config.defaults();
        config.augment(stringMap(GraphDatabaseSettings.neo4j_home.name(), storeDir.getAbsolutePath()));
        File logsDir = config.get(GraphDatabaseSettings.logs_directory);
        fs.mkdirs(logsDir);

        // Destination handed to the bad collector below.
        File badFile = new File(storeDir, BAD_FILE_NAME);
        badOutput = new BufferedOutputStream(fs.openAsOutputStream(badFile, false));

        // Input files.
        Collection<Option<File[]>> nodesFiles = extractInputFiles(args, Options.NODE_DATA.key(), stdErr);
        Collection<Option<File[]>> relationshipsFiles = extractInputFiles(args, Options.RELATIONSHIP_DATA.key(), stdErr);
        validateInputFiles(nodesFiles, relationshipsFiles);

        // Remaining options, read in the same order as they are validated.
        boolean enableStacktrace = args.getBoolean(Options.STACKTRACE.key(), Boolean.FALSE, Boolean.TRUE);
        Number processors = args.getNumber(Options.PROCESSORS.key(), null);
        IdType idType = args.interpretOption(Options.ID_TYPE.key(), withDefault((IdType) Options.ID_TYPE.defaultValue()), TO_ID_TYPE);
        int badTolerance = parseNumberOrUnlimited(args, Options.BAD_TOLERANCE);
        Charset inputEncoding = Charset.forName(args.get(Options.INPUT_ENCODING.key(), defaultCharset().name()));
        boolean skipBadRelationships = args.getBoolean(Options.SKIP_BAD_RELATIONSHIPS.key(), (Boolean) Options.SKIP_BAD_RELATIONSHIPS.defaultValue(), true);
        boolean skipDuplicateNodes = args.getBoolean(Options.SKIP_DUPLICATE_NODES.key(), (Boolean) Options.SKIP_DUPLICATE_NODES.defaultValue(), true);
        boolean ignoreExtraColumns = args.getBoolean(Options.IGNORE_EXTRA_COLUMNS.key(), (Boolean) Options.IGNORE_EXTRA_COLUMNS.defaultValue(), true);

        Collector badCollector = badCollector(badOutput, badTolerance, collect(skipBadRelationships, skipDuplicateNodes, ignoreExtraColumns));
        Config dbConfig = loadDbConfig(args.interpretOption(Options.DATABASE_CONFIG.key(), Converters.<File>optional(), Converters.toFile(), Validators.REGEX_FILE_EXISTS));

        // No explicit page size is requested from here.
        int pageSize = UNSPECIFIED;
        org.neo4j.unsafe.impl.batchimport.Configuration configuration = importConfiguration(processors, defaultSettingsSuitableForTests, dbConfig, pageSize);

        Input input = new CsvInput(
                nodeData(inputEncoding, nodesFiles),
                defaultFormatNodeFileHeader(),
                relationshipData(inputEncoding, relationshipsFiles),
                defaultFormatRelationshipFileHeader(),
                idType,
                csvConfiguration(args, defaultSettingsSuitableForTests),
                badCollector,
                configuration.maxNumberOfProcessors());

        doImport(stdOut, stdErr, storeDir, logsDir, badFile, fs, nodesFiles, relationshipsFiles, enableStacktrace, input, dbConfig, badOutput, configuration);
        success = true;
    } catch (IllegalArgumentException e) {
        throw andPrintError("Input error", e, false, stdErr);
    } catch (IOException e) {
        throw andPrintError("File error", e, false, stdErr);
    } finally {
        // The stream is closed here only on failure.
        // NOTE(review): presumably doImport takes care of it on success - confirm.
        if (!success) {
            if (badOutput != null) {
                badOutput.close();
            }
        }
    }
}
use of org.neo4j.unsafe.impl.batchimport.input.Input in project neo4j by neo4j.
the class CsvInputTest method shouldIgnoreValuesAfterHeaderEntries.
/**
 * Rows carrying more values than the header has entries should still be read, with only
 * the values matching header entries showing up on the resulting nodes.
 */
@Test
public void shouldIgnoreValuesAfterHeaderEntries() throws Exception {
    // GIVEN rows with four values each, read with a header that only declares an id and a name
    String csv = "1,zergling,bubble,bobble\n" + "2,scv,pun,intended";
    Iterable<DataFactory<InputNode>> nodeSource = dataIterable(data(csv));
    int processorCount = getRuntime().availableProcessors();
    Input input = new CsvInput(
            nodeSource,
            header(
                    entry(null, Type.ID, extractors.long_()),
                    entry("name", Type.PROPERTY, extractors.string())),
            null,
            null,
            IdType.ACTUAL,
            config(COMMAS),
            silentBadCollector(4),
            processorCount);

    // WHEN
    try (ResourceIterator<InputNode> nodeIterator = input.nodes().iterator()) {
        // THEN only id and name are present on each node
        Object[] firstProperties = new Object[] { "name", "zergling" };
        assertNode(nodeIterator.next(), 1L, firstProperties, labels());

        Object[] secondProperties = new Object[] { "name", "scv" };
        assertNode(nodeIterator.next(), 2L, secondProperties, labels());

        assertFalse(nodeIterator.hasNext());
    }
}
use of org.neo4j.unsafe.impl.batchimport.input.Input in project neo4j by neo4j.
the class CsvInputTest method shouldAllowSomeNodesToBeAnonymous.
/**
 * A node row with an empty id column should be read as a node whose id is {@code null},
 * alongside regular nodes that do have ids.
 */
@Test
public void shouldAllowSomeNodesToBeAnonymous() throws Exception {
    // GIVEN a header line followed by one identified node and one anonymous node (empty :ID)
    String csv = ":ID,name:string,level:int\n" + "abc,Mattias,1\n" + ",Johan,2\n";
    DataFactory<InputNode> nodeSource = data(csv);
    Iterable<DataFactory<InputNode>> nodeSources = dataIterable(nodeSource);
    int processorCount = getRuntime().availableProcessors();
    Input input = new CsvInput(
            nodeSources,
            defaultFormatNodeFileHeader(),
            null,
            null,
            IdType.STRING,
            config(COMMAS),
            silentBadCollector(0),
            processorCount);

    // WHEN
    try (ResourceIterator<InputNode> nodeIterator = input.nodes().iterator()) {
        // THEN
        Object[] mattias = new Object[] { "name", "Mattias", "level", 1 };
        assertNode(nodeIterator.next(), "abc", mattias, labels());

        // the anonymous node has no id
        Object[] johan = new Object[] { "name", "Johan", "level", 2 };
        assertNode(nodeIterator.next(), null, johan, labels());

        assertFalse(nodeIterator.hasNext());
    }
}
use of org.neo4j.unsafe.impl.batchimport.input.Input in project neo4j by neo4j.
the class CsvInputTest method shouldHaveNodesBelongToGroupSpecifiedInHeader.
/**
 * Nodes read with a header whose id entry names a group should be reported as belonging
 * to that group.
 */
@Test
public void shouldHaveNodesBelongToGroupSpecifiedInHeader() throws Exception {
    // GIVEN an id header entry bound to the group "MyGroup"
    IdType actualIds = IdType.ACTUAL;
    String csv = "123,one\n" + "456,two";
    Iterable<DataFactory<InputNode>> nodeSource = dataIterable(data(csv));
    Group expectedGroup = new Groups().getOrCreate("MyGroup");
    int processorCount = getRuntime().availableProcessors();
    Input input = new CsvInput(
            nodeSource,
            header(
                    entry(null, Type.ID, expectedGroup.name(), actualIds.extractor(extractors)),
                    entry("name", Type.PROPERTY, extractors.string())),
            null,
            null,
            actualIds,
            config(COMMAS),
            silentBadCollector(0),
            processorCount);

    // WHEN/THEN both nodes come back tagged with the group
    try (InputIterator<InputNode> nodeIterator = input.nodes().iterator()) {
        assertNode(nodeIterator.next(), expectedGroup, 123L, properties("name", "one"), labels());
        assertNode(nodeIterator.next(), expectedGroup, 456L, properties("name", "two"), labels());
        assertFalse(nodeIterator.hasNext());
    }
}
use of org.neo4j.unsafe.impl.batchimport.input.Input in project neo4j by neo4j.
the class CsvInputTest method shouldProvideNodesFromCsvInput.
/**
 * A single CSV row with id, name and label columns should be exposed as one node with
 * the corresponding id, property and label.
 */
@Test
public void shouldProvideNodesFromCsvInput() throws Exception {
    // GIVEN one row and an explicit header of id, "name" property and label
    IdType actualIds = IdType.ACTUAL;
    String csv = "123,Mattias Persson,HACKER";
    Iterable<DataFactory<InputNode>> nodeSource = dataIterable(data(csv));
    int processorCount = getRuntime().availableProcessors();
    Input input = new CsvInput(
            nodeSource,
            header(
                    entry(null, Type.ID, actualIds.extractor(extractors)),
                    entry("name", Type.PROPERTY, extractors.string()),
                    entry("labels", Type.LABEL, extractors.string())),
            null,
            null,
            actualIds,
            config(COMMAS),
            silentBadCollector(0),
            processorCount);

    // WHEN/THEN exactly one node is produced
    try (InputIterator<InputNode> nodeIterator = input.nodes().iterator()) {
        assertNode(nodeIterator.next(), 123L, properties("name", "Mattias Persson"), labels("HACKER"));
        assertFalse(nodeIterator.hasNext());
    }
}
Aggregations