Example use of org.neo4j.internal.batchimport.BatchImporter in the neo4j project (by neo4j).
Class MultipleIndexPopulationStressIT, method createRandomData.
/**
 * Imports randomly generated data into the default database located under
 * {@code directory.homePath()} using a {@link ParallelBatchImporter}.
 *
 * @param nodeCount node count handed to the {@link RandomDataInput}.
 * @param relCount relationship count handed to the {@link RandomDataInput}.
 * @throws Exception propagated from the import or from closing the input/scheduler.
 */
private void createRandomData(long nodeCount, long relCount) throws Exception {
    Config dbConfig = Config.defaults(neo4j_home, directory.homePath());
    RecordFormats formats = RecordFormatSelector.selectForConfig(dbConfig, NullLogProvider.getInstance());

    // Both the generated input and the scheduler are closed when the import is done.
    try (RandomDataInput randomInput = new RandomDataInput(nodeCount, relCount);
            JobScheduler scheduler = new ThreadPoolJobScheduler()) {
        DatabaseLayout targetLayout = Neo4jLayout.of(directory.homePath()).databaseLayout(DEFAULT_DATABASE_NAME);
        IndexImporterFactory indexImporters = new IndexImporterFactoryImpl(dbConfig);

        BatchImporter batchImporter = new ParallelBatchImporter(
                targetLayout,
                fileSystemAbstraction,
                PageCacheTracer.NULL,
                DEFAULT,
                NullLogService.getInstance(),
                ExecutionMonitor.INVISIBLE,
                EMPTY,
                dbConfig,
                formats,
                NO_MONITOR,
                scheduler,
                Collector.EMPTY,
                TransactionLogInitializer.getLogFilesInitializer(),
                indexImporters,
                INSTANCE);
        batchImporter.doImport(randomInput);
    }
}
Example use of org.neo4j.internal.batchimport.BatchImporter in the neo4j project (by neo4j).
Class QuickImport, method main.
/**
 * Command-line entry point: builds a {@link DataGeneratorInput} from the supplied arguments and
 * feeds it either to a {@link CsvOutput} (when {@code to-csv} is set) or to the
 * {@link BatchImporter} instantiated by {@code BatchImporterFactory.withHighestPriority()}.
 *
 * <p>Arguments read directly here: {@code nodes}, {@code relationships}, {@code labels},
 * {@code relationship-types}, {@code into}, {@code random-seed}, {@code id-type},
 * {@code db-config}, {@code high-io}, {@code pagecache-memory}, {@code processors},
 * {@code max-memory}, {@code factor-bad-node-data}, {@code factor-bad-relationship-data},
 * {@code to-csv} and {@code v}. The header parsers also receive the parsed arguments.
 *
 * @param arguments raw command-line arguments.
 * @throws IOException declared by the method; propagated from the operations it performs.
 */
public static void main(String[] arguments) throws IOException {
    Args args = Args.parse(arguments);

    // Dimensions of the generated data set and where it should end up.
    long nodeCount = parseLongWithUnit(args.get("nodes", null));
    long relationshipCount = parseLongWithUnit(args.get("relationships", null));
    int labelCount = args.getNumber("labels", 4).intValue();
    int relationshipTypeCount = args.getNumber("relationship-types", 4).intValue();
    Path targetDirectory = Path.of(args.get("into"));
    long seed = args.getNumber("random-seed", currentTimeMillis()).longValue();

    // CSV-level configuration and the headers describing the generated entities.
    Configuration csvConfig = Configuration.COMMAS;
    Extractors extractors = new Extractors(csvConfig.arrayDelimiter());
    IdType idType = IdType.valueOf(args.get("id-type", IdType.INTEGER.name()));
    Groups idGroups = new Groups();
    Header nodeHeader = parseNodeHeader(args, idType, extractors, idGroups);
    Header relationshipHeader = parseRelationshipHeader(args, idType, extractors, idGroups);

    // Database configuration: loaded from the given file if any, otherwise defaults.
    String dbConfigPath = args.get("db-config", null);
    Config dbConfig = dbConfigPath != null
            ? Config.newBuilder().fromFile(Path.of(dbConfigPath)).build()
            : Config.defaults();

    // null means "not specified", in which case the base configuration decides.
    Boolean highIo = args.has("high-io") ? args.getBoolean("high-io") : null;
    LogProvider logProvider = NullLogProvider.getInstance();
    long pageCacheMemory = args.getNumber(
            "pagecache-memory", org.neo4j.internal.batchimport.Configuration.MAX_PAGE_CACHE_MEMORY).longValue();

    // Importer configuration layered on top of defaultConfiguration(targetDirectory).
    org.neo4j.internal.batchimport.Configuration importerConfig =
            new org.neo4j.internal.batchimport.Configuration.Overridden(defaultConfiguration(targetDirectory)) {
                @Override
                public int maxNumberOfProcessors() {
                    return args.getNumber("processors", super.maxNumberOfProcessors()).intValue();
                }

                @Override
                public boolean highIO() {
                    if (highIo == null) {
                        return super.highIO();
                    }
                    return highIo;
                }

                @Override
                public long pageCacheMemory() {
                    return pageCacheMemory;
                }

                @Override
                public long maxMemoryUsage() {
                    String custom = args.get("max-memory", null);
                    if (custom == null) {
                        return super.maxMemoryUsage();
                    }
                    return parseMaxMemory(custom);
                }

                @Override
                public IndexConfig indexConfig() {
                    return IndexConfig.create().withLabelIndex().withRelationshipTypeIndex();
                }
            };

    float factorBadNodeData = args.getNumber("factor-bad-node-data", 0).floatValue();
    float factorBadRelationshipData = args.getNumber("factor-bad-relationship-data", 0).floatValue();
    Input generatedInput = new DataGeneratorInput(
            nodeCount,
            relationshipCount,
            idType,
            seed,
            0,
            nodeHeader,
            relationshipHeader,
            labelCount,
            relationshipTypeCount,
            factorBadNodeData,
            factorBadRelationshipData);

    try (FileSystemAbstraction fs = new DefaultFileSystemAbstraction();
            Lifespan lifespan = new Lifespan()) {
        BatchImporter importer;
        if (args.getBoolean("to-csv")) {
            importer = new CsvOutput(targetDirectory, nodeHeader, relationshipHeader, csvConfig);
        } else {
            System.out.println("Seed " + seed);
            // The scheduler is registered with the lifespan, which is closed with this try block.
            JobScheduler scheduler = lifespan.add(createScheduler());

            ExecutionMonitor executionMonitor;
            if (args.getBoolean("v")) {
                executionMonitor = new SpectrumExecutionMonitor(2, TimeUnit.SECONDS, System.out, 100);
            } else {
                executionMonitor = defaultVisible();
            }

            importer = BatchImporterFactory.withHighestPriority().instantiate(
                    DatabaseLayout.ofFlat(targetDirectory),
                    fs,
                    PageCacheTracer.NULL,
                    importerConfig,
                    new SimpleLogService(logProvider, logProvider),
                    executionMonitor,
                    EMPTY,
                    dbConfig,
                    RecordFormatSelector.selectForConfig(dbConfig, logProvider),
                    NO_MONITOR,
                    scheduler,
                    Collector.EMPTY,
                    TransactionLogInitializer.getLogFilesInitializer(),
                    new IndexImporterFactoryImpl(dbConfig),
                    INSTANCE);
        }
        importer.doImport(generatedInput);
    }
}
Example use of org.neo4j.internal.batchimport.BatchImporter in the neo4j project (by neo4j).
Class CsvImporter, method doImport.
/**
 * Instantiates a {@link BatchImporter}, prints an overview of the import and runs it on
 * {@code input}. Whatever the outcome, bad entries are reported afterwards (when a report file is
 * configured) and a warning is written to {@code stdErr} if the import did not complete.
 *
 * @param input the data to import.
 * @param badCollector collector handed to the importer; its bad-entry count is read afterwards.
 */
private void doImport(Input input, Collector badCollector) {
    boolean completed = false;
    Path internalLogPath = databaseConfig.get(store_internal_log_path);

    try (JobScheduler scheduler = createInitialisedScheduler();
            OutputStream logStream = FileSystemUtils.createOrOpenAsOutputStream(fileSystem, internalLogPath, true);
            Log4jLogProvider internalLogProvider = Util.configuredLogProvider(databaseConfig, logStream)) {
        ExecutionMonitor monitor;
        if (verbose) {
            monitor = new SpectrumExecutionMonitor(
                    2, TimeUnit.SECONDS, stdOut, SpectrumExecutionMonitor.DEFAULT_WIDTH);
        } else {
            monitor = ExecutionMonitors.defaultVisible();
        }

        BatchImporter batchImporter = BatchImporterFactory.withHighestPriority().instantiate(
                databaseLayout,
                fileSystem,
                pageCacheTracer,
                importConfig,
                new SimpleLogService(NullLogProvider.getInstance(), internalLogProvider),
                monitor,
                EMPTY,
                databaseConfig,
                RecordFormatSelector.selectForConfig(databaseConfig, internalLogProvider),
                new PrintingImportLogicMonitor(stdOut, stdErr),
                scheduler,
                badCollector,
                TransactionLogInitializer.getLogFilesInitializer(),
                new IndexImporterFactoryImpl(databaseConfig),
                memoryTracker);

        printOverview(databaseLayout.databaseDirectory(), nodeFiles, relationshipFiles, importConfig, stdOut);
        batchImporter.doImport(input);
        // Only reached when the import returned normally.
        completed = true;
    } catch (Exception e) {
        throw andPrintError("Import error", e, verbose, stdErr);
    } finally {
        long badEntryCount = badCollector.badEntries();
        if (reportFile != null && badEntryCount > 0) {
            stdOut.println("There were bad entries which were skipped and logged into " + reportFile.toAbsolutePath());
        }
        if (!completed) {
            stdErr.println("WARNING Import failed. The store files in "
                    + databaseLayout.databaseDirectory().toAbsolutePath()
                    + " are left as they are, although they are likely in an unusable state. "
                    + "Starting a database on these store files will likely fail or observe inconsistent records so "
                    + "start at your own risk or delete the store manually");
        }
    }
}
Example use of org.neo4j.internal.batchimport.BatchImporter in the neo4j project (by neo4j).
Class CsvInputBatchImportIT, method shouldImportDataComingFromCsvFiles.
/**
 * Writes random node and relationship data to CSV files, imports them with a
 * {@link ParallelBatchImporter} and verifies the imported data against what was generated.
 */
@Test
void shouldImportDataComingFromCsvFiles() throws Exception {
    // Set-up: database configuration, importer and the random data to import.
    Config config = Config.newBuilder()
            .set(db_timezone, LogTimeZone.SYSTEM)
            .set(dense_node_threshold, 5)
            .build();

    try (JobScheduler jobScheduler = new ThreadPoolJobScheduler()) {
        BatchImporter batchImporter = new ParallelBatchImporter(
                databaseLayout,
                fileSystem,
                PageCacheTracer.NULL,
                smallBatchSizeConfig(),
                NullLogService.getInstance(),
                ExecutionMonitor.INVISIBLE,
                EMPTY,
                config,
                defaultFormat(),
                ImportLogic.NO_MONITOR,
                jobScheduler,
                Collector.EMPTY,
                TransactionLogInitializer.getLogFilesInitializer(),
                new IndexImporterFactoryImpl(config),
                INSTANCE);
        List<InputEntity> nodes = randomNodeData();
        List<InputEntity> relationships = randomRelationshipData(nodes);

        // Exercise: import the data through CSV files, using string ids.
        batchImporter.doImport(csv(
                nodeDataAsFile(nodes),
                relationshipDataAsFile(relationships),
                IdType.STRING,
                lowBufferSize(COMMAS)));

        // Check: the imported data is verified against the generated data.
        verifyImportedData(nodes, relationships);
    }
}
Aggregations