Use of org.neo4j.internal.batchimport.staging.ExecutionMonitor in project neo4j by neo4j.
From class ImportLogicTest, method closeImporterWithoutDiagnosticState:
@Test
void closeImporterWithoutDiagnosticState() throws IOException {
    ExecutionMonitor monitor = mock(ExecutionMonitor.class);
    IndexImporterFactory factory = mock(IndexImporterFactory.class);
    try (BatchingNeoStores stores = batchingNeoStoresWithExternalPageCache(
            fileSystem, pageCache, NULL, databaseLayout, defaultFormat(), DEFAULT,
            getInstance(), AdditionalInitialIds.EMPTY, defaults(), INSTANCE)) {
        // noinspection EmptyTryBlock
        try (ImportLogic logic = new ImportLogic(
                databaseLayout, stores, DEFAULT, defaults(), getInstance(), monitor,
                defaultFormat(), Collector.EMPTY, NO_MONITOR, NULL, factory,
                EmptyMemoryTracker.INSTANCE)) {
            // nothing to run in this import
            logic.success();
        }
    }
    verify(monitor).done(eq(true), anyLong(), contains("Data statistics is not available."));
}
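The test only checks that the completion message mentions the missing statistics. A minimal variation, assuming the same Mockito/AssertJ test setup, uses an ArgumentCaptor to capture and inspect the full message passed to done(boolean, long, String), the one ExecutionMonitor callback this test exercises:

    // Hypothetical variation of the assertion above: capture the completion
    // message instead of matching a substring. Uses only the
    // done(boolean, long, String) signature already verified above.
    ArgumentCaptor<String> message = ArgumentCaptor.forClass(String.class);
    verify(monitor).done(eq(true), anyLong(), message.capture());
    assertThat(message.getValue()).contains("Data statistics is not available.");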
Use of org.neo4j.internal.batchimport.staging.ExecutionMonitor in project neo4j by neo4j.
From class QuickImport, method main:
public static void main(String[] arguments) throws IOException {
    Args args = Args.parse(arguments);
    long nodeCount = parseLongWithUnit(args.get("nodes", null));
    long relationshipCount = parseLongWithUnit(args.get("relationships", null));
    int labelCount = args.getNumber("labels", 4).intValue();
    int relationshipTypeCount = args.getNumber("relationship-types", 4).intValue();
    Path dir = Path.of(args.get("into"));
    long randomSeed = args.getNumber("random-seed", currentTimeMillis()).longValue();
    Configuration config = Configuration.COMMAS;
    Extractors extractors = new Extractors(config.arrayDelimiter());
    IdType idType = IdType.valueOf(args.get("id-type", IdType.INTEGER.name()));
    Groups groups = new Groups();
    Header nodeHeader = parseNodeHeader(args, idType, extractors, groups);
    Header relationshipHeader = parseRelationshipHeader(args, idType, extractors, groups);

    Config dbConfig;
    String dbConfigFileName = args.get("db-config", null);
    if (dbConfigFileName != null) {
        dbConfig = Config.newBuilder().fromFile(Path.of(dbConfigFileName)).build();
    } else {
        dbConfig = Config.defaults();
    }

    Boolean highIo = args.has("high-io") ? args.getBoolean("high-io") : null;
    LogProvider logging = NullLogProvider.getInstance();
    long pageCacheMemory = args.getNumber("pagecache-memory",
            org.neo4j.internal.batchimport.Configuration.MAX_PAGE_CACHE_MEMORY).longValue();
    org.neo4j.internal.batchimport.Configuration importConfig =
            new org.neo4j.internal.batchimport.Configuration.Overridden(defaultConfiguration(dir)) {
        @Override
        public int maxNumberOfProcessors() {
            return args.getNumber("processors", super.maxNumberOfProcessors()).intValue();
        }

        @Override
        public boolean highIO() {
            return highIo != null ? highIo : super.highIO();
        }

        @Override
        public long pageCacheMemory() {
            return pageCacheMemory;
        }

        @Override
        public long maxMemoryUsage() {
            String custom = args.get("max-memory", null);
            return custom != null ? parseMaxMemory(custom) : super.maxMemoryUsage();
        }

        @Override
        public IndexConfig indexConfig() {
            return IndexConfig.create().withLabelIndex().withRelationshipTypeIndex();
        }
    };
    float factorBadNodeData = args.getNumber("factor-bad-node-data", 0).floatValue();
    float factorBadRelationshipData = args.getNumber("factor-bad-relationship-data", 0).floatValue();
    Input input = new DataGeneratorInput(
            nodeCount, relationshipCount, idType, randomSeed, 0, nodeHeader, relationshipHeader,
            labelCount, relationshipTypeCount, factorBadNodeData, factorBadRelationshipData);

    try (FileSystemAbstraction fileSystem = new DefaultFileSystemAbstraction();
            Lifespan life = new Lifespan()) {
        BatchImporter consumer;
        if (args.getBoolean("to-csv")) {
            consumer = new CsvOutput(dir, nodeHeader, relationshipHeader, config);
        } else {
            System.out.println("Seed " + randomSeed);
            final JobScheduler jobScheduler = life.add(createScheduler());
            boolean verbose = args.getBoolean("v");
            ExecutionMonitor monitor = verbose
                    ? new SpectrumExecutionMonitor(2, TimeUnit.SECONDS, System.out, 100)
                    : defaultVisible();
            consumer = BatchImporterFactory.withHighestPriority().instantiate(
                    DatabaseLayout.ofFlat(dir), fileSystem, PageCacheTracer.NULL, importConfig,
                    new SimpleLogService(logging, logging), monitor, EMPTY, dbConfig,
                    RecordFormatSelector.selectForConfig(dbConfig, logging), NO_MONITOR,
                    jobScheduler, Collector.EMPTY, TransactionLogInitializer.getLogFilesInitializer(),
                    new IndexImporterFactoryImpl(dbConfig), INSTANCE);
        }
        consumer.doImport(input);
    }
}
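QuickImport is driven entirely by the parsed Args flags above. A hypothetical invocation might look like the following sketch; the "-key value" flag style is an assumption about org.neo4j.internal.helpers.Args, and the "1M" suffix is assumed to be accepted by parseLongWithUnit based on its name, so treat both as unconfirmed:

    // Hypothetical launcher: generate one million nodes and four million
    // relationships into /tmp/quick using four processors with verbose output.
    // Flag syntax and unit suffixes are assumptions, not confirmed API.
    public static void main(String[] args) throws IOException {
        QuickImport.main(new String[] {
                "-nodes", "1M",
                "-relationships", "4M",
                "-into", "/tmp/quick",
                "-processors", "4",
                "-v", "true"
        });
    }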
Use of org.neo4j.internal.batchimport.staging.ExecutionMonitor in project neo4j by neo4j.
From class CsvImporter, method doImport:
private void doImport(Input input, Collector badCollector) {
    boolean success = false;
    Path internalLogFile = databaseConfig.get(store_internal_log_path);
    try (JobScheduler jobScheduler = createInitialisedScheduler();
            OutputStream outputStream = FileSystemUtils.createOrOpenAsOutputStream(fileSystem, internalLogFile, true);
            Log4jLogProvider logProvider = Util.configuredLogProvider(databaseConfig, outputStream)) {
        ExecutionMonitor executionMonitor = verbose
                ? new SpectrumExecutionMonitor(2, TimeUnit.SECONDS, stdOut, SpectrumExecutionMonitor.DEFAULT_WIDTH)
                : ExecutionMonitors.defaultVisible();
        BatchImporter importer = BatchImporterFactory.withHighestPriority().instantiate(
                databaseLayout, fileSystem, pageCacheTracer, importConfig,
                new SimpleLogService(NullLogProvider.getInstance(), logProvider), executionMonitor,
                EMPTY, databaseConfig, RecordFormatSelector.selectForConfig(databaseConfig, logProvider),
                new PrintingImportLogicMonitor(stdOut, stdErr), jobScheduler, badCollector,
                TransactionLogInitializer.getLogFilesInitializer(),
                new IndexImporterFactoryImpl(databaseConfig), memoryTracker);
        printOverview(databaseLayout.databaseDirectory(), nodeFiles, relationshipFiles, importConfig, stdOut);
        importer.doImport(input);
        success = true;
    } catch (Exception e) {
        throw andPrintError("Import error", e, verbose, stdErr);
    } finally {
        long numberOfBadEntries = badCollector.badEntries();
        if (reportFile != null && numberOfBadEntries > 0) {
            stdOut.println("There were bad entries which were skipped and logged into " + reportFile.toAbsolutePath());
        }
        if (!success) {
            stdErr.println("WARNING Import failed. The store files in "
                    + databaseLayout.databaseDirectory().toAbsolutePath()
                    + " are left as they are, although they are likely in an unusable state. "
                    + "Starting a database on these store files will likely fail or observe inconsistent records so "
                    + "start at your own risk or delete the store manually");
        }
    }
}
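The verbose flag only switches which ExecutionMonitor renders progress; everything else in the import is identical. That choice could be extracted into a small helper, sketched here under a hypothetical name pickMonitor, using only the two constructions the importer itself calls:

    // Sketch of the monitor selection above as a reusable helper (pickMonitor
    // is a hypothetical name). SpectrumExecutionMonitor prints detailed
    // per-stage progress; ExecutionMonitors.defaultVisible() is the standard
    // progress output used by the non-verbose path.
    static ExecutionMonitor pickMonitor(boolean verbose, PrintStream out) {
        return verbose
                ? new SpectrumExecutionMonitor(2, TimeUnit.SECONDS, out, SpectrumExecutionMonitor.DEFAULT_WIDTH)
                : ExecutionMonitors.defaultVisible();
    }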
Use of org.neo4j.internal.batchimport.staging.ExecutionMonitor in project neo4j by neo4j.
From class ImportLogicTest, method shouldUseDataStatisticsCountsForPrintingFinalStats:
@Test
void shouldUseDataStatisticsCountsForPrintingFinalStats() throws IOException {
    // given
    ExecutionMonitor monitor = mock(ExecutionMonitor.class);
    IndexImporterFactory factory = mock(IndexImporterFactory.class);
    try (BatchingNeoStores stores = batchingNeoStoresWithExternalPageCache(
            fileSystem, pageCache, NULL, databaseLayout, defaultFormat(), DEFAULT,
            getInstance(), AdditionalInitialIds.EMPTY, defaults(), INSTANCE)) {
        // when
        DataStatistics.RelationshipTypeCount[] relationshipTypeCounts = new DataStatistics.RelationshipTypeCount[] {
                new DataStatistics.RelationshipTypeCount(0, 33),
                new DataStatistics.RelationshipTypeCount(1, 66)
        };
        DataStatistics dataStatistics = new DataStatistics(100123, 100456, relationshipTypeCounts);
        try (ImportLogic logic = new ImportLogic(
                databaseLayout, stores, DEFAULT, defaults(), getInstance(), monitor,
                defaultFormat(), Collector.EMPTY, NO_MONITOR, NULL, factory,
                EmptyMemoryTracker.INSTANCE)) {
            logic.putState(dataStatistics);
            logic.success();
        }

        // then
        verify(monitor).done(eq(true), anyLong(), contains(dataStatistics.toString()));
    }
}
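Instead of a Mockito mock, the same observation could be made with a hand-rolled monitor. This is a minimal sketch that assumes ExecutionMonitor.Adapter exists as an overridable base class with an (interval, unit) constructor; only the done(boolean, long, String) signature is confirmed by the tests above:

    // Recording monitor as an alternative to mock(ExecutionMonitor.class).
    // ExecutionMonitor.Adapter and its constructor are assumptions about the
    // internal API; only done(...) is confirmed by the tests in this section.
    class RecordingMonitor extends ExecutionMonitor.Adapter {
        volatile boolean successful;
        volatile String message;

        RecordingMonitor() {
            super(1, TimeUnit.SECONDS); // polling interval, irrelevant for done()
        }

        @Override
        public void done(boolean successful, long totalTimeMillis, String additionalInformation) {
            this.successful = successful;
            this.message = additionalInformation;
        }
    }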
Use of org.neo4j.internal.batchimport.staging.ExecutionMonitor in project neo4j by neo4j.
From class ParallelBatchImporterTest, method shouldImportCsvData:
@ParameterizedTest
@MethodSource("params")
void shouldImportCsvData(InputIdGenerator inputIdGenerator, IdType idType) throws Exception {
    this.inputIdGenerator = inputIdGenerator;

    // GIVEN
    ExecutionMonitor processorAssigner = ProcessorAssignmentStrategies.eagerRandomSaturation(config.maxNumberOfProcessors());
    CapturingMonitor monitor = new CapturingMonitor(processorAssigner);

    boolean successful = false;
    Groups groups = new Groups();
    IdGroupDistribution groupDistribution = new IdGroupDistribution(NODE_COUNT, NUMBER_OF_ID_GROUPS, random.random(), groups);
    long nodeRandomSeed = random.nextLong();
    long relationshipRandomSeed = random.nextLong();
    var pageCacheTracer = new DefaultPageCacheTracer();
    JobScheduler jobScheduler = new ThreadPoolJobScheduler();
    // This will have statistically half the nodes be considered dense
    Config dbConfig = Config.defaults(GraphDatabaseSettings.dense_node_threshold, RELATIONSHIPS_PER_NODE * 2);
    IndexImporterFactoryImpl indexImporterFactory = new IndexImporterFactoryImpl(dbConfig);
    final BatchImporter inserter = new ParallelBatchImporter(
            databaseLayout, fs, pageCacheTracer, config, NullLogService.getInstance(), monitor,
            EMPTY, dbConfig, getFormat(), ImportLogic.NO_MONITOR, jobScheduler, Collector.EMPTY,
            TransactionLogInitializer.getLogFilesInitializer(), indexImporterFactory, INSTANCE);
    LongAdder propertyCount = new LongAdder();
    LongAdder relationshipCount = new LongAdder();
    try {
        // WHEN
        inserter.doImport(Input.input(
                nodes(nodeRandomSeed, NODE_COUNT, config.batchSize(), inputIdGenerator, groupDistribution, propertyCount),
                relationships(relationshipRandomSeed, RELATIONSHIP_COUNT, config.batchSize(), inputIdGenerator,
                        groupDistribution, propertyCount, relationshipCount),
                idType,
                knownEstimates(
                        NODE_COUNT, RELATIONSHIP_COUNT,
                        NODE_COUNT * TOKENS.length / 2,
                        RELATIONSHIP_COUNT * TOKENS.length / 2,
                        NODE_COUNT * TOKENS.length / 2 * Long.BYTES,
                        RELATIONSHIP_COUNT * TOKENS.length / 2 * Long.BYTES,
                        NODE_COUNT * TOKENS.length / 2),
                groups));
        assertThat(pageCacheTracer.pins()).isGreaterThan(0);
        assertThat(pageCacheTracer.pins()).isEqualTo(pageCacheTracer.unpins());
        assertThat(pageCacheTracer.pins()).isEqualTo(Math.addExact(pageCacheTracer.faults(), pageCacheTracer.hits()));

        // THEN
        DatabaseManagementService managementService = getDBMSBuilder(databaseLayout).build();
        GraphDatabaseService db = managementService.database(DEFAULT_DATABASE_NAME);
        try (Transaction tx = db.beginTx()) {
            inputIdGenerator.reset();
            verifyData(NODE_COUNT, RELATIONSHIP_COUNT, db, tx, groupDistribution, nodeRandomSeed, relationshipRandomSeed);
            tx.commit();
        } finally {
            managementService.shutdown();
        }
        assertConsistent(databaseLayout);
        successful = true;
    } finally {
        jobScheduler.close();
        if (!successful) {
            Path failureFile = databaseLayout.databaseDirectory().resolve("input");
            try (PrintStream out = new PrintStream(Files.newOutputStream(failureFile))) {
                out.println("Seed used in this failing run: " + random.seed());
                out.println(inputIdGenerator);
                inputIdGenerator.reset();
                out.println();
                out.println("Processor assignments");
                out.println(processorAssigner.toString());
            }
            System.err.println("Additional debug information stored in " + failureFile);
        }
    }
}
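The finally block above shows a useful pattern for randomized tests: on failure, persist everything needed to reproduce the run (seeds, generator state, processor assignments) before the evidence is lost. Extracted as a hypothetical helper using only standard java.nio and java.io:

    // Hypothetical extraction of the failure-diagnostics pattern above: write
    // the seed and any other reproducibility state to a file next to the store.
    static void dumpFailureDiagnostics(Path directory, long seed, Object... state) throws IOException {
        Path failureFile = directory.resolve("input");
        try (PrintStream out = new PrintStream(Files.newOutputStream(failureFile))) {
            out.println("Seed used in this failing run: " + seed);
            for (Object item : state) {
                out.println(item);
            }
        }
        System.err.println("Additional debug information stored in " + failureFile);
    }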