Search in sources:

Example 6 with Groups

Use of org.neo4j.internal.batchimport.input.Groups in project neo4j by neo4j.

From the class ParallelBatchImporterTest, method verifyData.

/**
 * Verifies the nodes, relationships and labels visible through {@code tx} against input entities
 * obtained from {@code nodes(...)} and {@code relationships(...)} with the given seeds
 * (presumably the same generators that fed the import; confirm against the caller).
 *
 * @param nodeCount number of input nodes expected to be verified.
 * @param relationshipCount number of input relationships expected to be seen, including those skipped
 * because they refer to missing nodes.
 * @param db the database under verification; not used by this method, all reads go through {@code tx}.
 * @param tx transaction used for reading nodes, relationships and labels.
 * @param groups id group distribution passed on to the input generators.
 * @param nodeRandomSeed seed passed to the node input generator.
 * @param relationshipRandomSeed seed passed to the relationship input generator.
 * @throws IOException declared for the input iterators used and closed by this method.
 */
private void verifyData(int nodeCount, int relationshipCount, GraphDatabaseService db, Transaction tx, IdGroupDistribution groups, long nodeRandomSeed, long relationshipRandomSeed) throws IOException {
    // Read all nodes, relationships and properties and verify against the input data.
    LongAdder propertyCount = new LongAdder();
    try (InputIterator nodes = nodes(nodeRandomSeed, nodeCount, config.batchSize(), inputIdGenerator, groups, propertyCount).iterator();
        InputIterator relationships = relationships(relationshipRandomSeed, relationshipCount, config.batchSize(), inputIdGenerator, groups, propertyCount, new LongAdder()).iterator();
        ResourceIterator<Node> dbNodes = tx.getAllNodes().iterator()) {
        // Nodes: index every stored node by its "id" property; a duplicate id fails the assertNull.
        Map<String, Node> nodeByInputId = new HashMap<>(nodeCount);
        while (dbNodes.hasNext()) {
            Node node = dbNodes.next();
            String id = (String) node.getProperty("id");
            assertNull(nodeByInputId.put(id, node));
        }
        int verifiedNodes = 0;
        long allNodesScanLabelCount = 0;
        InputChunk chunk = nodes.newChunk();
        InputEntity input = new InputEntity();
        while (nodes.next(chunk)) {
            while (chunk.next(input)) {
                String iid = uniqueId(input.idGroup, input.objectId);
                Node node = nodeByInputId.get(iid);
                // Fail with a readable message instead of a NullPointerException further down.
                assertNotNull(node, "Expected there to be a node with input id '" + iid + "'");
                assertNodeEquals(input, node);
                verifiedNodes++;
                assertDegrees(node);
                allNodesScanLabelCount += Iterables.count(node.getLabels());
            }
        }
        assertEquals(nodeCount, verifiedNodes);
        // Labels: the number of (label, node) pairs found via label lookups must match the number of
        // labels counted on the nodes themselves above.
        long labelScanStoreEntryCount = stream(tx.getAllLabels()).flatMap(l -> tx.findNodes(l).stream()).count();
        assertEquals(allNodesScanLabelCount, labelScanStoreEntryCount, format("Expected label scan store and node store to have same number labels. But %n" + "#labelsInNodeStore=%d%n" + "#labelsInLabelScanStore=%d%n", allNodesScanLabelCount, labelScanStoreEntryCount));
        // Relationships
        chunk = relationships.newChunk();
        Map<String, Relationship> relationshipByName = new HashMap<>();
        for (Relationship relationship : tx.getAllRelationships()) {
            relationshipByName.put((String) relationship.getProperty("id"), relationship);
        }
        int verifiedRelationships = 0;
        while (relationships.next(chunk)) {
            while (chunk.next(input)) {
                // A relationship referring to missing nodes is skipped. The InputIdGenerator is expected
                // to generate some (very few) of those. Only relationships with both ends present are
                // compared against the store.
                if (!inputIdGenerator.isMiss(input.objectStartId) && !inputIdGenerator.isMiss(input.objectEndId)) {
                    String name = (String) propertyOf(input, "id");
                    Relationship relationship = relationshipByName.get(name);
                    assertNotNull(relationship, "Expected there to be a relationship with name '" + name + "'");
                    assertEquals(nodeByInputId.get(uniqueId(input.startIdGroup, input.objectStartId)), relationship.getStartNode());
                    assertEquals(nodeByInputId.get(uniqueId(input.endIdGroup, input.objectEndId)), relationship.getEndNode());
                    assertRelationshipEquals(input, relationship);
                }
                // Counts every input relationship, skipped ones included, so the total matches relationshipCount.
                verifiedRelationships++;
            }
        }
        assertEquals(relationshipCount, verifiedRelationships);
    }
}
Also used : ByteUnit.mebiBytes(org.neo4j.io.ByteUnit.mebiBytes) Arrays(java.util.Arrays) ResourceIterator(org.neo4j.graphdb.ResourceIterator) Array(java.lang.reflect.Array) NullLogService(org.neo4j.logging.internal.NullLogService) RandomExtension(org.neo4j.test.extension.RandomExtension) Collector(org.neo4j.internal.batchimport.input.Collector) Direction(org.neo4j.graphdb.Direction) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Config(org.neo4j.configuration.Config) Result(org.neo4j.consistency.ConsistencyCheckService.Result) DefaultPageCacheTracer(org.neo4j.io.pagecache.tracing.DefaultPageCacheTracer) NullLogProvider(org.neo4j.logging.NullLogProvider) DatabaseLayout(org.neo4j.io.layout.DatabaseLayout) InputChunk(org.neo4j.internal.batchimport.input.InputChunk) DEFAULT_DATABASE_NAME(org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME) RandomValues(org.neo4j.values.storable.RandomValues) ExtendWith(org.junit.jupiter.api.extension.ExtendWith) RandomRule(org.neo4j.test.rule.RandomRule) TransactionLogInitializer(org.neo4j.kernel.impl.transaction.log.files.TransactionLogInitializer) Input.knownEstimates(org.neo4j.internal.batchimport.input.Input.knownEstimates) Map(java.util.Map) Transaction(org.neo4j.graphdb.Transaction) Resources(org.junit.jupiter.api.parallel.Resources) Path(java.nio.file.Path) Input(org.neo4j.internal.batchimport.input.Input) MethodSource(org.junit.jupiter.params.provider.MethodSource) Standard(org.neo4j.kernel.impl.store.format.standard.Standard) Set(java.util.Set) UUID(java.util.UUID) InputEntityVisitor(org.neo4j.internal.batchimport.input.InputEntityVisitor) ExecutionMonitor(org.neo4j.internal.batchimport.staging.ExecutionMonitor) ResourceLock(org.junit.jupiter.api.parallel.ResourceLock) Arguments(org.junit.jupiter.params.provider.Arguments) Neo4jLayoutExtension(org.neo4j.test.extension.Neo4jLayoutExtension) String.format(java.lang.String.format) Entity(org.neo4j.graphdb.Entity) 
IdType(org.neo4j.internal.batchimport.input.IdType) INSTANCE(org.neo4j.memory.EmptyMemoryTracker.INSTANCE) Stream(java.util.stream.Stream) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) ProcessorAssignmentStrategies(org.neo4j.internal.batchimport.staging.ProcessorAssignmentStrategies) RelationshipType(org.neo4j.graphdb.RelationshipType) DatabaseManagementService(org.neo4j.dbms.api.DatabaseManagementService) ConsistencyCheckService(org.neo4j.consistency.ConsistencyCheckService) SuppressOutput(org.neo4j.test.rule.SuppressOutput) EMPTY(org.neo4j.internal.batchimport.AdditionalInitialIds.EMPTY) LongAdder(java.util.concurrent.atomic.LongAdder) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) Label(org.neo4j.graphdb.Label) GraphDatabaseSettings(org.neo4j.configuration.GraphDatabaseSettings) Groups(org.neo4j.internal.batchimport.input.Groups) StageExecution(org.neo4j.internal.batchimport.staging.StageExecution) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) HashMap(java.util.HashMap) SuppressOutputExtension(org.neo4j.test.extension.SuppressOutputExtension) Node(org.neo4j.graphdb.Node) RecordFormats(org.neo4j.kernel.impl.store.format.RecordFormats) Values(org.neo4j.values.storable.Values) TestDatabaseManagementServiceBuilder(org.neo4j.test.TestDatabaseManagementServiceBuilder) GraphDatabaseService(org.neo4j.graphdb.GraphDatabaseService) Inject(org.neo4j.test.extension.Inject) Iterables(org.neo4j.internal.helpers.collection.Iterables) Math.toIntExact(java.lang.Math.toIntExact) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) JobScheduler(org.neo4j.scheduler.JobScheduler) Arguments.arguments(org.junit.jupiter.params.provider.Arguments.arguments) DependencyResolver(org.neo4j.common.DependencyResolver) PrintStream(java.io.PrintStream) Iterables.count(org.neo4j.internal.helpers.collection.Iterables.count) Files(java.nio.file.Files) 
Iterables.stream(org.neo4j.internal.helpers.collection.Iterables.stream) IndexImporterFactoryImpl(org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl) InputEntity(org.neo4j.internal.batchimport.input.InputEntity) IOException(java.io.IOException) ProgressMonitorFactory(org.neo4j.internal.helpers.progress.ProgressMonitorFactory) ConsistencyCheckIncompleteException(org.neo4j.consistency.checking.full.ConsistencyCheckIncompleteException) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Relationship(org.neo4j.graphdb.Relationship) Iterators.asSet(org.neo4j.internal.helpers.collection.Iterators.asSet) Group(org.neo4j.internal.batchimport.input.Group) ThreadPoolJobScheduler(org.neo4j.test.scheduler.ThreadPoolJobScheduler) FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) HashMap(java.util.HashMap) Node(org.neo4j.graphdb.Node) InputChunk(org.neo4j.internal.batchimport.input.InputChunk) LongAdder(java.util.concurrent.atomic.LongAdder) Relationship(org.neo4j.graphdb.Relationship) InputEntity(org.neo4j.internal.batchimport.input.InputEntity)

Example 7 with Groups

Use of org.neo4j.internal.batchimport.input.Groups in project neo4j by neo4j.

From the class ParallelBatchImporterTest, method shouldImportCsvData.

/**
 * Runs a {@link ParallelBatchImporter} over generated node and relationship input, then starts a database
 * on the resulting store, verifies its contents with {@code verifyData} and checks consistency.
 *
 * <p>If the test does not complete successfully, the random seed, the input id generator and the
 * processor assignments are written to a file named {@code input} in the database directory.
 *
 * @param inputIdGenerator generator of input ids, also stored in the {@code inputIdGenerator} field.
 * @param idType id type handed to the importer input.
 * @throws Exception if the import, the verification or the consistency check fails.
 */
@ParameterizedTest
@MethodSource("params")
void shouldImportCsvData(InputIdGenerator inputIdGenerator, IdType idType) throws Exception {
    this.inputIdGenerator = inputIdGenerator;
    // GIVEN
    ExecutionMonitor processorAssigner = ProcessorAssignmentStrategies.eagerRandomSaturation(config.maxNumberOfProcessors());
    CapturingMonitor monitor = new CapturingMonitor(processorAssigner);
    boolean successful = false;
    Groups groups = new Groups();
    IdGroupDistribution groupDistribution = new IdGroupDistribution(NODE_COUNT, NUMBER_OF_ID_GROUPS, random.random(), groups);
    long nodeRandomSeed = random.nextLong();
    long relationshipRandomSeed = random.nextLong();
    var pageCacheTracer = new DefaultPageCacheTracer();
    JobScheduler jobScheduler = new ThreadPoolJobScheduler();
    // This will have statistically half the nodes be considered dense
    Config dbConfig = Config.defaults(GraphDatabaseSettings.dense_node_threshold, RELATIONSHIPS_PER_NODE * 2);
    IndexImporterFactoryImpl indexImporterFactory = new IndexImporterFactoryImpl(dbConfig);
    final BatchImporter inserter = new ParallelBatchImporter(databaseLayout, fs, pageCacheTracer, config, NullLogService.getInstance(), monitor, EMPTY, dbConfig, getFormat(), ImportLogic.NO_MONITOR, jobScheduler, Collector.EMPTY, TransactionLogInitializer.getLogFilesInitializer(), indexImporterFactory, INSTANCE);
    LongAdder propertyCount = new LongAdder();
    LongAdder relationshipCount = new LongAdder();
    try {
        // WHEN
        inserter.doImport(Input.input(nodes(nodeRandomSeed, NODE_COUNT, config.batchSize(), inputIdGenerator, groupDistribution, propertyCount), relationships(relationshipRandomSeed, RELATIONSHIP_COUNT, config.batchSize(), inputIdGenerator, groupDistribution, propertyCount, relationshipCount), idType, knownEstimates(NODE_COUNT, RELATIONSHIP_COUNT, NODE_COUNT * TOKENS.length / 2, RELATIONSHIP_COUNT * TOKENS.length / 2, NODE_COUNT * TOKENS.length / 2 * Long.BYTES, RELATIONSHIP_COUNT * TOKENS.length / 2 * Long.BYTES, NODE_COUNT * TOKENS.length / 2), groups));
        // Page cache accounting: every pin must be unpinned, and every pin is either a fault or a hit.
        assertThat(pageCacheTracer.pins()).isGreaterThan(0);
        assertThat(pageCacheTracer.pins()).isEqualTo(pageCacheTracer.unpins());
        assertThat(pageCacheTracer.pins()).isEqualTo(Math.addExact(pageCacheTracer.faults(), pageCacheTracer.hits()));
        // THEN
        DatabaseManagementService managementService = getDBMSBuilder(databaseLayout).build();
        try {
            // Looked up inside the try so the management service is shut down even if the lookup fails.
            GraphDatabaseService db = managementService.database(DEFAULT_DATABASE_NAME);
            try (Transaction tx = db.beginTx()) {
                // Reset before verification, which generates the input again.
                inputIdGenerator.reset();
                verifyData(NODE_COUNT, RELATIONSHIP_COUNT, db, tx, groupDistribution, nodeRandomSeed, relationshipRandomSeed);
                tx.commit();
            }
        } finally {
            managementService.shutdown();
        }
        assertConsistent(databaseLayout);
        successful = true;
    } finally {
        jobScheduler.close();
        if (!successful) {
            Path failureFile = databaseLayout.databaseDirectory().resolve("input");
            try {
                try (PrintStream out = new PrintStream(Files.newOutputStream(failureFile))) {
                    out.println("Seed used in this failing run: " + random.seed());
                    out.println(inputIdGenerator);
                    inputIdGenerator.reset();
                    out.println();
                    out.println("Processor assignments");
                    out.println(processorAssigner.toString());
                }
                System.err.println("Additional debug information stored in " + failureFile);
            } catch (IOException e) {
                // Failing to write debug information must not hide the exception that failed the test.
                System.err.println("Failed to store additional debug information in " + failureFile + ": " + e);
            }
        }
    }
}
Also used : JobScheduler(org.neo4j.scheduler.JobScheduler) ThreadPoolJobScheduler(org.neo4j.test.scheduler.ThreadPoolJobScheduler) Path(java.nio.file.Path) GraphDatabaseService(org.neo4j.graphdb.GraphDatabaseService) PrintStream(java.io.PrintStream) Config(org.neo4j.configuration.Config) LongAdder(java.util.concurrent.atomic.LongAdder) Transaction(org.neo4j.graphdb.Transaction) Groups(org.neo4j.internal.batchimport.input.Groups) IndexImporterFactoryImpl(org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl) ExecutionMonitor(org.neo4j.internal.batchimport.staging.ExecutionMonitor) ThreadPoolJobScheduler(org.neo4j.test.scheduler.ThreadPoolJobScheduler) DatabaseManagementService(org.neo4j.dbms.api.DatabaseManagementService) DefaultPageCacheTracer(org.neo4j.io.pagecache.tracing.DefaultPageCacheTracer) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Aggregations

Groups (org.neo4j.internal.batchimport.input.Groups)7 Input (org.neo4j.internal.batchimport.input.Input)5 IdType (org.neo4j.internal.batchimport.input.IdType)4 Path (java.nio.file.Path)3 Config (org.neo4j.configuration.Config)3 Group (org.neo4j.internal.batchimport.input.Group)3 ExecutionMonitor (org.neo4j.internal.batchimport.staging.ExecutionMonitor)3 IndexImporterFactoryImpl (org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl)3 JobScheduler (org.neo4j.scheduler.JobScheduler)3 PrintStream (java.io.PrintStream)2 LongAdder (java.util.concurrent.atomic.LongAdder)2 MutableLong (org.apache.commons.lang3.mutable.MutableLong)2 Test (org.junit.Test)2 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)2 MethodSource (org.junit.jupiter.params.provider.MethodSource)2 DatabaseManagementService (org.neo4j.dbms.api.DatabaseManagementService)2 GraphDatabaseService (org.neo4j.graphdb.GraphDatabaseService)2 Transaction (org.neo4j.graphdb.Transaction)2 FileSystemAbstraction (org.neo4j.io.fs.FileSystemAbstraction)2 DefaultPageCacheTracer (org.neo4j.io.pagecache.tracing.DefaultPageCacheTracer)2