Search in sources:

Example 26 with InputNode

use of org.neo4j.unsafe.impl.batchimport.input.InputNode in project neo4j by neo4j.

The method process of the class NodeEncoderStep.

/**
 * Encodes one batch of input nodes into node records.
 *
 * For every input node an id is generated, the input id (when present) is registered
 * with the id mapper, a fresh in-use {@link NodeRecord} is created and its labels are set,
 * either from a pre-computed label field or from label names resolved to ids.
 * The populated batch is then handed to the sender.
 *
 * @param batch  batch whose {@code input} is read and whose {@code records} and
 *               {@code labels} arrays are (re)created and filled in here.
 * @param sender receives the batch once all its nodes have been encoded.
 */
@Override
protected void process(Batch<InputNode, NodeRecord> batch, BatchSender sender) {
    final InputNode[] inputNodes = batch.input;
    final int count = inputNodes.length;
    batch.records = new NodeRecord[count];
    batch.labels = new long[count][];

    for (int index = 0; index < count; index++) {
        final InputNode node = inputNodes[index];
        final long actualId = idGenerator.generate(node.id());

        // Anonymous nodes (null input id) are allowed; they simply cannot be looked up
        // later when relationships are created, so they are not put into the id mapper.
        if (node.id() != null) {
            idMapper.put(node.id(), actualId, node.group());
        }

        final NodeRecord record = new NodeRecord(actualId).initialize(true, NO_NEXT_PROPERTY.intValue(), false, NO_NEXT_RELATIONSHIP.intValue(), NO_LABELS_FIELD.intValue());
        batch.records[index] = record;

        // Labels: a ready-made label field takes precedence over label names.
        if (node.hasLabelField()) {
            record.setLabelField(node.labelField(), Collections.<DynamicRecord>emptyList());
            continue;
        }
        if (node.labels().length > 0) {
            final long[] labelIds = labelHolder.getOrCreateIds(node.labels());
            batch.labels[index] = labelIds;
            InlineNodeLabels.putSorted(record, labelIds, null, nodeStore.getDynamicLabelStore());
        }
    }

    sender.send(batch);
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) NodeRecord(org.neo4j.kernel.impl.store.record.NodeRecord)

Example 27 with InputNode

use of org.neo4j.unsafe.impl.batchimport.input.InputNode in project neo4j by neo4j.

The method doImport of the class ParallelBatchImporter.

/**
 * Runs the complete import of the given input: a node stage, optional id-mapper preparation
 * (with removal of duplicate nodes), dense-node calculation, relationship import,
 * relationship group defragmentation and finally the node and relationship count stages.
 *
 * @param input source of nodes and relationships, as well as the id mapper, id generator
 *              and bad-entry collector used during the import.
 * @throws IOException any failure is logged and rethrown through
 *                     {@code Exceptions.launderedException(IOException.class, t)}.
 */
@Override
public void doImport(Input input) throws IOException {
    log.info("Import starting");
    // Things that we need to close later. The reason they're not in the try-with-resource statement
    // is that we need to close, and set to null, at specific points preferably. So use good ol' finally block.
    NodeRelationshipCache nodeRelationshipCache = null;
    NodeLabelsCache nodeLabelsCache = null;
    long startTime = currentTimeMillis();
    CountingStoreUpdateMonitor storeUpdateMonitor = new CountingStoreUpdateMonitor();
    // The stores, the counts updater and the input cache are closed automatically when this block exits.
    try (BatchingNeoStores neoStore = getBatchingNeoStores();
        CountsAccessor.Updater countsUpdater = neoStore.getCountsStore().reset(neoStore.getLastCommittedTransactionId());
        InputCache inputCache = new InputCache(fileSystem, storeDir, recordFormats, config)) {
        Collector badCollector = input.badCollector();
        // Some temporary caches and indexes in the import
        IoMonitor writeMonitor = new IoMonitor(neoStore.getIoTracer());
        IdMapper idMapper = input.idMapper();
        IdGenerator idGenerator = input.idGenerator();
        nodeRelationshipCache = new NodeRelationshipCache(AUTO, config.denseNodeThreshold());
        StatsProvider memoryUsageStats = new MemoryUsageStatsProvider(nodeRelationshipCache, idMapper);
        InputIterable<InputNode> nodes = input.nodes();
        InputIterable<InputRelationship> relationships = input.relationships();
        // Variants of the input backed by the input cache; used by the passes after the first one.
        // NOTE(review): presumably this avoids re-reading the original source -- confirm in cachedForSure.
        InputIterable<InputNode> cachedNodes = cachedForSure(nodes, inputCache.nodes(MAIN, true));
        InputIterable<InputRelationship> cachedRelationships = cachedForSure(relationships, inputCache.relationships(MAIN, true));
        RelationshipStore relationshipStore = neoStore.getRelationshipStore();
        // Stage 1 -- nodes, properties, labels
        NodeStage nodeStage = new NodeStage(config, writeMonitor, nodes, idMapper, idGenerator, neoStore, inputCache, neoStore.getLabelScanStore(), storeUpdateMonitor, nodeRelationshipCache, memoryUsageStats);
        executeStage(nodeStage);
        // Only some id mappers need a preparation pass; duplicates it reports are deleted from the store.
        if (idMapper.needsPreparation()) {
            executeStage(new IdMapperPreparationStage(config, idMapper, cachedNodes, badCollector, memoryUsageStats));
            PrimitiveLongIterator duplicateNodeIds = badCollector.leftOverDuplicateNodesIds();
            if (duplicateNodeIds.hasNext()) {
                executeStage(new DeleteDuplicateNodesStage(config, duplicateNodeIds, neoStore));
            }
        }
        // Stage 2 -- calculate dense node threshold
        // This stage runs with a batch size ten times the configured one.
        CalculateDenseNodesStage calculateDenseNodesStage = new CalculateDenseNodesStage(withBatchSize(config, config.batchSize() * 10), relationships, nodeRelationshipCache, idMapper, badCollector, inputCache, neoStore);
        executeStage(calculateDenseNodesStage);
        // NOTE(review): the stage comments jump from 2 to 6; stages 3-5 presumably run inside
        // importRelationships. The meaning of the getRelationshipTypes arguments (Long.MAX_VALUE, 100)
        // is not visible here -- confirm against CalculateDenseNodesStage.
        importRelationships(nodeRelationshipCache, storeUpdateMonitor, neoStore, writeMonitor, idMapper, cachedRelationships, inputCache, calculateDenseNodesStage.getRelationshipTypes(Long.MAX_VALUE), calculateDenseNodesStage.getRelationshipTypes(100));
        // Release this potentially really big piece of cached data
        // Memory usage and high node id are read first, since both are still needed after the close.
        long peakMemoryUsage = totalMemoryUsageOf(idMapper, nodeRelationshipCache);
        long highNodeId = nodeRelationshipCache.getHighNodeId();
        // NOTE(review): idMapper is local to the try block and is only closed here, on the success path;
        // the finally block below does not close it if an earlier stage throws -- confirm this is acceptable.
        idMapper.close();
        idMapper = null;
        nodeRelationshipCache.close();
        // Set to null so that the finally block does not close the cache a second time.
        nodeRelationshipCache = null;
        // The defragmenter is given the largest of: the peak memory usage, highNodeId * 4 and mebiBytes(1).
        new RelationshipGroupDefragmenter(config, executionMonitor).run(max(max(peakMemoryUsage, highNodeId * 4), mebiBytes(1)), neoStore, highNodeId);
        // Stage 6 -- count nodes per label and labels per node
        nodeLabelsCache = new NodeLabelsCache(AUTO, neoStore.getLabelRepository().getHighId());
        memoryUsageStats = new MemoryUsageStatsProvider(nodeLabelsCache);
        executeStage(new NodeCountsStage(config, nodeLabelsCache, neoStore.getNodeStore(), neoStore.getLabelRepository().getHighId(), countsUpdater, memoryUsageStats));
        // Stage 7 -- count label-[type]->label
        executeStage(new RelationshipCountsStage(config, nodeLabelsCache, relationshipStore, neoStore.getLabelRepository().getHighId(), neoStore.getRelationshipTypeRepository().getHighId(), countsUpdater, AUTO));
        // We're done, do some final logging about it
        long totalTimeMillis = currentTimeMillis() - startTime;
        executionMonitor.done(totalTimeMillis, format("%n") + storeUpdateMonitor.toString() + format("%n") + "Peak memory usage: " + bytes(peakMemoryUsage));
        log.info("Import completed, took " + Format.duration(totalTimeMillis) + ". " + storeUpdateMonitor);
    } catch (Throwable t) {
        // Log every kind of failure, then rethrow it via launderedException with IOException as the target type.
        log.error("Error during import", t);
        throw Exceptions.launderedException(IOException.class, t);
    } finally {
        // Close whichever caches are still open, i.e. were not closed and nulled above.
        if (nodeRelationshipCache != null) {
            nodeRelationshipCache.close();
        }
        if (nodeLabelsCache != null) {
            nodeLabelsCache.close();
        }
    }
}
Also used : NodeLabelsCache(org.neo4j.unsafe.impl.batchimport.cache.NodeLabelsCache) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship) CountsAccessor(org.neo4j.kernel.impl.api.CountsAccessor) PrimitiveLongIterator(org.neo4j.collection.primitive.PrimitiveLongIterator) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) InputCache(org.neo4j.unsafe.impl.batchimport.input.InputCache) NodeRelationshipCache(org.neo4j.unsafe.impl.batchimport.cache.NodeRelationshipCache) InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) BatchingNeoStores(org.neo4j.unsafe.impl.batchimport.store.BatchingNeoStores) IdGenerator(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdGenerator) IOException(java.io.IOException) StatsProvider(org.neo4j.unsafe.impl.batchimport.stats.StatsProvider) RelationshipStore(org.neo4j.kernel.impl.store.RelationshipStore) IoMonitor(org.neo4j.unsafe.impl.batchimport.store.io.IoMonitor)

Example 28 with InputNode

use of org.neo4j.unsafe.impl.batchimport.input.InputNode in project neo4j by neo4j.

The method shouldImportCsvData of the class ParallelBatchImporterTest.

/**
 * Imports randomly generated nodes and relationships with a {@link ParallelBatchImporter},
 * then opens the resulting store, verifies the data and checks store consistency.
 * If anything fails, the seed, the generated input and the processor assignments
 * are written to a file to help reproduce the failure.
 */
@Test
public void shouldImportCsvData() throws Exception {
    // GIVEN
    ExecutionMonitor monitor = eagerRandomSaturation(config.maxNumberOfProcessors());
    final BatchImporter importer = new ParallelBatchImporter(directory.graphDbDir(), fileSystemRule.get(), config, NullLogService.getInstance(), monitor, EMPTY, Config.empty(), getFormat());
    IdGroupDistribution groups = new IdGroupDistribution(NODE_COUNT, 5, random.random());
    long nodeSeed = random.nextLong();
    long relationshipSeed = random.nextLong();

    // Stays true unless every step below completes.
    boolean failed = true;
    try {
        // WHEN
        importer.doImport(Inputs.input(
                nodes(nodeSeed, NODE_COUNT, inputIdGenerator, groups),
                relationships(relationshipSeed, RELATIONSHIP_COUNT, inputIdGenerator, groups),
                idMapper,
                idGenerator,
                // insanely high bad tolerance, but it will actually never be that many
                silentBadCollector(RELATIONSHIP_COUNT)));

        // THEN
        GraphDatabaseService db = new TestGraphDatabaseFactory().newEmbeddedDatabaseBuilder(directory.graphDbDir()).newGraphDatabase();
        try (Transaction tx = db.beginTx()) {
            inputIdGenerator.reset();
            verifyData(NODE_COUNT, RELATIONSHIP_COUNT, db, groups, nodeSeed, relationshipSeed);
            tx.success();
        } finally {
            db.shutdown();
        }
        assertConsistent(directory.graphDbDir());
        failed = false;
    } finally {
        if (failed) {
            dumpFailureDetails(monitor, groups, nodeSeed, relationshipSeed);
        }
    }
}

/**
 * Writes the random seed, the regenerated input data and the processor assignments
 * of a failed run to the "input" file in the test directory and prints its location.
 */
private void dumpFailureDetails(ExecutionMonitor monitor, IdGroupDistribution groups, long nodeSeed, long relationshipSeed) throws Exception {
    File failureFile = directory.file("input");
    try (PrintStream out = new PrintStream(failureFile)) {
        out.println("Seed used in this failing run: " + random.seed());
        out.println(inputIdGenerator);
        // Reset the generator before regenerating the input from the same seeds.
        inputIdGenerator.reset();
        for (InputNode node : nodes(nodeSeed, NODE_COUNT, inputIdGenerator, groups)) {
            out.println(node);
        }
        for (InputRelationship relationship : relationships(relationshipSeed, RELATIONSHIP_COUNT, inputIdGenerator, groups)) {
            out.println(relationship);
        }
        out.println();
        out.println("Processor assignments");
        out.println(monitor.toString());
    }
    System.err.println("Additional debug information stored in " + failureFile);
}
Also used : GraphDatabaseService(org.neo4j.graphdb.GraphDatabaseService) PrintStream(java.io.PrintStream) InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship) Transaction(org.neo4j.graphdb.Transaction) TestGraphDatabaseFactory(org.neo4j.test.TestGraphDatabaseFactory) ExecutionMonitor(org.neo4j.unsafe.impl.batchimport.staging.ExecutionMonitor) File(java.io.File) Test(org.junit.Test)

Example 29 with InputNode

use of org.neo4j.unsafe.impl.batchimport.input.InputNode in project neo4j by neo4j.

The method verifyImportedData of the class CsvInputBatchImportIT.

// ======================================================
// Below is code for verifying the imported data
// ======================================================
/**
 * Verifies that the database in the test directory contains exactly the given input data.
 *
 * Expected data is first built up from the input; then every node, every relationship
 * and every count in the counts store is checked against it. Expected entries are removed
 * as they are matched, so anything left over at the end means data is missing from the store.
 *
 * @param nodeData         the nodes that were fed to the importer.
 * @param relationshipData the relationships that were fed to the importer.
 */
private void verifyImportedData(List<InputNode> nodeData, List<InputRelationship> relationshipData) {
    // Build up expected data for the verification below
    Map<String, InputNode> expectedNodes = new HashMap<>();
    // node name -> expected labels
    Map<String, String[]> expectedNodeNames = new HashMap<>();
    // start node name -> end node name -> relationship type -> number of such relationships
    Map<String, Map<String, Map<String, AtomicInteger>>> expectedRelationships = new AutoCreatingHashMap<>(nested(String.class, nested(String.class, values(AtomicInteger.class))));
    Map<String, AtomicLong> expectedNodeCounts = new AutoCreatingHashMap<>(values(AtomicLong.class));
    Map<String, Map<String, Map<String, AtomicLong>>> expectedRelationshipCounts = new AutoCreatingHashMap<>(nested(String.class, nested(String.class, values(AtomicLong.class))));
    buildUpExpectedData(nodeData, relationshipData, expectedNodes, expectedNodeNames, expectedRelationships, expectedNodeCounts, expectedRelationshipCounts);
    // Do the verification
    GraphDatabaseService db = new TestGraphDatabaseFactory().newEmbeddedDatabase(directory.graphDbDir());
    try (Transaction tx = db.beginTx()) {
        // Verify nodes: each node is matched by its "name" property and removed from the expectation
        for (Node node : db.getAllNodes()) {
            String name = (String) node.getProperty("name");
            String[] labels = expectedNodeNames.remove(name);
            assertEquals(asSet(labels), names(node.getLabels()));
        }
        // Every expected node must have been seen
        assertEquals(0, expectedNodeNames.size());
        // Verify relationships: count down the expected number per (start, end, type) and
        // prune empty levels of the nested map as they are exhausted
        for (Relationship relationship : db.getAllRelationships()) {
            String startNodeName = (String) relationship.getStartNode().getProperty("name");
            Map<String, Map<String, AtomicInteger>> inner = expectedRelationships.get(startNodeName);
            String endNodeName = (String) relationship.getEndNode().getProperty("name");
            Map<String, AtomicInteger> innerInner = inner.get(endNodeName);
            String type = relationship.getType().name();
            int countAfterwards = innerInner.get(type).decrementAndGet();
            // Going below zero means the store has more of these relationships than expected
            assertThat(countAfterwards, greaterThanOrEqualTo(0));
            if (countAfterwards == 0) {
                innerInner.remove(type);
                if (innerInner.isEmpty()) {
                    inner.remove(endNodeName);
                    if (inner.isEmpty()) {
                        expectedRelationships.remove(startNodeName);
                    }
                }
            }
        }
        // Every expected relationship must have been seen
        assertEquals(0, expectedRelationships.size());
        // Verify counts, TODO how to get counts store other than this way?
        NeoStores neoStores = ((GraphDatabaseAPI) db).getDependencyResolver().resolveDependency(RecordStorageEngine.class).testAccessNeoStores();
        Function<String, Integer> labelTranslationTable = translationTable(neoStores.getLabelTokenStore(), ReadOperations.ANY_LABEL);
        for (Pair<Integer, Long> count : allNodeCounts(labelTranslationTable, expectedNodeCounts)) {
            assertEquals("Label count mismatch for label " + count.first(), count.other().longValue(), neoStores.getCounts().nodeCount(count.first().intValue(), newDoubleLongRegister()).readSecond());
        }
        Function<String, Integer> relationshipTypeTranslationTable = translationTable(neoStores.getRelationshipTypeTokenStore(), ReadOperations.ANY_RELATIONSHIP_TYPE);
        for (Pair<RelationshipCountKey, Long> count : allRelationshipCounts(labelTranslationTable, relationshipTypeTranslationTable, expectedRelationshipCounts)) {
            RelationshipCountKey key = count.first();
            // The message names the relationship count key so a failure here is not mistaken for a label count failure
            assertEquals("Relationship count mismatch for " + key, count.other().longValue(), neoStores.getCounts().relationshipCount(key.startLabel, key.type, key.endLabel, newDoubleLongRegister()).readSecond());
        }
        tx.success();
    } finally {
        db.shutdown();
    }
}
Also used : AutoCreatingHashMap(org.neo4j.kernel.impl.util.AutoCreatingHashMap) HashMap(java.util.HashMap) AutoCreatingHashMap(org.neo4j.kernel.impl.util.AutoCreatingHashMap) InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) Node(org.neo4j.graphdb.Node) GraphDatabaseAPI(org.neo4j.kernel.internal.GraphDatabaseAPI) TestGraphDatabaseFactory(org.neo4j.test.TestGraphDatabaseFactory) InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) GraphDatabaseService(org.neo4j.graphdb.GraphDatabaseService) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AtomicLong(java.util.concurrent.atomic.AtomicLong) Transaction(org.neo4j.graphdb.Transaction) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RecordStorageEngine(org.neo4j.kernel.impl.storageengine.impl.recordstorage.RecordStorageEngine) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship) Relationship(org.neo4j.graphdb.Relationship) NeoStores(org.neo4j.kernel.impl.store.NeoStores) AtomicLong(java.util.concurrent.atomic.AtomicLong) Map(java.util.Map) HashMap(java.util.HashMap) AutoCreatingHashMap(org.neo4j.kernel.impl.util.AutoCreatingHashMap)

Example 30 with InputNode

use of org.neo4j.unsafe.impl.batchimport.input.InputNode in project neo4j by neo4j.

The method shouldIgnoreValuesAfterHeaderEntries of the class CsvInputTest.

/**
 * Each data row carries more values than the header declares (id and name only);
 * the surplus values must not show up on the parsed nodes.
 */
@Test
public void shouldIgnoreValuesAfterHeaderEntries() throws Exception {
    // GIVEN two rows with four values each, but a header of only two entries
    String csv = "1,zergling,bubble,bobble\n" + "2,scv,pun,intended";
    Iterable<DataFactory<InputNode>> nodeData = dataIterable(data(csv));
    Input csvInput = new CsvInput(
            nodeData,
            header(entry(null, Type.ID, extractors.long_()), entry("name", Type.PROPERTY, extractors.string())),
            null,
            null,
            IdType.ACTUAL,
            config(COMMAS),
            silentBadCollector(4),
            getRuntime().availableProcessors());

    // WHEN
    try (ResourceIterator<InputNode> nodeIterator = csvInput.nodes().iterator()) {
        // THEN only the id and the name survive for each row
        InputNode first = nodeIterator.next();
        assertNode(first, 1L, new Object[] { "name", "zergling" }, labels());

        InputNode second = nodeIterator.next();
        assertNode(second, 2L, new Object[] { "name", "scv" }, labels());

        assertFalse(nodeIterator.hasNext());
    }
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) Input(org.neo4j.unsafe.impl.batchimport.input.Input) Test(org.junit.Test)

Aggregations

InputNode (org.neo4j.unsafe.impl.batchimport.input.InputNode)42 Test (org.junit.Test)32 Input (org.neo4j.unsafe.impl.batchimport.input.Input)21 InputRelationship (org.neo4j.unsafe.impl.batchimport.input.InputRelationship)8 NodeRecord (org.neo4j.kernel.impl.store.record.NodeRecord)6 Collector (org.neo4j.unsafe.impl.batchimport.input.Collector)6 Groups (org.neo4j.unsafe.impl.batchimport.input.Groups)4 File (java.io.File)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)3 Matchers.anyString (org.mockito.Matchers.anyString)3 StatsProvider (org.neo4j.unsafe.impl.batchimport.stats.StatsProvider)3 StringReader (java.io.StringReader)2 Writer (java.io.Writer)2 CharReadable (org.neo4j.csv.reader.CharReadable)2 GraphDatabaseService (org.neo4j.graphdb.GraphDatabaseService)2 Transaction (org.neo4j.graphdb.Transaction)2 NeoStores (org.neo4j.kernel.impl.store.NeoStores)2