Search in sources :

Example 21 with InputRelationship

use of org.neo4j.unsafe.impl.batchimport.input.InputRelationship in project neo4j by neo4j.

the class ParallelBatchImporter method importRelationships.

/**
 * Imports all relationships from the input, one relationship type per loop iteration, and finally
 * links the relationship chains of sparse nodes in a single closing pass.
 *
 * For every type handed out by the {@link PerTypeRelationshipSplitter} the following stages run in order:
 * a {@code RelationshipStage} (3a), a {@code NodeFirstRelationshipStage} for dense nodes (4a) and a
 * {@code RelationshipLinkbackStage} for dense nodes (5a). After the loop the sparse variants (4b, 5b)
 * run once over the id range {@code [0, nextRelationshipId)}, and, if any minority types were given,
 * a {@code BatchInsertRelationshipsStage} is executed for them.
 *
 * @param nodeRelationshipCache cache handed to every stage; its scan direction is toggled before the
 * stages and its changed chunks are cleared at the end of each per-type iteration.
 * @param storeUpdateMonitor monitor passed on to each {@code RelationshipStage}.
 * @param neoStore provides the node, relationship and temporary relationship group stores as well as
 * the relationship type repository.
 * @param writeMonitor I/O monitor passed on to each {@code RelationshipStage}.
 * @param idMapper passed on to the {@code RelationshipStage} and {@code BatchInsertRelationshipsStage}.
 * @param relationships the input relationships; iterated exactly once through the splitter.
 * @param inputCache passed on to the splitter.
 * @param allRelationshipTypes all relationship types; passed to the splitter and used for the
 * "(i/n)" part of the stage topic.
 * @param minorityRelationshipTypes types matched by the predicate given to the splitter; presumably the
 * splitter sets their relationships aside for the final batch-insert stage -- verify against
 * {@code PerTypeRelationshipSplitter}.
 */
private void importRelationships(NodeRelationshipCache nodeRelationshipCache, CountingStoreUpdateMonitor storeUpdateMonitor, BatchingNeoStores neoStore, IoMonitor writeMonitor, IdMapper idMapper, InputIterable<InputRelationship> relationships, InputCache inputCache, Object[] allRelationshipTypes, Object[] minorityRelationshipTypes) {
    // Imports the relationships from the Input. This isn't as straightforward as importing nodes,
    // since keeping track of and updating heads of relationship chains in scenarios where most nodes
    // are dense and there are many relationship types scales poorly w/ regards to cache memory usage
    // and also, as a side-effect, the time required to update this cache.
    //
    // The approach is instead to do multiple iterations where each iteration imports relationships
    // of a single type. For each iteration Node --> Relationship and Relationship --> Relationship
    // stages _for dense nodes only_ are run so that the cache can be reused to hold relationship chain heads
    // of the next type in the next iteration. All relationships will be imported this way and then
    // finally there will be one Node --> Relationship and Relationship --> Relationship stage linking
    // all sparse relationship chains together.
    Set<Object> minorityRelationshipTypeSet = asSet(minorityRelationshipTypes);
    PerTypeRelationshipSplitter perTypeIterator = new PerTypeRelationshipSplitter(relationships.iterator(), allRelationshipTypes, minorityRelationshipTypeSet::contains, neoStore.getRelationshipTypeRepository(), inputCache);
    // First relationship id of the type currently being imported; advanced after each iteration
    long nextRelationshipId = 0;
    // Stage configurations whose batch size is derived from the records-per-page of the respective store
    Configuration relationshipConfig = withBatchSize(config, neoStore.getRelationshipStore().getRecordsPerPage());
    Configuration nodeConfig = withBatchSize(config, neoStore.getNodeStore().getRecordsPerPage());
    for (int i = 0; perTypeIterator.hasNext(); i++) {
        // Stage 3a -- relationships, properties
        nodeRelationshipCache.setForwardScan(true);
        // NOTE: currentType() is read before next() is called -- keep this order
        Object currentType = perTypeIterator.currentType();
        int currentTypeId = neoStore.getRelationshipTypeRepository().getOrCreateId(currentType);
        InputIterator<InputRelationship> perType = perTypeIterator.next();
        // Topic shown for the stages of this type, e.g. " [:KNOWS] (1/3)"
        String topic = " [:" + currentType + "] (" + (i + 1) + "/" + allRelationshipTypes.length + ")";
        final RelationshipStage relationshipStage = new RelationshipStage(topic, config, writeMonitor, perType, idMapper, neoStore, nodeRelationshipCache, storeUpdateMonitor, nextRelationshipId);
        executeStage(relationshipStage);
        // Stage 4a -- set node nextRel fields for dense nodes
        executeStage(new NodeFirstRelationshipStage(topic, nodeConfig, neoStore.getNodeStore(), neoStore.getTemporaryRelationshipGroupStore(), nodeRelationshipCache, true, /*dense*/
        currentTypeId));
        // Stage 5a -- link relationship chains together for dense nodes,
        // covering only the id range written by this iteration's relationship stage
        nodeRelationshipCache.setForwardScan(false);
        executeStage(new RelationshipLinkbackStage(topic, relationshipConfig, neoStore.getRelationshipStore(), nodeRelationshipCache, nextRelationshipId, relationshipStage.getNextRelationshipId(), true));
        // The next type continues where this one ended
        nextRelationshipId = relationshipStage.getNextRelationshipId();
        // cheap higher level clearing
        nodeRelationshipCache.clearChangedChunks(true);
    }
    String topic = " Sparse";
    nodeRelationshipCache.setForwardScan(true);
    // Stage 4b -- set node nextRel fields for sparse nodes
    executeStage(new NodeFirstRelationshipStage(topic, nodeConfig, neoStore.getNodeStore(), neoStore.getTemporaryRelationshipGroupStore(), nodeRelationshipCache, false, /*sparse*/
    -1));
    // Stage 5b -- link relationship chains together for sparse nodes, over all ids imported above
    nodeRelationshipCache.setForwardScan(false);
    executeStage(new RelationshipLinkbackStage(topic, relationshipConfig, neoStore.getRelationshipStore(), nodeRelationshipCache, 0, nextRelationshipId, false));
    if (minorityRelationshipTypes.length > 0) {
        // Do some batch insertion style random-access insertions for super small minority types
        executeStage(new BatchInsertRelationshipsStage(config, idMapper, perTypeIterator.getMinorityRelationships(), neoStore, nextRelationshipId));
    }
}
Also used : PerTypeRelationshipSplitter(org.neo4j.unsafe.impl.batchimport.input.PerTypeRelationshipSplitter) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship)

Example 22 with InputRelationship

use of org.neo4j.unsafe.impl.batchimport.input.InputRelationship in project neo4j by neo4j.

the class RelationshipRecordPreparationStep method process.

/**
 * Builds one {@link RelationshipRecord} per input relationship of the batch and forwards the batch.
 *
 * Record ids are assigned consecutively starting at {@code batch.firstRecordId}. {@code batch.ids} is
 * read as consecutive (start node id, end node id) pairs, one pair per input relationship. A record
 * whose start or end node id is {@code ID_NOT_FOUND} is only marked as not in use; every other record
 * is marked in use and gets its nodes, type and initial chain fields set.
 *
 * @param batch batch whose {@code records} array is (re)created and populated here.
 * @param sender receives the batch once all records have been prepared.
 */
@Override
protected void process(Batch<InputRelationship, RelationshipRecord> batch, BatchSender sender) throws Throwable {
    final int count = batch.input.length;
    final long firstId = batch.firstRecordId;
    batch.records = new RelationshipRecord[count];
    for (int index = 0; index < count; index++) {
        RelationshipRecord record = new RelationshipRecord(firstId + index);
        batch.records[index] = record;
        InputRelationship input = batch.input[index];

        // Two ids per relationship: start node first, end node second
        long startNode = batch.ids[2 * index];
        long endNode = batch.ids[2 * index + 1];

        boolean resolved = startNode != ID_NOT_FOUND && endNode != ID_NOT_FOUND;
        record.setInUse(resolved);
        if (!resolved) {
            // Nothing more to fill in for a relationship with an unresolved node
            continue;
        }

        // Most rels will not be first in chain
        record.setFirstInFirstChain(false);
        record.setFirstInSecondChain(false);
        record.setFirstPrevRel(Record.NO_NEXT_RELATIONSHIP.intValue());
        record.setSecondPrevRel(Record.NO_NEXT_RELATIONSHIP.intValue());
        record.setFirstNode(startNode);
        record.setSecondNode(endNode);

        // Use the type id carried by the input if present, otherwise resolve it from the type
        int typeId;
        if (input.hasTypeId()) {
            typeId = input.typeId();
        } else {
            typeId = relationshipTypeRepository.getOrCreateId(input.type());
        }
        record.setType(typeId);
    }
    sender.send(batch);
}
Also used : RelationshipRecord(org.neo4j.kernel.impl.store.record.RelationshipRecord) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship)

Example 23 with InputRelationship

use of org.neo4j.unsafe.impl.batchimport.input.InputRelationship in project neo4j by neo4j.

the class CsvInputBatchImportIT method verifyImportedData.

// ======================================================
// Below is code for verifying the imported data
// ======================================================
/**
 * Opens the imported database and verifies that its nodes, relationships and counts store match
 * what is expected from the given input data.
 *
 * The expected data is built by {@code buildUpExpectedData} and then consumed while iterating the
 * database: every node/relationship found removes (or decrements) its expected entry, so that the
 * expectation maps must be empty at the end. Finally the counts store is compared against the
 * expected node and relationship counts.
 *
 * @param nodeData the input nodes that were imported.
 * @param relationshipData the input relationships that were imported.
 */
private void verifyImportedData(List<InputNode> nodeData, List<InputRelationship> relationshipData) {
    // Build up expected data for the verification below
    // expectedNodes: keyed by node id
    Map<String, InputNode> expectedNodes = new HashMap<>();
    // expectedNodeNames: node name --> label names
    Map<String, String[]> expectedNodeNames = new HashMap<>();
    // expectedRelationships: start node name --> end node name --> type --> remaining count
    Map<String, Map<String, Map<String, AtomicInteger>>> expectedRelationships = new AutoCreatingHashMap<>(nested(String.class, nested(String.class, values(AtomicInteger.class))));
    Map<String, AtomicLong> expectedNodeCounts = new AutoCreatingHashMap<>(values(AtomicLong.class));
    Map<String, Map<String, Map<String, AtomicLong>>> expectedRelationshipCounts = new AutoCreatingHashMap<>(nested(String.class, nested(String.class, values(AtomicLong.class))));
    buildUpExpectedData(nodeData, relationshipData, expectedNodes, expectedNodeNames, expectedRelationships, expectedNodeCounts, expectedRelationshipCounts);
    // Do the verification
    GraphDatabaseService db = new TestGraphDatabaseFactory().newEmbeddedDatabase(directory.graphDbDir());
    try (Transaction tx = db.beginTx()) {
        // Verify nodes: each node consumes its expected entry, nothing may be left afterwards
        for (Node node : db.getAllNodes()) {
            String name = (String) node.getProperty("name");
            String[] labels = expectedNodeNames.remove(name);
            assertEquals(asSet(labels), names(node.getLabels()));
        }
        assertEquals(0, expectedNodeNames.size());
        // Verify relationships: each relationship decrements its expected counter; exhausted
        // entries are pruned bottom-up so that the outer map ends up empty
        for (Relationship relationship : db.getAllRelationships()) {
            String startNodeName = (String) relationship.getStartNode().getProperty("name");
            Map<String, Map<String, AtomicInteger>> inner = expectedRelationships.get(startNodeName);
            String endNodeName = (String) relationship.getEndNode().getProperty("name");
            Map<String, AtomicInteger> innerInner = inner.get(endNodeName);
            String type = relationship.getType().name();
            int countAfterwards = innerInner.get(type).decrementAndGet();
            // A negative count means the database holds more such relationships than expected
            assertThat(countAfterwards, greaterThanOrEqualTo(0));
            if (countAfterwards == 0) {
                innerInner.remove(type);
                if (innerInner.isEmpty()) {
                    inner.remove(endNodeName);
                    if (inner.isEmpty()) {
                        expectedRelationships.remove(startNodeName);
                    }
                }
            }
        }
        assertEquals(0, expectedRelationships.size());
        // Verify counts, TODO how to get counts store other than this way?
        NeoStores neoStores = ((GraphDatabaseAPI) db).getDependencyResolver().resolveDependency(RecordStorageEngine.class).testAccessNeoStores();
        Function<String, Integer> labelTranslationTable = translationTable(neoStores.getLabelTokenStore(), ReadOperations.ANY_LABEL);
        for (Pair<Integer, Long> count : allNodeCounts(labelTranslationTable, expectedNodeCounts)) {
            assertEquals("Label count mismatch for label " + count.first(), count.other().longValue(), neoStores.getCounts().nodeCount(count.first().intValue(), newDoubleLongRegister()).readSecond());
        }
        Function<String, Integer> relationshipTypeTranslationTable = translationTable(neoStores.getRelationshipTypeTokenStore(), ReadOperations.ANY_RELATIONSHIP_TYPE);
        for (Pair<RelationshipCountKey, Long> count : allRelationshipCounts(labelTranslationTable, relationshipTypeTranslationTable, expectedRelationshipCounts)) {
            RelationshipCountKey key = count.first();
            // Message names the relationship count (it previously reused the label-count message)
            assertEquals("Relationship count mismatch for " + key, count.other().longValue(), neoStores.getCounts().relationshipCount(key.startLabel, key.type, key.endLabel, newDoubleLongRegister()).readSecond());
        }
        tx.success();
    } finally {
        db.shutdown();
    }
}
Also used : AutoCreatingHashMap(org.neo4j.kernel.impl.util.AutoCreatingHashMap) HashMap(java.util.HashMap) AutoCreatingHashMap(org.neo4j.kernel.impl.util.AutoCreatingHashMap) InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) Node(org.neo4j.graphdb.Node) GraphDatabaseAPI(org.neo4j.kernel.internal.GraphDatabaseAPI) TestGraphDatabaseFactory(org.neo4j.test.TestGraphDatabaseFactory) InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) GraphDatabaseService(org.neo4j.graphdb.GraphDatabaseService) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) AtomicLong(java.util.concurrent.atomic.AtomicLong) Transaction(org.neo4j.graphdb.Transaction) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) RecordStorageEngine(org.neo4j.kernel.impl.storageengine.impl.recordstorage.RecordStorageEngine) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship) Relationship(org.neo4j.graphdb.Relationship) NeoStores(org.neo4j.kernel.impl.store.NeoStores) AtomicLong(java.util.concurrent.atomic.AtomicLong) Map(java.util.Map) HashMap(java.util.HashMap) AutoCreatingHashMap(org.neo4j.kernel.impl.util.AutoCreatingHashMap)

Example 24 with InputRelationship

use of org.neo4j.unsafe.impl.batchimport.input.InputRelationship in project neo4j by neo4j.

the class CsvInputTest method shouldHaveRelationshipsSpecifyStartEndNodeIdGroupsInHeader.

/**
 * Relationship headers may name an id group for the start node and another for the end node;
 * the parsed relationships must then report those groups together with the ids.
 */
@Test
public void shouldHaveRelationshipsSpecifyStartEndNodeIdGroupsInHeader() throws Exception {
    // GIVEN two distinct id groups, one for each end of the relationship
    Groups idGroups = new Groups();
    Group startGroup = idGroups.getOrCreate("StartGroup");
    Group endGroup = idGroups.getOrCreate("EndGroup");
    IdType actualIds = IdType.ACTUAL;
    Iterable<DataFactory<InputRelationship>> relationshipData = dataIterable(data("123,TYPE,234\n" + "345,TYPE,456"));
    int processors = getRuntime().availableProcessors();
    // ... and a relationship header binding START_ID/END_ID to those groups (no node input at all)
    Input input = new CsvInput(
            null,
            null,
            relationshipData,
            header(
                    entry(null, Type.START_ID, startGroup.name(), actualIds.extractor(extractors)),
                    entry(null, Type.TYPE, extractors.string()),
                    entry(null, Type.END_ID, endGroup.name(), actualIds.extractor(extractors))),
            actualIds,
            config(COMMAS),
            silentBadCollector(0),
            processors);

    // WHEN/THEN both rows come back with the groups from the header, and nothing else follows
    try (InputIterator<InputRelationship> parsed = input.relationships().iterator()) {
        assertRelationship(parsed.next(), startGroup, 123L, endGroup, 234L, "TYPE", properties());
        assertRelationship(parsed.next(), startGroup, 345L, endGroup, 456L, "TYPE", properties());
        assertFalse(parsed.hasNext());
    }
}
Also used : Group(org.neo4j.unsafe.impl.batchimport.input.Group) Input(org.neo4j.unsafe.impl.batchimport.input.Input) Groups(org.neo4j.unsafe.impl.batchimport.input.Groups) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship) Test(org.junit.Test)

Example 25 with InputRelationship

use of org.neo4j.unsafe.impl.batchimport.input.InputRelationship in project neo4j by neo4j.

the class CsvInputTest method shouldCloseDataIteratorsInTheEnd.

/**
 * Closing the node and relationship iterators obtained from the input must close the
 * underlying character sources as well.
 */
@Test
public void shouldCloseDataIteratorsInTheEnd() throws Exception {
    // GIVEN one node source and one relationship source, both observable for being closed
    CharReadable nodeSource = charReader("1");
    CharReadable relationshipSource = charReader("1,1");
    IdType stringIds = IdType.STRING;
    Iterable<DataFactory<InputNode>> nodeFactories = dataIterable(given(nodeSource));
    Iterable<DataFactory<InputRelationship>> relationshipFactories = dataIterable(data(relationshipSource, defaultRelationshipType("TYPE")));
    int processors = getRuntime().availableProcessors();
    Input input = new CsvInput(
            nodeFactories,
            header(entry(null, Type.ID, stringIds.extractor(extractors))),
            relationshipFactories,
            header(
                    entry(null, Type.START_ID, stringIds.extractor(extractors)),
                    entry(null, Type.END_ID, stringIds.extractor(extractors))),
            stringIds,
            config(COMMAS),
            silentBadCollector(0),
            processors);

    // WHEN a single item is read from each iterator before the iterator is closed
    try (ResourceIterator<InputNode> nodes = input.nodes().iterator()) {
        nodes.next();
    }
    try (ResourceIterator<InputRelationship> relationships = input.relationships().iterator()) {
        relationships.next();
    }

    // THEN both underlying sources have been closed
    assertClosed(nodeSource);
    assertClosed(relationshipSource);
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) Input(org.neo4j.unsafe.impl.batchimport.input.Input) CharReadable(org.neo4j.csv.reader.CharReadable) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship) Test(org.junit.Test)

Aggregations

InputRelationship (org.neo4j.unsafe.impl.batchimport.input.InputRelationship)29 Test (org.junit.Test)14 RelationshipRecord (org.neo4j.kernel.impl.store.record.RelationshipRecord)9 Input (org.neo4j.unsafe.impl.batchimport.input.Input)9 InputNode (org.neo4j.unsafe.impl.batchimport.input.InputNode)8 StageControl (org.neo4j.unsafe.impl.batchimport.staging.StageControl)4 HashMap (java.util.HashMap)3 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)3 Matchers.anyString (org.mockito.Matchers.anyString)3 RelationshipStore (org.neo4j.kernel.impl.store.RelationshipStore)3 File (java.io.File)2 IOException (java.io.IOException)2 Map (java.util.Map)2 AtomicLong (java.util.concurrent.atomic.AtomicLong)2 GraphDatabaseService (org.neo4j.graphdb.GraphDatabaseService)2 Transaction (org.neo4j.graphdb.Transaction)2 NeoStores (org.neo4j.kernel.impl.store.NeoStores)2 TestGraphDatabaseFactory (org.neo4j.test.TestGraphDatabaseFactory)2 BatchImporter (org.neo4j.unsafe.impl.batchimport.BatchImporter)2 ParallelBatchImporter (org.neo4j.unsafe.impl.batchimport.ParallelBatchImporter)2