use of org.neo4j.unsafe.impl.batchimport.input.InputRelationship in project neo4j by neo4j.
the class ParallelBatchImporter method importRelationships.
/**
 * Imports all relationships from the input, one relationship type per iteration, and afterwards
 * runs the stages that link the sparse relationship chains and insert the minority types.
 *
 * <p>For each type handed out by the {@code PerTypeRelationshipSplitter} three stages are executed
 * in order: a {@code RelationshipStage} (3a), a {@code NodeFirstRelationshipStage} for dense nodes
 * (4a) and a {@code RelationshipLinkbackStage} for dense nodes (5a). After the loop the same two
 * node/linkback stages are executed once more for sparse nodes (4b, 5b), and finally, if any
 * minority types were given, a {@code BatchInsertRelationshipsStage} is executed.
 *
 * @param nodeRelationshipCache cache handed to every stage; its scan direction is switched between
 * stages and its changed chunks are cleared after each type.
 * @param storeUpdateMonitor monitor passed on to each {@code RelationshipStage}.
 * @param neoStore provides the relationship type repository and the node, relationship and temporary
 * relationship group stores used by the stages.
 * @param writeMonitor I/O monitor passed on to each {@code RelationshipStage}.
 * @param idMapper passed on to each {@code RelationshipStage} and to the {@code BatchInsertRelationshipsStage}.
 * @param relationships input relationships; {@code iterator()} is called once and given to the splitter.
 * @param inputCache passed on to the {@code PerTypeRelationshipSplitter}.
 * @param allRelationshipTypes passed on to the splitter; its length is used as the total in the
 * per-type progress topic.
 * @param minorityRelationshipTypes types the splitter is told to treat as minority types; when
 * non-empty their relationships are inserted by the final {@code BatchInsertRelationshipsStage}.
 */
private void importRelationships(NodeRelationshipCache nodeRelationshipCache, CountingStoreUpdateMonitor storeUpdateMonitor, BatchingNeoStores neoStore, IoMonitor writeMonitor, IdMapper idMapper, InputIterable<InputRelationship> relationships, InputCache inputCache, Object[] allRelationshipTypes, Object[] minorityRelationshipTypes) {
// Imports the relationships from the Input. This isn't as straightforward as importing nodes,
// since keeping track of and updating heads of relationship chains in scenarios where most nodes
// are dense and there are many relationship types scales poorly w/ regards to cache memory usage
// and, as a side effect, the time required to update this cache.
//
// The approach is instead to do multiple iterations where each iteration imports relationships
// of a single type. For each iteration Node --> Relationship and Relationship --> Relationship
// stages _for dense nodes only_ are run so that the cache can be reused to hold relationship chain heads
// of the next type in the next iteration. All relationships will be imported this way and then
// finally there will be one Node --> Relationship and Relationship --> Relationship stage linking
// all sparse relationship chains together.
Set<Object> minorityRelationshipTypeSet = asSet(minorityRelationshipTypes);
PerTypeRelationshipSplitter perTypeIterator = new PerTypeRelationshipSplitter(relationships.iterator(), allRelationshipTypes, minorityRelationshipTypeSet::contains, neoStore.getRelationshipTypeRepository(), inputCache);
// Id handed to the next RelationshipStage as its starting id; advanced after every type
long nextRelationshipId = 0;
// Same configuration as 'config' but with the batch size set to the records-per-page of the respective store
Configuration relationshipConfig = withBatchSize(config, neoStore.getRelationshipStore().getRecordsPerPage());
Configuration nodeConfig = withBatchSize(config, neoStore.getNodeStore().getRecordsPerPage());
for (int i = 0; perTypeIterator.hasNext(); i++) {
// Stage 3a -- relationships, properties
nodeRelationshipCache.setForwardScan(true);
// The current type is read before next() is called on the splitter
Object currentType = perTypeIterator.currentType();
int currentTypeId = neoStore.getRelationshipTypeRepository().getOrCreateId(currentType);
InputIterator<InputRelationship> perType = perTypeIterator.next();
// Progress label, e.g. " [:TYPE] (1/3)"
String topic = " [:" + currentType + "] (" + (i + 1) + "/" + allRelationshipTypes.length + ")";
// NOTE(review): this stage is given the unmodified 'config', not 'relationshipConfig' -- confirm that is intended
final RelationshipStage relationshipStage = new RelationshipStage(topic, config, writeMonitor, perType, idMapper, neoStore, nodeRelationshipCache, storeUpdateMonitor, nextRelationshipId);
executeStage(relationshipStage);
// Stage 4a -- set node nextRel fields for dense nodes
executeStage(new NodeFirstRelationshipStage(topic, nodeConfig, neoStore.getNodeStore(), neoStore.getTemporaryRelationshipGroupStore(), nodeRelationshipCache, true, /*dense*/
currentTypeId));
// Stage 5a -- link relationship chains together for dense nodes
nodeRelationshipCache.setForwardScan(false);
// The linkback stage is given the id this type started at and the id reached by the relationship stage
executeStage(new RelationshipLinkbackStage(topic, relationshipConfig, neoStore.getRelationshipStore(), nodeRelationshipCache, nextRelationshipId, relationshipStage.getNextRelationshipId(), true));
nextRelationshipId = relationshipStage.getNextRelationshipId();
// cheap higher level clearing
nodeRelationshipCache.clearChangedChunks(true);
}
String topic = " Sparse";
nodeRelationshipCache.setForwardScan(true);
// Stage 4b -- set node nextRel fields for sparse nodes
executeStage(new NodeFirstRelationshipStage(topic, nodeConfig, neoStore.getNodeStore(), neoStore.getTemporaryRelationshipGroupStore(), nodeRelationshipCache, false, /*sparse*/
-1));
// Stage 5b -- link relationship chains together for sparse nodes
nodeRelationshipCache.setForwardScan(false);
// This linkback stage is given the id range from 0 up to the id reached after the last type
executeStage(new RelationshipLinkbackStage(topic, relationshipConfig, neoStore.getRelationshipStore(), nodeRelationshipCache, 0, nextRelationshipId, false));
if (minorityRelationshipTypes.length > 0) {
// Do some batch insertion style random-access insertions for super small minority types
executeStage(new BatchInsertRelationshipsStage(config, idMapper, perTypeIterator.getMinorityRelationships(), neoStore, nextRelationshipId));
}
}
use of org.neo4j.unsafe.impl.batchimport.input.InputRelationship in project neo4j by neo4j.
the class RelationshipRecordPreparationStep method process.
/**
 * Creates one {@link RelationshipRecord} per input relationship in the batch and forwards the batch.
 *
 * <p>Record ids are assigned consecutively starting at {@code batch.firstRecordId}. The start and
 * end node ids of input relationship {@code n} are read from {@code batch.ids[2n]} and
 * {@code batch.ids[2n + 1]}. If either is {@code ID_NOT_FOUND} the record is marked as not in use
 * and left otherwise untouched.
 *
 * @param batch batch whose {@code records} array is (re)created and filled by this step.
 * @param sender receives the batch once all records have been prepared.
 */
@Override
protected void process(Batch<InputRelationship, RelationshipRecord> batch, BatchSender sender) throws Throwable {
    batch.records = new RelationshipRecord[batch.input.length];
    long recordId = batch.firstRecordId;
    for (int index = 0; index < batch.records.length; index++, recordId++) {
        RelationshipRecord record = new RelationshipRecord(recordId);
        batch.records[index] = record;
        InputRelationship input = batch.input[index];

        // Node ids are stored pairwise: start node id followed by end node id
        long startNodeId = batch.ids[2 * index];
        long endNodeId = batch.ids[2 * index + 1];
        boolean bothNodesFound = startNodeId != ID_NOT_FOUND && endNodeId != ID_NOT_FOUND;
        if (!bothNodesFound) {
            record.setInUse(false);
            continue;
        }

        record.setInUse(true);
        // Most rels will not be first in chain
        record.setFirstInFirstChain(false);
        record.setFirstInSecondChain(false);
        record.setFirstPrevRel(Record.NO_NEXT_RELATIONSHIP.intValue());
        record.setSecondPrevRel(Record.NO_NEXT_RELATIONSHIP.intValue());
        record.setFirstNode(startNodeId);
        record.setSecondNode(endNodeId);

        // Use the type id carried by the input if there is one, otherwise resolve it from the type
        int typeId;
        if (input.hasTypeId()) {
            typeId = input.typeId();
        } else {
            typeId = relationshipTypeRepository.getOrCreateId(input.type());
        }
        record.setType(typeId);
    }
    sender.send(batch);
}
use of org.neo4j.unsafe.impl.batchimport.input.InputRelationship in project neo4j by neo4j.
the class CsvInputBatchImportIT method verifyImportedData.
// ======================================================
// Below is code for verifying the imported data
// ======================================================
/**
 * Opens the database in {@code directory.graphDbDir()} and verifies that its nodes, relationships
 * and counts match what is expected from the given input data.
 *
 * <p>Expected data is built by {@code buildUpExpectedData} and consumed entry by entry while the
 * database is scanned, so that anything left over afterwards means data was missing from the
 * database. The database is always shut down, also when an assertion fails.
 *
 * @param nodeData the nodes that were given to the import.
 * @param relationshipData the relationships that were given to the import.
 * @throws AssertionError if the database contents or counts differ from the expected data.
 */
private void verifyImportedData(List<InputNode> nodeData, List<InputRelationship> relationshipData) {
    // Build up expected data for the verification below
    Map<String, InputNode> /*id*/ expectedNodes = new HashMap<>();
    // node name --> labels
    Map<String, String[]> expectedNodeNames = new HashMap<>();
    // start node name --> end node name --> type --> number of relationships still to be seen
    Map<String, Map<String, Map<String, AtomicInteger>>> expectedRelationships = new AutoCreatingHashMap<>(nested(String.class, nested(String.class, values(AtomicInteger.class))));
    Map<String, AtomicLong> expectedNodeCounts = new AutoCreatingHashMap<>(values(AtomicLong.class));
    Map<String, Map<String, Map<String, AtomicLong>>> expectedRelationshipCounts = new AutoCreatingHashMap<>(nested(String.class, nested(String.class, values(AtomicLong.class))));
    buildUpExpectedData(nodeData, relationshipData, expectedNodes, expectedNodeNames, expectedRelationships, expectedNodeCounts, expectedRelationshipCounts);
    // Do the verification
    GraphDatabaseService db = new TestGraphDatabaseFactory().newEmbeddedDatabase(directory.graphDbDir());
    try (Transaction tx = db.beginTx()) {
        // Verify nodes
        for (Node node : db.getAllNodes()) {
            String name = (String) node.getProperty("name");
            // Fail with a clear message instead of passing a null label array on to asSet(...)
            if (!expectedNodeNames.containsKey(name)) {
                throw new AssertionError("Found node with unexpected or duplicate name '" + name + "'");
            }
            String[] labels = expectedNodeNames.remove(name);
            assertEquals(asSet(labels), names(node.getLabels()));
        }
        // Every expected node must have been seen
        assertEquals(0, expectedNodeNames.size());
        // Verify relationships
        for (Relationship relationship : db.getAllRelationships()) {
            String startNodeName = (String) relationship.getStartNode().getProperty("name");
            Map<String, Map<String, AtomicInteger>> inner = expectedRelationships.get(startNodeName);
            String endNodeName = (String) relationship.getEndNode().getProperty("name");
            Map<String, AtomicInteger> innerInner = inner.get(endNodeName);
            String type = relationship.getType().name();
            int countAfterwards = innerInner.get(type).decrementAndGet();
            // Going below zero means the database has more such relationships than expected
            assertThat(countAfterwards, greaterThanOrEqualTo(0));
            if (countAfterwards == 0) {
                // Prune exhausted entries so that an empty map at the end means "all seen"
                innerInner.remove(type);
                if (innerInner.isEmpty()) {
                    inner.remove(endNodeName);
                    if (inner.isEmpty()) {
                        expectedRelationships.remove(startNodeName);
                    }
                }
            }
        }
        assertEquals(0, expectedRelationships.size());
        // Verify counts, TODO how to get counts store other than this way?
        NeoStores neoStores = ((GraphDatabaseAPI) db).getDependencyResolver().resolveDependency(RecordStorageEngine.class).testAccessNeoStores();
        Function<String, Integer> labelTranslationTable = translationTable(neoStores.getLabelTokenStore(), ReadOperations.ANY_LABEL);
        for (Pair<Integer, Long> count : allNodeCounts(labelTranslationTable, expectedNodeCounts)) {
            assertEquals("Label count mismatch for label " + count.first(), count.other().longValue(), neoStores.getCounts().nodeCount(count.first().intValue(), newDoubleLongRegister()).readSecond());
        }
        Function<String, Integer> relationshipTypeTranslationTable = translationTable(neoStores.getRelationshipTypeTokenStore(), ReadOperations.ANY_RELATIONSHIP_TYPE);
        for (Pair<RelationshipCountKey, Long> count : allRelationshipCounts(labelTranslationTable, relationshipTypeTranslationTable, expectedRelationshipCounts)) {
            RelationshipCountKey key = count.first();
            assertEquals("Relationship count mismatch for " + key, count.other().longValue(), neoStores.getCounts().relationshipCount(key.startLabel, key.type, key.endLabel, newDoubleLongRegister()).readSecond());
        }
        tx.success();
    } finally {
        db.shutdown();
    }
}
use of org.neo4j.unsafe.impl.batchimport.input.InputRelationship in project neo4j by neo4j.
the class CsvInputTest method shouldHaveRelationshipsSpecifyStartEndNodeIdGroupsInHeader.
/**
 * Relationship headers may name an id group for the start and end node columns; the relationships
 * read from the input must then carry those groups together with the parsed ids.
 */
@Test
public void shouldHaveRelationshipsSpecifyStartEndNodeIdGroupsInHeader() throws Exception {
    // GIVEN two relationships and a header naming separate groups for START_ID and END_ID
    IdType idType = IdType.ACTUAL;
    Iterable<DataFactory<InputRelationship>> relationshipData = dataIterable(data("123,TYPE,234\n" + "345,TYPE,456"));
    Groups groupRegistry = new Groups();
    Group startGroup = groupRegistry.getOrCreate("StartGroup");
    Group endGroup = groupRegistry.getOrCreate("EndGroup");
    int processors = getRuntime().availableProcessors();
    Input input = new CsvInput(
            null,
            null,
            relationshipData,
            header(
                    entry(null, Type.START_ID, startGroup.name(), idType.extractor(extractors)),
                    entry(null, Type.TYPE, extractors.string()),
                    entry(null, Type.END_ID, endGroup.name(), idType.extractor(extractors))),
            idType,
            config(COMMAS),
            silentBadCollector(0),
            processors);

    // WHEN/THEN both relationships come back with the groups from the header, and nothing more
    try (InputIterator<InputRelationship> iterator = input.relationships().iterator()) {
        assertRelationship(iterator.next(), startGroup, 123L, endGroup, 234L, "TYPE", properties());
        assertRelationship(iterator.next(), startGroup, 345L, endGroup, 456L, "TYPE", properties());
        assertFalse(iterator.hasNext());
    }
}
use of org.neo4j.unsafe.impl.batchimport.input.InputRelationship in project neo4j by neo4j.
the class CsvInputTest method shouldCloseDataIteratorsInTheEnd.
/**
 * Closing the node and relationship iterators of a {@code CsvInput} must close the underlying
 * character sources they were reading from.
 */
@Test
public void shouldCloseDataIteratorsInTheEnd() throws Exception {
    // GIVEN one node source and one relationship source that can be checked for closure later
    CharReadable nodeSource = charReader("1");
    CharReadable relationshipSource = charReader("1,1");
    IdType idType = IdType.STRING;
    Iterable<DataFactory<InputNode>> nodeFactories = dataIterable(given(nodeSource));
    Iterable<DataFactory<InputRelationship>> relationshipFactories = dataIterable(data(relationshipSource, defaultRelationshipType("TYPE")));
    int processors = getRuntime().availableProcessors();
    Input input = new CsvInput(
            nodeFactories,
            header(entry(null, Type.ID, idType.extractor(extractors))),
            relationshipFactories,
            header(
                    entry(null, Type.START_ID, idType.extractor(extractors)),
                    entry(null, Type.END_ID, idType.extractor(extractors))),
            idType,
            config(COMMAS),
            silentBadCollector(0),
            processors);

    // WHEN one item is read from each iterator and the iterator is then closed
    try (ResourceIterator<InputNode> nodes = input.nodes().iterator()) {
        nodes.next();
    }
    try (ResourceIterator<InputRelationship> relationships = input.relationships().iterator()) {
        relationships.next();
    }

    // THEN both underlying sources have been closed
    assertClosed(nodeSource);
    assertClosed(relationshipSource);
}
Aggregations