Example use of org.neo4j.unsafe.impl.batchimport.input.InputNode in the neo4j project (by neo4j).
Taken from the class ParallelBatchImporterTest, method nodes.
/**
 * Builds a source of randomly generated input nodes for the import under test.
 *
 * <p>Every iterator obtained from the returned iterable is driven by a fresh {@link Random}
 * created from {@code randomSeed}, so iterating again with the same arguments goes through the
 * same sequence of random draws.
 *
 * @param randomSeed seed for the random generator backing each iterator.
 * @param count number of nodes each iterator yields before it is exhausted.
 * @param inputIdGenerator generator of the input id of each node.
 * @param groups decides which group the node at a given position belongs to.
 * @return an iterable which asserts that it is iterated at most once unless
 * {@code multiPassIterators} is set.
 */
private InputIterable<InputNode> nodes(final long randomSeed, final long count, final InputIdGenerator inputIdGenerator, final IdGroupDistribution groups) {
    return new InputIterable<InputNode>() {
        // Number of times iterator() has been called on this iterable.
        private int iteratorCalls;

        @Override
        public InputIterator<InputNode> iterator() {
            iteratorCalls++;
            // A second pass is only acceptable when multi-pass iterators are enabled.
            boolean passAllowed = multiPassIterators || iteratorCalls == 1;
            assertTrue("Unexpected use of input iterator " + multiPassIterators + ", " + iteratorCalls, passAllowed);

            return new SimpleInputIterator<InputNode>("test nodes") {
                private final Random random = new Random(randomSeed);
                private final Randoms randoms = new Randoms(random, Randoms.DEFAULT);
                // Position of the next node to generate.
                private int cursor;

                @Override
                protected InputNode fetchNextOrNull() {
                    if (cursor >= count) {
                        // All requested nodes have been handed out.
                        return null;
                    }

                    Object nodeId = inputIdGenerator.nextNodeId(random);
                    Object[] properties = randomProperties(randoms, nodeId);
                    String[] labels = randoms.selection(TOKENS, 0, TOKENS.length, true);
                    // Advance before the group lookup so that the cursor moves on even if the
                    // lookup or the node construction fails.
                    int position = cursor++;
                    Group group = groups.groupOf(position);
                    return new InputNode(sourceDescription, itemNumber, itemNumber, group, nodeId, properties, null, labels, null);
                }
            };
        }

        @Override
        public boolean supportsMultiplePasses() {
            return multiPassIterators;
        }
    };
}
Example use of org.neo4j.unsafe.impl.batchimport.input.InputNode in the neo4j project (by neo4j).
Taken from the class ParallelBatchImporterTest, method verifyData.
/**
 * Reads all nodes and relationships from {@code db} and verifies them against input that is
 * regenerated from the given seeds.
 *
 * @param nodeCount expected number of nodes.
 * @param relationshipCount expected number of input relationships.
 * @param db database to verify.
 * @param groups group distribution passed on to the input generators.
 * @param nodeRandomSeed seed for regenerating the node input.
 * @param relationshipRandomSeed seed for regenerating the relationship input.
 */
protected void verifyData(int nodeCount, int relationshipCount, GraphDatabaseService db, IdGroupDistribution groups, long nodeRandomSeed, long relationshipRandomSeed) {
    // Read all nodes, relationships and properties and verify against the input data.
    try (InputIterator<InputNode> nodes = nodes(nodeRandomSeed, nodeCount, inputIdGenerator, groups).iterator();
            InputIterator<InputRelationship> relationships = relationships(relationshipRandomSeed, relationshipCount, inputIdGenerator, groups).iterator()) {
        // Nodes: the input nodes and the database nodes are walked in lock-step.
        Map<String, Node> nodeByInputId = new HashMap<>(nodeCount);
        Iterator<Node> dbNodes = db.getAllNodes().iterator();
        int verifiedNodes = 0;
        for (; nodes.hasNext(); verifiedNodes++) {
            InputNode expectedNode = nodes.next();
            Node actualNode = dbNodes.next();
            assertNodeEquals(expectedNode, actualNode);
            // No two nodes may map to the same unique id.
            Node previous = nodeByInputId.put(uniqueId(expectedNode.group(), actualNode), actualNode);
            assertNull(previous);
            assertDegrees(actualNode);
        }
        assertEquals(nodeCount, verifiedNodes);

        // Relationships: index the stored ones by their "id" property for lookup by name.
        Map<String, Relationship> relationshipByName = new HashMap<>();
        for (Relationship stored : db.getAllRelationships()) {
            relationshipByName.put((String) stored.getProperty("id"), stored);
        }

        int verifiedRelationships = 0;
        while (relationships.hasNext()) {
            InputRelationship expectedRelationship = relationships.next();
            // Skipped relationships are counted too, the total is compared to the input count.
            verifiedRelationships++;

            if (inputIdGenerator.isMiss(expectedRelationship.startNode()) || inputIdGenerator.isMiss(expectedRelationship.endNode())) {
                // A relationship referring to missing nodes. The InputIdGenerator is expected to generate
                // some (very few) of those. Skip it.
                continue;
            }

            String name = (String) propertyOf(expectedRelationship, "id");
            Relationship actualRelationship = relationshipByName.get(name);
            assertNotNull("Expected there to be a relationship with name '" + name + "'", actualRelationship);

            Node expectedStart = nodeByInputId.get(uniqueId(expectedRelationship.startNodeGroup(), expectedRelationship.startNode()));
            assertEquals(expectedStart, actualRelationship.getStartNode());
            Node expectedEnd = nodeByInputId.get(uniqueId(expectedRelationship.endNodeGroup(), expectedRelationship.endNode()));
            assertEquals(expectedEnd, actualRelationship.getEndNode());
            assertRelationshipEquals(expectedRelationship, actualRelationship);
        }
        assertEquals(relationshipCount, verifiedRelationships);
    }
}
Example use of org.neo4j.unsafe.impl.batchimport.input.InputNode in the neo4j project (by neo4j).
Taken from the class ExternalPropertiesDecoratorIT, method shouldDecorateExternalPropertiesInParallelProcessingCsvInput.
/**
 * Reads {@code count} nodes from a {@link CsvInput} whose node data is decorated by an
 * {@link ExternalPropertiesDecorator} and checks that every node carries the decorated
 * "extra" property, that the input ends after exactly {@code count} nodes, and that the
 * decorator is closed once the iterator has been closed.
 */
@Test
public void shouldDecorateExternalPropertiesInParallelProcessingCsvInput() throws Exception {
    // GIVEN
    int processors = 5;
    Collector collector = mock(Collector.class);
    int count = 1000;
    Configuration config = new Configuration.Overridden(Configuration.COMMAS) {
        @Override
        public int bufferSize() {
            // 300 is empirically measured to roughly produce ~20 chunks
            return 300;
        }
    };
    IdType idType = IdType.STRING;
    // The decorator is spied on so that its close() call can be verified at the end.
    Decorator<InputNode> decorator = spy(new ExternalPropertiesDecorator(
            data(NO_NODE_DECORATOR, () -> decoratedData(count)),
            defaultFormatNodeFileHeader(), config, idType, UpdateBehaviour.ADD, collector));
    Input input = new CsvInput(
            nodeData(data(decorator, () -> mainData(count))),
            defaultFormatNodeFileHeader(), null, null, idType, config, collector, processors);

    // WHEN/THEN
    try (InputIterator<InputNode> nodes = input.nodes().iterator()) {
        int seen = 0;
        while (seen < count) {
            assertTrue(nodes.hasNext());
            InputNode node = nodes.next();
            // This property comes from decorator
            assertHasProperty(node, "extra", node.id() + "-decorated");
            if (seen == 0) {
                // After the first node: pass the difference between the wanted processor count
                // and what processors(0) returns. Per the original author this is equal to
                // nodes.setProcessors( processors ) (a method which doesn't exist).
                int reported = nodes.processors(0);
                nodes.processors(processors - reported);
            }
            seen++;
        }
        assertEquals(count, seen);
        assertFalse(nodes.hasNext());
    }
    verify(decorator).close();
}
Example use of org.neo4j.unsafe.impl.batchimport.input.InputNode in the neo4j project (by neo4j).
Taken from the class InputGroupsDeserializerTest, method shouldBeAbleToAskForSourceInformationEvenBetweenTwoSources.
/**
 * Verifies that {@code sourceDescription()} can be called on an {@link InputGroupsDeserializer}
 * at the moment it asks its factory for the deserializer of the next data source, i.e. in
 * between two sources.
 *
 * <p>The deserializer is managed by try-with-resources so that it is closed even when iterating
 * or the final assertion fails.
 */
@Test
public void shouldBeAbleToAskForSourceInformationEvenBetweenTwoSources() throws Exception {
    // GIVEN
    List<DataFactory<InputNode>> data = asList(data(":ID\n1"), data("2"));
    final AtomicInteger flips = new AtomicInteger();
    // The factory lambda below needs to call back into the deserializer that is being constructed
    // with it, so the instance is handed over through this reference after construction.
    final AtomicReference<InputGroupsDeserializer<InputNode>> deserializerTestHack = new AtomicReference<>(null);
    try (InputGroupsDeserializer<InputNode> deserializer = new InputGroupsDeserializer<>(data.iterator(), defaultFormatNodeFileHeader(), lowBufferSize(COMMAS, true), INTEGER, Runtime.getRuntime().availableProcessors(), 1, (header, stream, decorator, validator) -> {
        // Invoked whenever a deserializer for the next source is requested. Poke the source
        // description right here and now to verify that it can be asked for at this point.
        if (flips.get() == 0) {
            // Before the first source there is no mocked deserializer to compare with yet.
            assertNotNull(deserializerTestHack.get().sourceDescription());
        } else {
            // Still expected to report the previous source, whose description is its flip number.
            assertEquals("" + flips.get(), deserializerTestHack.get().sourceDescription());
        }
        flips.incrementAndGet();
        @SuppressWarnings("unchecked") InputEntityDeserializer<InputNode> result = mock(InputEntityDeserializer.class);
        when(result.sourceDescription()).thenReturn(String.valueOf(flips.get()));
        // Closing the mocked deserializer must still close the underlying stream.
        doAnswer(new Answer<Void>() {
            @Override
            public Void answer(InvocationOnMock invocation) throws Throwable {
                stream.close();
                return null;
            }
        }).when(result).close();
        return result;
    }, Validators.<InputNode>emptyValidator(), InputNode.class)) {
        deserializerTestHack.set(deserializer);

        // WHEN running through the iterator
        count(deserializer);

        // THEN there should have been two data source flips
        assertEquals(2, flips.get());
    }
}
Example use of org.neo4j.unsafe.impl.batchimport.input.InputNode in the neo4j project (by neo4j).
Taken from the class InputGroupsDeserializerTest, method shouldCoordinateGroupCreationForParallelProcessing.
/**
 * Feeds an {@link InputGroupsDeserializer} one data source per available processor, each
 * declaring its own id group ("Group0", "Group1", ...), and verifies that exactly that many
 * groups were created and that every one of them was given its own id.
 */
@Test
public void shouldCoordinateGroupCreationForParallelProcessing() throws Exception {
    // GIVEN one source per processor, each with a header naming a distinct group and 100 ids
    List<DataFactory<InputNode>> data = new ArrayList<>();
    int processors = Runtime.getRuntime().availableProcessors();
    for (int i = 0; i < processors; i++) {
        StringBuilder builder = new StringBuilder(":ID(Group" + i + ")");
        for (int j = 0; j < 100; j++) {
            builder.append('\n').append(j);
        }
        data.add(data(builder.toString()));
    }
    Groups groups = new Groups();
    IdType idType = IdType.INTEGER;
    Collector badCollector = mock(Collector.class);
    Configuration config = lowBufferSize(COMMAS, false);
    DeserializerFactory<InputNode> factory = defaultNodeDeserializer(groups, config, idType, badCollector);
    try (InputGroupsDeserializer<InputNode> deserializer = new InputGroupsDeserializer<>(data.iterator(), defaultFormatNodeFileHeader(), config, idType, processors, processors, factory, Validators.<InputNode>emptyValidator(), InputNode.class)) {
        // WHEN
        count(deserializer);
    }

    // THEN a group created now is expected to get id == processors, i.e. exactly
    // 'processors' groups were created while deserializing
    assertEquals(processors, groups.getOrCreate("LastOne").id());
    // ... and every declared group must have received an id of its own. The ids are tracked by
    // the id of the looked-up group, not by the loop index, which could never detect a clash.
    boolean[] seen = new boolean[processors];
    for (int i = 0; i < processors; i++) {
        String groupName = "Group" + i;
        int id = groups.getOrCreate(groupName).id();
        assertFalse("Id " + id + " of " + groupName + " is outside [0," + processors + ")", id < 0 || id >= processors);
        assertFalse("Id " + id + " of " + groupName + " is already used by another group", seen[id]);
        seen[id] = true;
    }
}
Aggregations