Search in sources :

Example 1 with InputNode

use of org.neo4j.unsafe.impl.batchimport.input.InputNode in project neo4j by neo4j.

the class ParallelBatchImporterTest method nodes.

/**
 * Builds the node input used by the test: {@code count} nodes whose ids, properties and labels
 * are all drawn from a {@link Random} seeded with {@code randomSeed}.
 *
 * <p>Each call to {@code iterator()} creates a fresh {@link Random} from the same seed. Unless
 * {@code multiPassIterators} is set, requesting the iterator more than once fails an assertion.
 *
 * @param randomSeed seed for the random source behind ids, properties and labels
 * @param count number of nodes each iterator produces before returning {@code null}
 * @param inputIdGenerator produces the input id of every node
 * @param groups maps a node's position in the stream to its id {@link Group}
 * @return an iterable over the generated input nodes
 */
private InputIterable<InputNode> nodes(final long randomSeed, final long count, final InputIdGenerator inputIdGenerator, final IdGroupDistribution groups) {
    return new InputIterable<InputNode>() {

        // Number of times iterator() has been requested on this iterable.
        private int iteratorRequests;

        @Override
        public InputIterator<InputNode> iterator() {
            iteratorRequests++;
            // A single-pass input may hand out exactly one iterator.
            boolean requestAllowed = multiPassIterators || iteratorRequests == 1;
            assertTrue("Unexpected use of input iterator " + multiPassIterators + ", " + iteratorRequests, requestAllowed);
            return new SimpleInputIterator<InputNode>("test nodes") {

                private final Random rng = new Random(randomSeed);

                private final Randoms randoms = new Randoms(rng, Randoms.DEFAULT);

                // Position of the next node to produce.
                private int produced;

                @Override
                protected InputNode fetchNextOrNull() {
                    if (produced >= count) {
                        return null;
                    }
                    // The order of these draws determines the generated data, so it must not change.
                    Object nodeId = inputIdGenerator.nextNodeId(rng);
                    Object[] properties = randomProperties(randoms, nodeId);
                    String[] labels = randoms.selection(TOKENS, 0, TOKENS.length, true);
                    // Advance before resolving the group so the position moves on even if that lookup throws.
                    int position = produced++;
                    Group group = groups.groupOf(position);
                    return new InputNode(sourceDescription, itemNumber, itemNumber, group, nodeId, properties, null, labels, null);
                }
            };
        }

        @Override
        public boolean supportsMultiplePasses() {
            return multiPassIterators;
        }
    };
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) Group(org.neo4j.unsafe.impl.batchimport.input.Group) Randoms(org.neo4j.test.Randoms) SimpleInputIterator(org.neo4j.unsafe.impl.batchimport.input.SimpleInputIterator) Random(java.util.Random)

Example 2 with InputNode

use of org.neo4j.unsafe.impl.batchimport.input.InputNode in project neo4j by neo4j.

the class ParallelBatchImporterTest method verifyData.

/**
 * Verifies the contents of {@code db} against the input regenerated from the given seeds.
 *
 * <p>Nodes are compared pairwise, in iteration order, with the nodes returned by the database.
 * Relationships are looked up by their {@code "id"} property. Input relationships that refer to
 * a missing node are not looked up, but still count towards {@code relationshipCount}.
 *
 * @param nodeCount number of input nodes expected to be verified
 * @param relationshipCount number of input relationships expected to be visited
 * @param db database to verify
 * @param groups id group distribution used when regenerating the input
 * @param nodeRandomSeed seed used to regenerate the node input
 * @param relationshipRandomSeed seed used to regenerate the relationship input
 */
protected void verifyData(int nodeCount, int relationshipCount, GraphDatabaseService db, IdGroupDistribution groups, long nodeRandomSeed, long relationshipRandomSeed) {
    // Read all nodes, relationships and properties and verify them against the input data.
    try (InputIterator<InputNode> nodes = nodes(nodeRandomSeed, nodeCount, inputIdGenerator, groups).iterator();
        InputIterator<InputRelationship> relationships = relationships(relationshipRandomSeed, relationshipCount, inputIdGenerator, groups).iterator()) {
        // Nodes: walk the input and the database side by side.
        Map<String, Node> nodeByInputId = new HashMap<>(nodeCount);
        Iterator<Node> dbNodes = db.getAllNodes().iterator();
        int verifiedNodes = 0;
        for (; nodes.hasNext(); verifiedNodes++) {
            InputNode expectedNode = nodes.next();
            Node actualNode = dbNodes.next();
            assertNodeEquals(expectedNode, actualNode);
            // Every (group, id) combination must map to exactly one node.
            Node previous = nodeByInputId.put(uniqueId(expectedNode.group(), actualNode), actualNode);
            assertNull(previous);
            assertDegrees(actualNode);
        }
        assertEquals(nodeCount, verifiedNodes);

        // Relationships: index the stored ones by their "id" property first.
        Map<String, Relationship> relationshipByName = new HashMap<>();
        for (Relationship stored : db.getAllRelationships()) {
            String storedName = (String) stored.getProperty("id");
            relationshipByName.put(storedName, stored);
        }
        int verifiedRelationships = 0;
        for (; relationships.hasNext(); verifiedRelationships++) {
            InputRelationship expected = relationships.next();
            if (inputIdGenerator.isMiss(expected.startNode()) || inputIdGenerator.isMiss(expected.endNode())) {
                // A relationship referring to missing nodes. The InputIdGenerator is expected to generate
                // some (very few) of those. Nothing to look up, but it is still counted.
                continue;
            }
            String name = (String) propertyOf(expected, "id");
            Relationship actual = relationshipByName.get(name);
            assertNotNull("Expected there to be a relationship with name '" + name + "'", actual);
            Node expectedStart = nodeByInputId.get(uniqueId(expected.startNodeGroup(), expected.startNode()));
            assertEquals(expectedStart, actual.getStartNode());
            Node expectedEnd = nodeByInputId.get(uniqueId(expected.endNodeGroup(), expected.endNode()));
            assertEquals(expectedEnd, actual.getEndNode());
            assertRelationshipEquals(expected, actual);
        }
        assertEquals(relationshipCount, verifiedRelationships);
    }
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) HashMap(java.util.HashMap) Node(org.neo4j.graphdb.Node) InputRelationship(org.neo4j.unsafe.impl.batchimport.input.InputRelationship) Relationship(org.neo4j.graphdb.Relationship)

Example 3 with InputNode

use of org.neo4j.unsafe.impl.batchimport.input.InputNode in project neo4j by neo4j.

the class ExternalPropertiesDecoratorIT method shouldDecorateExternalPropertiesInParallelProcessingCsvInput.

/**
 * Reads {@code count} nodes through a {@link CsvInput} whose data is wrapped by a spied
 * {@link ExternalPropertiesDecorator}. Checks that every node carries the decorator's
 * {@code "extra"} property and that the decorator is closed once the iterator is closed.
 */
@Test
public void shouldDecorateExternalPropertiesInParallelProcessingCsvInput() throws Exception {
    // GIVEN
    int count = 1000;
    int processors = 5;
    IdType idType = IdType.STRING;
    Collector collector = mock(Collector.class);
    Configuration config = new Configuration.Overridden(Configuration.COMMAS) {

        @Override
        public int bufferSize() {
            // 300 is empirically measured to roughly produce ~20 chunks
            return 300;
        }
    };
    Decorator<InputNode> decorator = spy(new ExternalPropertiesDecorator(data(NO_NODE_DECORATOR, () -> decoratedData(count)), defaultFormatNodeFileHeader(), config, idType, UpdateBehaviour.ADD, collector));
    Input input = new CsvInput(nodeData(data(decorator, () -> mainData(count))), defaultFormatNodeFileHeader(), null, null, idType, config, collector, processors);

    // WHEN/THEN
    try (InputIterator<InputNode> nodes = input.nodes().iterator()) {
        int read = 0;
        while (read < count) {
            assertTrue(nodes.hasNext());
            InputNode node = nodes.next();
            // The "extra" property is contributed by the decorator.
            assertHasProperty(node, "extra", node.id() + "-decorated");
            if (read == 0) {
                // After the first node, move to the requested number of processors. There is no
                // setProcessors( processors ) method, so pass the difference to the current count instead.
                nodes.processors(processors - nodes.processors(0));
            }
            read++;
        }
        assertEquals(count, read);
        assertFalse(nodes.hasNext());
    }
    verify(decorator).close();
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) Input(org.neo4j.unsafe.impl.batchimport.input.Input) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Test(org.junit.Test)

Example 4 with InputNode

use of org.neo4j.unsafe.impl.batchimport.input.InputNode in project neo4j by neo4j.

the class InputGroupsDeserializerTest method shouldBeAbleToAskForSourceInformationEvenBetweenTwoSources.

/**
 * Verifies that {@code sourceDescription()} can be called on an {@link InputGroupsDeserializer}
 * at the moment its factory is asked to create the deserializer for the next data source.
 *
 * <p>The factory is the only place where the test can observe that moment. It therefore reaches
 * back to the deserializer under test through {@code deserializerTestHack}, which is set right
 * after construction.
 */
@Test
public void shouldBeAbleToAskForSourceInformationEvenBetweenTwoSources() throws Exception {
    // GIVEN
    List<DataFactory<InputNode>> data = asList(data(":ID\n1"), data("2"));
    final AtomicInteger flips = new AtomicInteger();
    final AtomicReference<InputGroupsDeserializer<InputNode>> deserializerTestHack = new AtomicReference<>(null);
    // try-with-resources so the deserializer is closed even when iteration or an assertion fails
    try (InputGroupsDeserializer<InputNode> deserializer = new InputGroupsDeserializer<>(data.iterator(), defaultFormatNodeFileHeader(), lowBufferSize(COMMAS, true), INTEGER, Runtime.getRuntime().availableProcessors(), 1, (header, stream, decorator, validator) -> {
        // Asking for source information at this point must work, so poke the source method right here
        // and now. NOTE(review): presumably this used to fail between two sources; confirm against history.
        if (flips.get() == 0) {
            // First source: only require that some description is available.
            assertNotNull(deserializerTestHack.get().sourceDescription());
        } else {
            // Later sources: the description still comes from the previously created mock,
            // which was stubbed with the flip count at its creation.
            assertEquals("" + flips.get(), deserializerTestHack.get().sourceDescription());
        }
        flips.incrementAndGet();
        @SuppressWarnings("unchecked") InputEntityDeserializer<InputNode> result = mock(InputEntityDeserializer.class);
        when(result.sourceDescription()).thenReturn(String.valueOf(flips.get()));
        // Closing the mocked deserializer must close the underlying stream.
        doAnswer(new Answer<Void>() {

            @Override
            public Void answer(InvocationOnMock invocation) throws Throwable {
                stream.close();
                return null;
            }
        }).when(result).close();
        return result;
    }, Validators.<InputNode>emptyValidator(), InputNode.class)) {
        deserializerTestHack.set(deserializer);
        // WHEN running through the iterator
        count(deserializer);
        // THEN there should have been two data source flips
        assertEquals(2, flips.get());
    }
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) AtomicReference(java.util.concurrent.atomic.AtomicReference) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) InvocationOnMock(org.mockito.invocation.InvocationOnMock) Test(org.junit.Test)

Example 5 with InputNode

use of org.neo4j.unsafe.impl.batchimport.input.InputNode in project neo4j by neo4j.

the class InputGroupsDeserializerTest method shouldCoordinateGroupCreationForParallelProcessing.

/**
 * Verifies that id groups declared by several data sources get distinct ids when the sources
 * are deserialized with multiple processors.
 *
 * <p>One data source is created per available processor. Each declares its own group in the
 * header ({@code :ID(GroupN)}) followed by 100 ids. After reading everything, the next created
 * group must get id {@code processors}, and no two of the declared groups may share an id.
 */
@Test
public void shouldCoordinateGroupCreationForParallelProcessing() throws Exception {
    // GIVEN
    List<DataFactory<InputNode>> data = new ArrayList<>();
    int processors = Runtime.getRuntime().availableProcessors();
    for (int i = 0; i < processors; i++) {
        StringBuilder builder = new StringBuilder(":ID(Group" + i + ")");
        for (int j = 0; j < 100; j++) {
            builder.append('\n').append(j);
        }
        data.add(data(builder.toString()));
    }
    Groups groups = new Groups();
    IdType idType = IdType.INTEGER;
    Collector badCollector = mock(Collector.class);
    Configuration config = lowBufferSize(COMMAS, false);
    DeserializerFactory<InputNode> factory = defaultNodeDeserializer(groups, config, idType, badCollector);
    try (InputGroupsDeserializer<InputNode> deserializer = new InputGroupsDeserializer<>(data.iterator(), defaultFormatNodeFileHeader(), config, idType, processors, processors, factory, Validators.<InputNode>emptyValidator(), InputNode.class)) {
        // WHEN
        count(deserializer);
    }
    // THEN exactly `processors` groups exist, so the next one created gets id `processors`
    assertEquals(processors, groups.getOrCreate("LastOne").id());
    // ... and every declared group has its own id within [0, processors)
    boolean[] seen = new boolean[processors];
    for (int i = 0; i < processors; i++) {
        String groupName = "Group" + i;
        int id = groups.getOrCreate(groupName).id();
        assertFalse("Group id " + id + " of " + groupName + " is outside [0," + processors + ")", id < 0 || id >= processors);
        // Index by the group's actual id (not the loop counter) so that duplicate ids are detected.
        assertFalse("Group id " + id + " of " + groupName + " is shared with another group", seen[id]);
        seen[id] = true;
    }
}
Also used : InputNode(org.neo4j.unsafe.impl.batchimport.input.InputNode) ArrayList(java.util.ArrayList) Groups(org.neo4j.unsafe.impl.batchimport.input.Groups) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Test(org.junit.Test)

Aggregations

InputNode (org.neo4j.unsafe.impl.batchimport.input.InputNode)42 Test (org.junit.Test)32 Input (org.neo4j.unsafe.impl.batchimport.input.Input)21 InputRelationship (org.neo4j.unsafe.impl.batchimport.input.InputRelationship)8 NodeRecord (org.neo4j.kernel.impl.store.record.NodeRecord)6 Collector (org.neo4j.unsafe.impl.batchimport.input.Collector)6 Groups (org.neo4j.unsafe.impl.batchimport.input.Groups)4 File (java.io.File)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)3 Matchers.anyString (org.mockito.Matchers.anyString)3 StatsProvider (org.neo4j.unsafe.impl.batchimport.stats.StatsProvider)3 StringReader (java.io.StringReader)2 Writer (java.io.Writer)2 CharReadable (org.neo4j.csv.reader.CharReadable)2 GraphDatabaseService (org.neo4j.graphdb.GraphDatabaseService)2 Transaction (org.neo4j.graphdb.Transaction)2 NeoStores (org.neo4j.kernel.impl.store.NeoStores)2