Search in sources :

Example 1 with InputChunk

use of org.neo4j.internal.batchimport.input.InputChunk in project neo4j by neo4j.

the class ExhaustingEntityImporterRunnable method run.

/**
 * Pulls chunks from {@code data} until it is exhausted and feeds every entity of each chunk to
 * {@code visitor}, reporting the number of entities seen per chunk to {@code roughEntityCountProgress}.
 *
 * <p>Any failure is reported to {@code control} via {@code panic} before it is propagated; an
 * {@link IOException} is propagated wrapped in a {@link RuntimeException}, anything else is rethrown as is.
 * The {@code visitor} is closed when the method exits, whether it completed normally or not.
 */
@Override
public void run() {
    try (InputChunk batch = data.newChunk()) {
        while (data.next(batch)) {
            // Checked once per chunk, before any of its entities are visited.
            control.assertHealthy();
            int visited;
            for (visited = 0; batch.next(visitor); visited++) {
                // Nothing to do here: batch.next(visitor) hands the entity to the visitor.
            }
            roughEntityCountProgress.add(visited);
        }
    } catch (IOException ioFailure) {
        control.panic(ioFailure);
        // run() cannot declare checked exceptions, hence the unchecked wrapper.
        throw new RuntimeException(ioFailure);
    } catch (Throwable failure) {
        control.panic(failure);
        throw failure;
    } finally {
        visitor.close();
    }
}
Also used : InputChunk(org.neo4j.internal.batchimport.input.InputChunk) IOException(java.io.IOException)

Example 2 with InputChunk

use of org.neo4j.internal.batchimport.input.InputChunk in project neo4j by neo4j.

the class CsvOutput method consume.

/**
 * Serializes {@code header} to the stream obtained from {@code file(name + "header.csv")} and then
 * drains {@code entities} in parallel, one worker per available processor, each worker printing its
 * entities to its own stream obtained from {@code file(name + "-" + id + ".csv")}.
 *
 * <p>The method waits up to 10 minutes for the workers to finish. Unlike a fire-and-forget submit,
 * worker failures and a timeout are reported to the caller instead of being silently dropped, and the
 * executor is always shut down before the method returns.
 *
 * @param name prefix used to build the output file names.
 * @param entities source of chunks; shared by all workers.
 * @param header header to serialize and to pass to {@code deserializer} for every entity.
 * @param deserializer converts an entity to the line that is printed.
 * @throws IOException if writing the header fails, a worker fails, the workers do not finish within
 * 10 minutes, or the calling thread is interrupted while waiting.
 */
private void consume(String name, InputIterator entities, Header header, Deserializer deserializer) throws IOException {
    try (PrintStream out = file(name + "header.csv")) {
        serialize(out, header);
    }
    int threads = Runtime.getRuntime().availableProcessors();
    ExecutorService executor = Executors.newFixedThreadPool(threads);
    // Fully qualified names are used for types the original method did not reference.
    java.util.List<java.util.concurrent.Future<Void>> pending = new java.util.ArrayList<>(threads);
    try {
        for (int i = 0; i < threads; i++) {
            int id = i;
            pending.add(executor.submit((Callable<Void>) () -> {
                StringDeserialization deserialization = new StringDeserialization(config);
                try (PrintStream out = file(name + "-" + id + ".csv");
                    InputChunk chunk = entities.newChunk()) {
                    InputEntity entity = new InputEntity();
                    while (entities.next(chunk)) {
                        while (chunk.next(entity)) {
                            out.println(deserializer.apply(entity, deserialization, header));
                        }
                    }
                }
                return null;
            }));
        }
        executor.shutdown();
        if (!executor.awaitTermination(10, TimeUnit.MINUTES)) {
            throw new IOException("Timed out waiting for output of '" + name + "' to complete");
        }
        // All tasks have terminated at this point, so get() does not block; it only surfaces failures.
        for (java.util.concurrent.Future<Void> task : pending) {
            task.get();
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IOException(e);
    } catch (java.util.concurrent.ExecutionException e) {
        throw new IOException("Failed to write output of '" + name + "'", e.getCause());
    } finally {
        // No-op after a clean run; on timeout, interruption or failure it stops the remaining workers.
        executor.shutdownNow();
    }
}
Also used : PrintStream(java.io.PrintStream) StringDeserialization(org.neo4j.internal.batchimport.input.csv.StringDeserialization) ExecutorService(java.util.concurrent.ExecutorService) InputEntity(org.neo4j.internal.batchimport.input.InputEntity) InputChunk(org.neo4j.internal.batchimport.input.InputChunk) IOException(java.io.IOException)

Example 3 with InputChunk

use of org.neo4j.internal.batchimport.input.InputChunk in project neo4j by neo4j.

the class CsvInputEstimateCalculationIT method generateData.

/**
 * Writes a CSV file named {@code fileName} in the test directory: first {@code headerString}, then one
 * line per entity produced by a {@link RandomEntityDataGenerator}, each converted with {@code convert}.
 * Afterwards {@code start} is advanced by {@code count}.
 *
 * @param factory creates the {@link Header} from {@code headerString}.
 * @param start supplies the generator's start value ({@code start.longValue()}) and is incremented by
 * {@code count} once the file has been written.
 * @param count passed to the generator both as-is and, via {@code toIntExact}, as an {@code int}.
 * @param nodeCount passed as the first argument to the generator.
 * @param headerString header line; printed verbatim as the first line of the file.
 * @param fileName name of the file to create in the test directory.
 * @param groups groups passed to the header factory.
 * @return a {@link DataFactory} over the written file, with no decorator and UTF-8 as charset.
 * @throws IOException if the file cannot be opened or a closeable fails on close.
 */
private DataFactory generateData(Header.Factory factory, MutableLong start, long count, long nodeCount, String headerString, String fileName, Groups groups) throws IOException {
    Path target = testDirectory.file(fileName);
    Header parsedHeader = factory.create(charSeeker(wrap(headerString), COMMAS, false), COMMAS, IdType.INTEGER, groups);
    // The same single-value distribution is handed to the generator twice.
    Distribution<String> tokens = new Distribution<>(new String[] { "Token" });
    Deserialization<String> lineFormat = new StringDeserialization(COMMAS);
    try (PrintWriter writer = new PrintWriter(Files.newBufferedWriter(target));
        RandomEntityDataGenerator source = new RandomEntityDataGenerator(nodeCount, count, toIntExact(count), random.seed(), start.longValue(), parsedHeader, tokens, tokens, 0, 0, 5);
        InputChunk batch = source.newChunk();
        InputEntity row = new InputEntity()) {
        writer.println(headerString);
        while (source.next(batch)) {
            // The same entity instance is reused for every row of every chunk.
            while (batch.next(row)) {
                writer.println(convert(row, lineFormat, parsedHeader));
            }
        }
    }
    start.add(count);
    return DataFactories.data(InputEntityDecorators.NO_DECORATOR, StandardCharsets.UTF_8, target);
}
Also used : Path(java.nio.file.Path) DataFactories.defaultFormatNodeFileHeader(org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader) DataFactories.defaultFormatRelationshipFileHeader(org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatRelationshipFileHeader) Distribution(org.neo4j.internal.batchimport.input.Distribution) InputEntity(org.neo4j.internal.batchimport.input.InputEntity) InputChunk(org.neo4j.internal.batchimport.input.InputChunk) ByteUnit.bytesToString(org.neo4j.io.ByteUnit.bytesToString) RandomEntityDataGenerator(org.neo4j.internal.batchimport.input.RandomEntityDataGenerator) PrintWriter(java.io.PrintWriter)

Example 4 with InputChunk

use of org.neo4j.internal.batchimport.input.InputChunk in project neo4j by neo4j.

the class ParallelBatchImporterTest method verifyData.

/**
 * Regenerates the node and relationship input from the given seeds and asserts that the data visible
 * through {@code tx} matches it.
 *
 * <p>Checks performed, in order: every database node has a unique {@code "id"} property; every input
 * node has a matching database node (see {@code assertNodeEquals} and {@code assertDegrees}); the number
 * of input nodes equals {@code nodeCount}; the number of (label, node) pairs found via
 * {@code tx.findNodes} equals the number of labels counted on the nodes themselves; every input
 * relationship whose endpoints are not "misses" has a matching database relationship with the expected
 * start and end node; the number of input relationships equals {@code relationshipCount}.
 *
 * @param nodeCount expected number of input nodes.
 * @param relationshipCount expected number of input relationships, including those referring to missing nodes.
 * @param db not used by this method.
 * @param tx transaction through which the database contents are read.
 * @param groups id group distribution used to regenerate the input.
 * @param nodeRandomSeed seed used to regenerate the node input.
 * @param relationshipRandomSeed seed used to regenerate the relationship input.
 * @throws IOException if reading the regenerated input or closing one of its resources fails.
 */
private void verifyData(int nodeCount, int relationshipCount, GraphDatabaseService db, Transaction tx, IdGroupDistribution groups, long nodeRandomSeed, long relationshipRandomSeed) throws IOException {
    // Read all nodes, relationships and properties and verify against the input data.
    LongAdder propertyCount = new LongAdder();
    try (InputIterator nodes = nodes(nodeRandomSeed, nodeCount, config.batchSize(), inputIdGenerator, groups, propertyCount).iterator();
        InputIterator relationships = relationships(relationshipRandomSeed, relationshipCount, config.batchSize(), inputIdGenerator, groups, propertyCount, new LongAdder()).iterator();
        ResourceIterator<Node> dbNodes = tx.getAllNodes().iterator()) {
        // Nodes
        Map<String, Node> nodeByInputId = new HashMap<>(nodeCount);
        while (dbNodes.hasNext()) {
            Node node = dbNodes.next();
            String id = (String) node.getProperty("id");
            // put() returning non-null would mean two database nodes share the same id.
            assertNull(nodeByInputId.put(id, node));
        }
        int verifiedNodes = 0;
        long allNodesScanLabelCount = 0;
        // Reused for every node and relationship read from the input.
        InputEntity input = new InputEntity();
        // The chunk is a closeable resource; release it as soon as the nodes have been read.
        try (InputChunk nodeChunk = nodes.newChunk()) {
            while (nodes.next(nodeChunk)) {
                while (nodeChunk.next(input)) {
                    String iid = uniqueId(input.idGroup, input.objectId);
                    Node node = nodeByInputId.get(iid);
                    assertNodeEquals(input, node);
                    verifiedNodes++;
                    assertDegrees(node);
                    allNodesScanLabelCount += Iterables.count(node.getLabels());
                }
            }
        }
        assertEquals(nodeCount, verifiedNodes);
        // Labels
        long labelScanStoreEntryCount = stream(tx.getAllLabels()).flatMap(l -> tx.findNodes(l).stream()).count();
        assertEquals(allNodesScanLabelCount, labelScanStoreEntryCount, format("Expected label scan store and node store to have same number labels. But %n" + "#labelsInNodeStore=%d%n" + "#labelsInLabelScanStore=%d%n", allNodesScanLabelCount, labelScanStoreEntryCount));
        // Relationships
        try (InputChunk relationshipChunk = relationships.newChunk()) {
            Map<String, Relationship> relationshipByName = new HashMap<>();
            for (Relationship relationship : tx.getAllRelationships()) {
                relationshipByName.put((String) relationship.getProperty("id"), relationship);
            }
            int verifiedRelationships = 0;
            while (relationships.next(relationshipChunk)) {
                while (relationshipChunk.next(input)) {
                    // The InputIdGenerator is expected to generate some (very few) relationships that refer
                    // to missing nodes. Those are not looked up in the database, but they still count
                    // towards the total below. Only relationships with both endpoints present are verified.
                    if (!inputIdGenerator.isMiss(input.objectStartId) && !inputIdGenerator.isMiss(input.objectEndId)) {
                        String name = (String) propertyOf(input, "id");
                        Relationship relationship = relationshipByName.get(name);
                        assertNotNull(relationship, "Expected there to be a relationship with name '" + name + "'");
                        assertEquals(nodeByInputId.get(uniqueId(input.startIdGroup, input.objectStartId)), relationship.getStartNode());
                        assertEquals(nodeByInputId.get(uniqueId(input.endIdGroup, input.objectEndId)), relationship.getEndNode());
                        assertRelationshipEquals(input, relationship);
                    }
                    verifiedRelationships++;
                }
            }
            assertEquals(relationshipCount, verifiedRelationships);
        }
    }
}
Also used : ByteUnit.mebiBytes(org.neo4j.io.ByteUnit.mebiBytes) Arrays(java.util.Arrays) ResourceIterator(org.neo4j.graphdb.ResourceIterator) Array(java.lang.reflect.Array) NullLogService(org.neo4j.logging.internal.NullLogService) RandomExtension(org.neo4j.test.extension.RandomExtension) Collector(org.neo4j.internal.batchimport.input.Collector) Direction(org.neo4j.graphdb.Direction) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Config(org.neo4j.configuration.Config) Result(org.neo4j.consistency.ConsistencyCheckService.Result) DefaultPageCacheTracer(org.neo4j.io.pagecache.tracing.DefaultPageCacheTracer) NullLogProvider(org.neo4j.logging.NullLogProvider) DatabaseLayout(org.neo4j.io.layout.DatabaseLayout) InputChunk(org.neo4j.internal.batchimport.input.InputChunk) DEFAULT_DATABASE_NAME(org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME) RandomValues(org.neo4j.values.storable.RandomValues) ExtendWith(org.junit.jupiter.api.extension.ExtendWith) RandomRule(org.neo4j.test.rule.RandomRule) TransactionLogInitializer(org.neo4j.kernel.impl.transaction.log.files.TransactionLogInitializer) Input.knownEstimates(org.neo4j.internal.batchimport.input.Input.knownEstimates) Map(java.util.Map) Transaction(org.neo4j.graphdb.Transaction) Resources(org.junit.jupiter.api.parallel.Resources) Path(java.nio.file.Path) Input(org.neo4j.internal.batchimport.input.Input) MethodSource(org.junit.jupiter.params.provider.MethodSource) Standard(org.neo4j.kernel.impl.store.format.standard.Standard) Set(java.util.Set) UUID(java.util.UUID) InputEntityVisitor(org.neo4j.internal.batchimport.input.InputEntityVisitor) ExecutionMonitor(org.neo4j.internal.batchimport.staging.ExecutionMonitor) ResourceLock(org.junit.jupiter.api.parallel.ResourceLock) Arguments(org.junit.jupiter.params.provider.Arguments) Neo4jLayoutExtension(org.neo4j.test.extension.Neo4jLayoutExtension) String.format(java.lang.String.format) Entity(org.neo4j.graphdb.Entity) 
IdType(org.neo4j.internal.batchimport.input.IdType) INSTANCE(org.neo4j.memory.EmptyMemoryTracker.INSTANCE) Stream(java.util.stream.Stream) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) ProcessorAssignmentStrategies(org.neo4j.internal.batchimport.staging.ProcessorAssignmentStrategies) RelationshipType(org.neo4j.graphdb.RelationshipType) DatabaseManagementService(org.neo4j.dbms.api.DatabaseManagementService) ConsistencyCheckService(org.neo4j.consistency.ConsistencyCheckService) SuppressOutput(org.neo4j.test.rule.SuppressOutput) EMPTY(org.neo4j.internal.batchimport.AdditionalInitialIds.EMPTY) LongAdder(java.util.concurrent.atomic.LongAdder) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) Label(org.neo4j.graphdb.Label) GraphDatabaseSettings(org.neo4j.configuration.GraphDatabaseSettings) Groups(org.neo4j.internal.batchimport.input.Groups) StageExecution(org.neo4j.internal.batchimport.staging.StageExecution) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) HashMap(java.util.HashMap) SuppressOutputExtension(org.neo4j.test.extension.SuppressOutputExtension) Node(org.neo4j.graphdb.Node) RecordFormats(org.neo4j.kernel.impl.store.format.RecordFormats) Values(org.neo4j.values.storable.Values) TestDatabaseManagementServiceBuilder(org.neo4j.test.TestDatabaseManagementServiceBuilder) GraphDatabaseService(org.neo4j.graphdb.GraphDatabaseService) Inject(org.neo4j.test.extension.Inject) Iterables(org.neo4j.internal.helpers.collection.Iterables) Math.toIntExact(java.lang.Math.toIntExact) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) JobScheduler(org.neo4j.scheduler.JobScheduler) Arguments.arguments(org.junit.jupiter.params.provider.Arguments.arguments) DependencyResolver(org.neo4j.common.DependencyResolver) PrintStream(java.io.PrintStream) Iterables.count(org.neo4j.internal.helpers.collection.Iterables.count) Files(java.nio.file.Files) 
Iterables.stream(org.neo4j.internal.helpers.collection.Iterables.stream) IndexImporterFactoryImpl(org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl) InputEntity(org.neo4j.internal.batchimport.input.InputEntity) IOException(java.io.IOException) ProgressMonitorFactory(org.neo4j.internal.helpers.progress.ProgressMonitorFactory) ConsistencyCheckIncompleteException(org.neo4j.consistency.checking.full.ConsistencyCheckIncompleteException) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Relationship(org.neo4j.graphdb.Relationship) Iterators.asSet(org.neo4j.internal.helpers.collection.Iterators.asSet) Group(org.neo4j.internal.batchimport.input.Group) ThreadPoolJobScheduler(org.neo4j.test.scheduler.ThreadPoolJobScheduler) FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) HashMap(java.util.HashMap) Node(org.neo4j.graphdb.Node) InputChunk(org.neo4j.internal.batchimport.input.InputChunk) LongAdder(java.util.concurrent.atomic.LongAdder) Relationship(org.neo4j.graphdb.Relationship) InputEntity(org.neo4j.internal.batchimport.input.InputEntity)

Aggregations

InputChunk (org.neo4j.internal.batchimport.input.InputChunk)4 IOException (java.io.IOException)3 InputEntity (org.neo4j.internal.batchimport.input.InputEntity)3 PrintStream (java.io.PrintStream)2 Path (java.nio.file.Path)2 PrintWriter (java.io.PrintWriter)1 Math.toIntExact (java.lang.Math.toIntExact)1 String.format (java.lang.String.format)1 Array (java.lang.reflect.Array)1 Files (java.nio.file.Files)1 Arrays (java.util.Arrays)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 Set (java.util.Set)1 UUID (java.util.UUID)1 ExecutorService (java.util.concurrent.ExecutorService)1 LongAdder (java.util.concurrent.atomic.LongAdder)1 Stream (java.util.stream.Stream)1 Assertions.assertThat (org.assertj.core.api.Assertions.assertThat)1 Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals)1