Search in sources :

Example 1 with InputEntity

use of org.neo4j.internal.batchimport.input.InputEntity in project neo4j by neo4j.

the class CsvInputBatchImportIT method nodeDataAsFile.

/**
 * Writes the given node entities to a {@code nodes.csv} file inside the test directory.
 * <p>
 * The file starts with a fixed header line. Every node then becomes one line consisting of its id,
 * each of its property values in index order (each followed by a comma) and finally its labels as
 * produced by {@code csvLabels}, if there are any.
 *
 * @param nodeData the node entities to serialize, one CSV line per entity.
 * @return the path of the written file.
 * @throws IOException if the file cannot be opened or written.
 */
private Path nodeDataAsFile(List<InputEntity> nodeData) throws IOException {
    Path csvFile = testDirectory.file("nodes.csv");
    try (Writer out = fileSystem.openAsWriter(csvFile, StandardCharsets.UTF_8, false)) {
        // Header line, declaring the type of every column
        println(out, "id:ID,name,pointA:Point{crs:WGS-84},pointB:Point,date:Date,time:Time,dateTime:DateTime,dateTime2:DateTime,localTime:LocalTime," + "localDateTime:LocalDateTime,duration:Duration,floatArray:float[],dateArray:date[],pointArray:point[],some-labels:LABEL");
        // One data line per node
        for (InputEntity node : nodeData) {
            String labels = csvLabels(node.labels());
            StringBuilder line = new StringBuilder();
            line.append(node.id()).append(',');
            int propertyIndex = 0;
            while (propertyIndex < node.propertyCount()) {
                line.append(serializePropertyValue(node.propertyValue(propertyIndex)));
                line.append(',');
                propertyIndex++;
            }
            // The label column is left empty when the node has no labels
            boolean noLabels = labels == null || labels.isEmpty();
            if (!noLabels) {
                line.append(labels);
            }
            println(out, line.toString());
        }
    }
    return csvFile;
}
Also used : Path(java.nio.file.Path) InputEntity(org.neo4j.internal.batchimport.input.InputEntity) Writer(java.io.Writer)

Example 2 with InputEntity

use of org.neo4j.internal.batchimport.input.InputEntity in project neo4j by neo4j.

the class CsvInput method sample.

/**
 * Estimates entity and property counts for a set of input groups by reading only the beginning of
 * every source and extrapolating to the full source size.
 * <p>
 * For each source, entities are read until the iterator position reaches {@code ESTIMATE_SAMPLE_SIZE}
 * (or the source is exhausted). The sampled averages are then scaled by the ratio between the source
 * length (adjusted by the iterator's compression ratio) and the position reached while sampling.
 *
 * @param dataFactories the input groups to sample; each factory is treated as one group.
 * @param headerFactory factory used to extract the header from the first source of each group.
 * @param valueSizeCalculator calculator passed on to {@code Inputs.calculatePropertySize}.
 * @param additionalCalculator extra per-entity metric, accumulated into the fourth estimate slot.
 * @return an array of {@code [entity count, property count, property size, additional]} estimates.
 * @throws IOException if reading any of the sources fails.
 */
private long[] sample(Iterable<DataFactory> dataFactories, Header.Factory headerFactory, PropertySizeCalculator valueSizeCalculator, ToIntFunction<InputEntity> additionalCalculator) throws IOException {
    // [entity count, property count, property size, labels (for nodes only)]
    long[] estimates = new long[4];
    try (CsvInputChunkProxy chunk = new CsvInputChunkProxy()) {
        int groupId = 0;
        // Every data factory represents one group of input files
        for (DataFactory dataFactory : dataFactories) {
            groupId++;
            Header header = null;
            Data data = dataFactory.create(config);
            RawIterator<CharReadable, IOException> sources = data.stream();
            while (sources.hasNext()) {
                try (CharReadable source = sources.next()) {
                    if (header == null) {
                        // Extract the header from the first file in this group
                        // This is the only place we monitor type normalization because it's before import and it touches all headers
                        header = extractHeader(source, headerFactory, idType, config, groups, monitor);
                    }
                    try (CsvInputIterator iterator = new CsvInputIterator(source, data.decorator(), header, config, idType, EMPTY, CsvGroupInputIterator.extractors(config), groupId);
                        InputEntity entity = new InputEntity()) {
                        int sampledEntities = 0;
                        int sampledProperties = 0;
                        int sampledPropertySize = 0;
                        int sampledAdditional = 0;
                        while (iterator.position() < ESTIMATE_SAMPLE_SIZE && iterator.next(chunk)) {
                            while (chunk.next(entity)) {
                                sampledProperties += entity.propertyCount();
                                sampledPropertySize += Inputs.calculatePropertySize(entity, valueSizeCalculator, NULL, memoryTracker);
                                sampledAdditional += additionalCalculator.applyAsInt(entity);
                                sampledEntities++;
                            }
                        }
                        if (sampledEntities == 0) {
                            // Nothing was sampled from this source, so it contributes nothing to the estimates
                            continue;
                        }
                        // Scale the sampled numbers up to the size of the whole source
                        long position = iterator.position();
                        double compressionRatio = iterator.compressionRatio();
                        double actualFileSize = source.length() / compressionRatio;
                        long entityCountInSource = (long) ((actualFileSize / position) * sampledEntities);
                        estimates[0] += entityCountInSource;
                        estimates[1] += ((double) sampledProperties / sampledEntities) * entityCountInSource;
                        estimates[2] += ((double) sampledPropertySize / sampledEntities) * entityCountInSource;
                        estimates[3] += ((double) sampledAdditional / sampledEntities) * entityCountInSource;
                    }
                }
            }
        }
    }
    return estimates;
}
Also used : IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException) CsvInputIterator.extractHeader(org.neo4j.internal.batchimport.input.csv.CsvInputIterator.extractHeader) CharReadable(org.neo4j.csv.reader.CharReadable) InputEntity(org.neo4j.internal.batchimport.input.InputEntity)

Example 3 with InputEntity

use of org.neo4j.internal.batchimport.input.InputEntity in project neo4j by neo4j.

the class CsvOutput method consume.

/**
 * Writes the given header and all entities to CSV files.
 * <p>
 * The header goes into {@code name + "header.csv"}. The entities are then drained concurrently by one
 * task per available processor, each task writing to its own {@code name + "-" + id + ".csv"} file.
 * <p>
 * The method waits up to 10 minutes for the tasks to finish. Any task that has completed with an
 * exception by then causes this method to fail, instead of the error being silently dropped.
 * Tasks that are still running after the wait are left running, exactly as before.
 *
 * @param name prefix for the names of the generated files.
 * @param entities iterator providing the chunks of entities to write; shared by all writer tasks.
 * @param header header to serialize and to pass to the {@code deserializer}.
 * @param deserializer converts each entity into the line printed to the output file.
 * @throws IOException if writing the header fails, if any completed writer task failed, or if the
 * calling thread is interrupted while waiting (the interrupt flag is restored in that case).
 */
private void consume(String name, InputIterator entities, Header header, Deserializer deserializer) throws IOException {
    try (PrintStream out = file(name + "header.csv")) {
        serialize(out, header);
    }
    try {
        int threads = Runtime.getRuntime().availableProcessors();
        ExecutorService executor = Executors.newFixedThreadPool(threads);
        // Keep the futures: they are the only place a failure inside a writer task becomes visible
        java.util.List<java.util.concurrent.Future<Void>> writerTasks = new java.util.ArrayList<>(threads);
        for (int i = 0; i < threads; i++) {
            int id = i;
            writerTasks.add(executor.submit((Callable<Void>) () -> {
                StringDeserialization deserialization = new StringDeserialization(config);
                try (PrintStream out = file(name + "-" + id + ".csv");
                    InputChunk chunk = entities.newChunk()) {
                    InputEntity entity = new InputEntity();
                    while (entities.next(chunk)) {
                        while (chunk.next(entity)) {
                            out.println(deserializer.apply(entity, deserialization, header));
                        }
                    }
                }
                return null;
            }));
        }
        executor.shutdown();
        executor.awaitTermination(10, TimeUnit.MINUTES);
        for (java.util.concurrent.Future<Void> writerTask : writerTasks) {
            // Only inspect finished tasks so that the wait above stays the only blocking point
            if (writerTask.isDone()) {
                writerTask.get();
            }
        }
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IOException(e);
    } catch (java.util.concurrent.ExecutionException e) {
        // Surface the original failure of the writer task to the caller
        throw new IOException("Failed to write CSV output for '" + name + "'", e.getCause());
    }
}
Also used : PrintStream(java.io.PrintStream) StringDeserialization(org.neo4j.internal.batchimport.input.csv.StringDeserialization) ExecutorService(java.util.concurrent.ExecutorService) InputEntity(org.neo4j.internal.batchimport.input.InputEntity) InputChunk(org.neo4j.internal.batchimport.input.InputChunk) IOException(java.io.IOException)

Example 4 with InputEntity

use of org.neo4j.internal.batchimport.input.InputEntity in project neo4j by neo4j.

the class CsvInputEstimateCalculationIT method generateData.

/**
 * Generates a CSV file with random entity data and returns a {@link DataFactory} reading it.
 * <p>
 * The file starts with {@code headerString}, followed by one line per entity produced by a
 * {@code RandomEntityDataGenerator}. After the file has been written, {@code start} is advanced by
 * {@code count}.
 *
 * @param factory header factory used to parse {@code headerString}.
 * @param start value passed to the generator as its start; incremented by {@code count} afterwards.
 * @param count passed to the generator, both as-is and converted with {@code toIntExact}.
 * @param nodeCount passed to the generator as its first argument.
 * @param headerString header line, written verbatim as the first line of the file.
 * @param fileName name of the file to create inside the test directory.
 * @param groups id groups used when parsing the header.
 * @return a data factory over the generated file, using UTF-8 and no decorator.
 * @throws IOException if the file cannot be written.
 */
private DataFactory generateData(Header.Factory factory, MutableLong start, long count, long nodeCount, String headerString, String fileName, Groups groups) throws IOException {
    Path target = testDirectory.file(fileName);
    Header header = factory.create(charSeeker(wrap(headerString), COMMAS, false), COMMAS, IdType.INTEGER, groups);
    // The same single-token distribution is handed to the generator twice
    Distribution<String> tokens = new Distribution<>(new String[] { "Token" });
    Deserialization<String> toCsv = new StringDeserialization(COMMAS);
    try (PrintWriter writer = new PrintWriter(Files.newBufferedWriter(target));
        RandomEntityDataGenerator generator = new RandomEntityDataGenerator(nodeCount, count, toIntExact(count), random.seed(), start.longValue(), header, tokens, tokens, 0, 0, 5);
        InputChunk chunk = generator.newChunk();
        InputEntity entity = new InputEntity()) {
        writer.println(headerString);
        // Drain the generator chunk by chunk, writing one line per entity
        while (generator.next(chunk)) {
            while (chunk.next(entity)) {
                String line = convert(entity, toCsv, header);
                writer.println(line);
            }
        }
    }
    start.add(count);
    return DataFactories.data(InputEntityDecorators.NO_DECORATOR, StandardCharsets.UTF_8, target);
}
Also used : Path(java.nio.file.Path) DataFactories.defaultFormatNodeFileHeader(org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader) DataFactories.defaultFormatRelationshipFileHeader(org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatRelationshipFileHeader) Distribution(org.neo4j.internal.batchimport.input.Distribution) InputEntity(org.neo4j.internal.batchimport.input.InputEntity) InputChunk(org.neo4j.internal.batchimport.input.InputChunk) ByteUnit.bytesToString(org.neo4j.io.ByteUnit.bytesToString) RandomEntityDataGenerator(org.neo4j.internal.batchimport.input.RandomEntityDataGenerator) PrintWriter(java.io.PrintWriter)

Example 5 with InputEntity

use of org.neo4j.internal.batchimport.input.InputEntity in project neo4j by neo4j.

the class ParallelBatchImporterTest method verifyData.

/**
 * Verifies that the nodes, labels and relationships in the database match the generated input data.
 * <p>
 * The input is regenerated from the given seeds and compared entity by entity against what is read
 * through {@code tx}. Database nodes and relationships are looked up by their {@code "id"} property.
 *
 * @param nodeCount expected number of input nodes.
 * @param relationshipCount expected number of input relationships, including those skipped for
 * referring to missing nodes.
 * @param db the database; not read by this method itself.
 * @param tx transaction used for reading nodes, labels and relationships.
 * @param groups id group distribution used to regenerate the input.
 * @param nodeRandomSeed seed used to regenerate the node input.
 * @param relationshipRandomSeed seed used to regenerate the relationship input.
 * @throws IOException if reading the regenerated input fails.
 */
private void verifyData(int nodeCount, int relationshipCount, GraphDatabaseService db, Transaction tx, IdGroupDistribution groups, long nodeRandomSeed, long relationshipRandomSeed) throws IOException {
    // Read all nodes, relationships and properties and verify against the input data.
    LongAdder propertyCount = new LongAdder();
    try (InputIterator nodes = nodes(nodeRandomSeed, nodeCount, config.batchSize(), inputIdGenerator, groups, propertyCount).iterator();
        InputIterator relationships = relationships(relationshipRandomSeed, relationshipCount, config.batchSize(), inputIdGenerator, groups, propertyCount, new LongAdder()).iterator();
        ResourceIterator<Node> dbNodes = tx.getAllNodes().iterator()) {
        // Nodes
        Map<String, Node> nodeByInputId = new HashMap<>(nodeCount);
        while (dbNodes.hasNext()) {
            Node node = dbNodes.next();
            String id = (String) node.getProperty("id");
            // A non-null previous value would mean two database nodes share the same id
            assertNull(nodeByInputId.put(id, node));
        }
        int verifiedNodes = 0;
        long allNodesScanLabelCount = 0;
        // The same entity instance is reused for both the node and the relationship pass
        InputEntity input = new InputEntity();
        try (InputChunk chunk = nodes.newChunk()) {
            while (nodes.next(chunk)) {
                while (chunk.next(input)) {
                    String iid = uniqueId(input.idGroup, input.objectId);
                    Node node = nodeByInputId.get(iid);
                    assertNotNull(node, "Expected there to be a node with input id '" + iid + "'");
                    assertNodeEquals(input, node);
                    verifiedNodes++;
                    assertDegrees(node);
                    allNodesScanLabelCount += Iterables.count(node.getLabels());
                }
            }
        }
        assertEquals(nodeCount, verifiedNodes);
        // Labels
        long labelScanStoreEntryCount = stream(tx.getAllLabels()).flatMap(l -> tx.findNodes(l).stream()).count();
        assertEquals(allNodesScanLabelCount, labelScanStoreEntryCount, format("Expected label scan store and node store to have same number labels. But %n" + "#labelsInNodeStore=%d%n" + "#labelsInLabelScanStore=%d%n", allNodesScanLabelCount, labelScanStoreEntryCount));
        // Relationships
        Map<String, Relationship> relationshipByName = new HashMap<>();
        for (Relationship relationship : tx.getAllRelationships()) {
            relationshipByName.put((String) relationship.getProperty("id"), relationship);
        }
        int verifiedRelationships = 0;
        try (InputChunk chunk = relationships.newChunk()) {
            while (relationships.next(chunk)) {
                while (chunk.next(input)) {
                    // The InputIdGenerator is expected to generate some (very few) relationships referring
                    // to missing nodes. Those are skipped here, but still counted below.
                    if (!inputIdGenerator.isMiss(input.objectStartId) && !inputIdGenerator.isMiss(input.objectEndId)) {
                        String name = (String) propertyOf(input, "id");
                        Relationship relationship = relationshipByName.get(name);
                        assertNotNull(relationship, "Expected there to be a relationship with name '" + name + "'");
                        assertEquals(nodeByInputId.get(uniqueId(input.startIdGroup, input.objectStartId)), relationship.getStartNode());
                        assertEquals(nodeByInputId.get(uniqueId(input.endIdGroup, input.objectEndId)), relationship.getEndNode());
                        assertRelationshipEquals(input, relationship);
                    }
                    verifiedRelationships++;
                }
            }
        }
        assertEquals(relationshipCount, verifiedRelationships);
    }
}
Also used : ByteUnit.mebiBytes(org.neo4j.io.ByteUnit.mebiBytes) Arrays(java.util.Arrays) ResourceIterator(org.neo4j.graphdb.ResourceIterator) Array(java.lang.reflect.Array) NullLogService(org.neo4j.logging.internal.NullLogService) RandomExtension(org.neo4j.test.extension.RandomExtension) Collector(org.neo4j.internal.batchimport.input.Collector) Direction(org.neo4j.graphdb.Direction) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Config(org.neo4j.configuration.Config) Result(org.neo4j.consistency.ConsistencyCheckService.Result) DefaultPageCacheTracer(org.neo4j.io.pagecache.tracing.DefaultPageCacheTracer) NullLogProvider(org.neo4j.logging.NullLogProvider) DatabaseLayout(org.neo4j.io.layout.DatabaseLayout) InputChunk(org.neo4j.internal.batchimport.input.InputChunk) DEFAULT_DATABASE_NAME(org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME) RandomValues(org.neo4j.values.storable.RandomValues) ExtendWith(org.junit.jupiter.api.extension.ExtendWith) RandomRule(org.neo4j.test.rule.RandomRule) TransactionLogInitializer(org.neo4j.kernel.impl.transaction.log.files.TransactionLogInitializer) Input.knownEstimates(org.neo4j.internal.batchimport.input.Input.knownEstimates) Map(java.util.Map) Transaction(org.neo4j.graphdb.Transaction) Resources(org.junit.jupiter.api.parallel.Resources) Path(java.nio.file.Path) Input(org.neo4j.internal.batchimport.input.Input) MethodSource(org.junit.jupiter.params.provider.MethodSource) Standard(org.neo4j.kernel.impl.store.format.standard.Standard) Set(java.util.Set) UUID(java.util.UUID) InputEntityVisitor(org.neo4j.internal.batchimport.input.InputEntityVisitor) ExecutionMonitor(org.neo4j.internal.batchimport.staging.ExecutionMonitor) ResourceLock(org.junit.jupiter.api.parallel.ResourceLock) Arguments(org.junit.jupiter.params.provider.Arguments) Neo4jLayoutExtension(org.neo4j.test.extension.Neo4jLayoutExtension) String.format(java.lang.String.format) Entity(org.neo4j.graphdb.Entity) 
IdType(org.neo4j.internal.batchimport.input.IdType) INSTANCE(org.neo4j.memory.EmptyMemoryTracker.INSTANCE) Stream(java.util.stream.Stream) Assertions.assertTrue(org.junit.jupiter.api.Assertions.assertTrue) ProcessorAssignmentStrategies(org.neo4j.internal.batchimport.staging.ProcessorAssignmentStrategies) RelationshipType(org.neo4j.graphdb.RelationshipType) DatabaseManagementService(org.neo4j.dbms.api.DatabaseManagementService) ConsistencyCheckService(org.neo4j.consistency.ConsistencyCheckService) SuppressOutput(org.neo4j.test.rule.SuppressOutput) EMPTY(org.neo4j.internal.batchimport.AdditionalInitialIds.EMPTY) LongAdder(java.util.concurrent.atomic.LongAdder) Assertions.assertNotNull(org.junit.jupiter.api.Assertions.assertNotNull) Label(org.neo4j.graphdb.Label) GraphDatabaseSettings(org.neo4j.configuration.GraphDatabaseSettings) Groups(org.neo4j.internal.batchimport.input.Groups) StageExecution(org.neo4j.internal.batchimport.staging.StageExecution) Assertions.assertNull(org.junit.jupiter.api.Assertions.assertNull) HashMap(java.util.HashMap) SuppressOutputExtension(org.neo4j.test.extension.SuppressOutputExtension) Node(org.neo4j.graphdb.Node) RecordFormats(org.neo4j.kernel.impl.store.format.RecordFormats) Values(org.neo4j.values.storable.Values) TestDatabaseManagementServiceBuilder(org.neo4j.test.TestDatabaseManagementServiceBuilder) GraphDatabaseService(org.neo4j.graphdb.GraphDatabaseService) Inject(org.neo4j.test.extension.Inject) Iterables(org.neo4j.internal.helpers.collection.Iterables) Math.toIntExact(java.lang.Math.toIntExact) Assertions.assertEquals(org.junit.jupiter.api.Assertions.assertEquals) JobScheduler(org.neo4j.scheduler.JobScheduler) Arguments.arguments(org.junit.jupiter.params.provider.Arguments.arguments) DependencyResolver(org.neo4j.common.DependencyResolver) PrintStream(java.io.PrintStream) Iterables.count(org.neo4j.internal.helpers.collection.Iterables.count) Files(java.nio.file.Files) 
Iterables.stream(org.neo4j.internal.helpers.collection.Iterables.stream) IndexImporterFactoryImpl(org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl) InputEntity(org.neo4j.internal.batchimport.input.InputEntity) IOException(java.io.IOException) ProgressMonitorFactory(org.neo4j.internal.helpers.progress.ProgressMonitorFactory) ConsistencyCheckIncompleteException(org.neo4j.consistency.checking.full.ConsistencyCheckIncompleteException) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) Relationship(org.neo4j.graphdb.Relationship) Iterators.asSet(org.neo4j.internal.helpers.collection.Iterators.asSet) Group(org.neo4j.internal.batchimport.input.Group) ThreadPoolJobScheduler(org.neo4j.test.scheduler.ThreadPoolJobScheduler) FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) HashMap(java.util.HashMap) Node(org.neo4j.graphdb.Node) InputChunk(org.neo4j.internal.batchimport.input.InputChunk) LongAdder(java.util.concurrent.atomic.LongAdder) Relationship(org.neo4j.graphdb.Relationship) InputEntity(org.neo4j.internal.batchimport.input.InputEntity)

Aggregations

InputEntity (org.neo4j.internal.batchimport.input.InputEntity)10 IOException (java.io.IOException)4 Path (java.nio.file.Path)4 ArrayList (java.util.ArrayList)3 HashMap (java.util.HashMap)3 Map (java.util.Map)3 Config (org.neo4j.configuration.Config)3 DatabaseManagementService (org.neo4j.dbms.api.DatabaseManagementService)3 GraphDatabaseService (org.neo4j.graphdb.GraphDatabaseService)3 Node (org.neo4j.graphdb.Node)3 Relationship (org.neo4j.graphdb.Relationship)3 Transaction (org.neo4j.graphdb.Transaction)3 InputChunk (org.neo4j.internal.batchimport.input.InputChunk)3 IndexImporterFactoryImpl (org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl)3 JobScheduler (org.neo4j.scheduler.JobScheduler)3 TestDatabaseManagementServiceBuilder (org.neo4j.test.TestDatabaseManagementServiceBuilder)3 ThreadPoolJobScheduler (org.neo4j.test.scheduler.ThreadPoolJobScheduler)3 PrintStream (java.io.PrintStream)2 String.format (java.lang.String.format)2 Set (java.util.Set)2