Search in sources:

Example 1 with Groups

use of org.neo4j.internal.batchimport.input.Groups in project neo4j by neo4j.

From the class CsvInputTest, method shouldHaveRelationshipsSpecifyStartEndNodeIdGroupsInHeader.

// Checks that relationships read from a CsvInput report the start/end node groups
// that were named in the START_ID / END_ID entries of the relationship header.
@Test
public void shouldHaveRelationshipsSpecifyStartEndNodeIdGroupsInHeader() throws Exception {
    // GIVEN: two relationship rows and one named group for each end of a relationship
    IdType idType = IdType.INTEGER;
    Iterable<DataFactory> relationshipData = dataIterable(data("123,TYPE,234\n" + "345,TYPE,456"));
    Groups groups = new Groups();
    Group startNodeGroup = groups.getOrCreate("StartGroup");
    Group endNodeGroup = groups.getOrCreate("EndGroup");
    String startIdColumn = ":ID(" + startNodeGroup.name() + ")";
    String endIdColumn = ":ID(" + endNodeGroup.name() + ")";
    Iterable<DataFactory> nodeData = dataIterable(data(startIdColumn), data(endIdColumn));
    Input csvInput = new CsvInput(
            nodeData,
            defaultFormatNodeFileHeader(),
            relationshipData,
            header(
                    entry(null, Type.START_ID, startNodeGroup.name(), CsvInput.idExtractor(idType, extractors)),
                    entry(null, Type.TYPE, extractors.string()),
                    entry(null, Type.END_ID, endNodeGroup.name(), CsvInput.idExtractor(idType, extractors))),
            idType,
            config(),
            NO_MONITOR,
            INSTANCE);
    // WHEN/THEN: both rows come back with the expected groups and ids, then the iterator is exhausted
    try (InputIterator relationshipIterator = csvInput.relationships(EMPTY).iterator()) {
        assertRelationship(relationshipIterator, startNodeGroup, 123L, endNodeGroup, 234L, "TYPE", properties());
        assertRelationship(relationshipIterator, startNodeGroup, 345L, endNodeGroup, 456L, "TYPE", properties());
        assertFalse(readNext(relationshipIterator));
    }
}
Also used : Group(org.neo4j.internal.batchimport.input.Group) InputIterator(org.neo4j.internal.batchimport.InputIterator) Input(org.neo4j.internal.batchimport.input.Input) Groups(org.neo4j.internal.batchimport.input.Groups) IdType(org.neo4j.internal.batchimport.input.IdType) Test(org.junit.Test)

Example 2 with Groups

use of org.neo4j.internal.batchimport.input.Groups in project neo4j by neo4j.

From the class QuickImport, method main.

/**
 * Command-line entry point: generates node and relationship input and either writes it out as CSV
 * or imports it with a {@link BatchImporter}.
 *
 * <p>Arguments read here (via {@link Args}): {@code nodes}, {@code relationships}, {@code labels}
 * (default 4), {@code relationship-types} (default 4), {@code into}, {@code random-seed}
 * (default: current time in millis), {@code id-type} (default {@code INTEGER}), {@code db-config},
 * {@code high-io}, {@code pagecache-memory}, {@code processors}, {@code max-memory},
 * {@code factor-bad-node-data} (default 0), {@code factor-bad-relationship-data} (default 0),
 * {@code to-csv} and {@code v}.
 *
 * @param arguments raw command-line arguments, parsed with {@link Args#parse}
 * @throws IOException declared by this method; the throwing call sites are not identifiable from this excerpt
 */
public static void main(String[] arguments) throws IOException {
    Args args = Args.parse(arguments);
    // NOTE(review): "nodes" and "relationships" have no default (null is passed to parseLongWithUnit);
    // how parseLongWithUnit treats null is not visible here — confirm both are effectively mandatory.
    long nodeCount = parseLongWithUnit(args.get("nodes", null));
    long relationshipCount = parseLongWithUnit(args.get("relationships", null));
    int labelCount = args.getNumber("labels", 4).intValue();
    int relationshipTypeCount = args.getNumber("relationship-types", 4).intValue();
    // Target directory; used both for the CSV output and as the database layout root below.
    Path dir = Path.of(args.get("into"));
    long randomSeed = args.getNumber("random-seed", currentTimeMillis()).longValue();
    // CSV reader configuration (org.neo4j.csv.reader.Configuration), distinct from the importer configuration below.
    Configuration config = Configuration.COMMAS;
    Extractors extractors = new Extractors(config.arrayDelimiter());
    IdType idType = IdType.valueOf(args.get("id-type", IdType.INTEGER.name()));
    // The same Groups instance is handed to both header parsers.
    Groups groups = new Groups();
    Header nodeHeader = parseNodeHeader(args, idType, extractors, groups);
    Header relationshipHeader = parseRelationshipHeader(args, idType, extractors, groups);
    // Database config: loaded from the "db-config" file when given, otherwise defaults.
    Config dbConfig;
    String dbConfigFileName = args.get("db-config", null);
    if (dbConfigFileName != null) {
        dbConfig = Config.newBuilder().fromFile(Path.of(dbConfigFileName)).build();
    } else {
        dbConfig = Config.defaults();
    }
    // null means "high-io" was not specified, so the default configuration decides (see highIO() below).
    Boolean highIo = args.has("high-io") ? args.getBoolean("high-io") : null;
    LogProvider logging = NullLogProvider.getInstance();
    long pageCacheMemory = args.getNumber("pagecache-memory", org.neo4j.internal.batchimport.Configuration.MAX_PAGE_CACHE_MEMORY).longValue();
    // Importer configuration: starts from defaultConfiguration(dir) and overrides selected values from the arguments.
    org.neo4j.internal.batchimport.Configuration importConfig = new org.neo4j.internal.batchimport.Configuration.Overridden(defaultConfiguration(dir)) {

        // "processors" argument if given, otherwise the wrapped configuration's value.
        @Override
        public int maxNumberOfProcessors() {
            return args.getNumber("processors", super.maxNumberOfProcessors()).intValue();
        }

        // Explicit "high-io" argument if given, otherwise the wrapped configuration's value.
        @Override
        public boolean highIO() {
            return highIo != null ? highIo : super.highIO();
        }

        // Value computed once above from "pagecache-memory" (default MAX_PAGE_CACHE_MEMORY).
        @Override
        public long pageCacheMemory() {
            return pageCacheMemory;
        }

        // "max-memory" argument parsed with parseMaxMemory if given, otherwise the wrapped configuration's value.
        @Override
        public long maxMemoryUsage() {
            String custom = args.get("max-memory", null);
            return custom != null ? parseMaxMemory(custom) : super.maxMemoryUsage();
        }

        // Always requests both the label index and the relationship type index.
        @Override
        public IndexConfig indexConfig() {
            return IndexConfig.create().withLabelIndex().withRelationshipTypeIndex();
        }
    };
    float factorBadNodeData = args.getNumber("factor-bad-node-data", 0).floatValue();
    float factorBadRelationshipData = args.getNumber("factor-bad-relationship-data", 0).floatValue();
    Input input = new DataGeneratorInput(nodeCount, relationshipCount, idType, randomSeed, 0, nodeHeader, relationshipHeader, labelCount, relationshipTypeCount, factorBadNodeData, factorBadRelationshipData);
    // Both the file system and the Lifespan are closed when the import finishes or fails.
    try (FileSystemAbstraction fileSystem = new DefaultFileSystemAbstraction();
        Lifespan life = new Lifespan()) {
        BatchImporter consumer;
        if (args.getBoolean("to-csv")) {
            // "to-csv": write the generated data as CSV into dir instead of importing it.
            consumer = new CsvOutput(dir, nodeHeader, relationshipHeader, config);
        } else {
            // The seed is printed only on the import path.
            System.out.println("Seed " + randomSeed);
            // The scheduler is registered with the Lifespan declared in the try-with-resources above.
            final JobScheduler jobScheduler = life.add(createScheduler());
            // "v" selects the SpectrumExecutionMonitor; otherwise defaultVisible() is used.
            boolean verbose = args.getBoolean("v");
            ExecutionMonitor monitor = verbose ? new SpectrumExecutionMonitor(2, TimeUnit.SECONDS, System.out, 100) : defaultVisible();
            consumer = BatchImporterFactory.withHighestPriority().instantiate(DatabaseLayout.ofFlat(dir), fileSystem, PageCacheTracer.NULL, importConfig, new SimpleLogService(logging, logging), monitor, EMPTY, dbConfig, RecordFormatSelector.selectForConfig(dbConfig, logging), NO_MONITOR, jobScheduler, Collector.EMPTY, TransactionLogInitializer.getLogFilesInitializer(), new IndexImporterFactoryImpl(dbConfig), INSTANCE);
        }
        consumer.doImport(input);
    }
}
Also used : DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) SpectrumExecutionMonitor(org.neo4j.internal.batchimport.staging.SpectrumExecutionMonitor) Configuration(org.neo4j.csv.reader.Configuration) Configuration.defaultConfiguration(org.neo4j.internal.batchimport.Configuration.defaultConfiguration) SimpleLogService(org.neo4j.logging.internal.SimpleLogService) Config(org.neo4j.configuration.Config) IndexConfig(org.neo4j.internal.batchimport.IndexConfig) DataGeneratorInput(org.neo4j.internal.batchimport.input.DataGeneratorInput) Input(org.neo4j.internal.batchimport.input.Input) BatchImporter(org.neo4j.internal.batchimport.BatchImporter) ParallelBatchImporter(org.neo4j.internal.batchimport.ParallelBatchImporter) Groups(org.neo4j.internal.batchimport.input.Groups) SpectrumExecutionMonitor(org.neo4j.internal.batchimport.staging.SpectrumExecutionMonitor) ExecutionMonitor(org.neo4j.internal.batchimport.staging.ExecutionMonitor) Path(java.nio.file.Path) JobScheduler(org.neo4j.scheduler.JobScheduler) Args(org.neo4j.internal.helpers.Args) DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) IdType(org.neo4j.internal.batchimport.input.IdType) LogProvider(org.neo4j.logging.LogProvider) NullLogProvider(org.neo4j.logging.NullLogProvider) Extractors(org.neo4j.csv.reader.Extractors) Header(org.neo4j.internal.batchimport.input.csv.Header) IndexImporterFactoryImpl(org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl) DataGeneratorInput(org.neo4j.internal.batchimport.input.DataGeneratorInput) Lifespan(org.neo4j.kernel.lifecycle.Lifespan)

Example 3 with Groups

use of org.neo4j.internal.batchimport.input.Groups in project neo4j by neo4j.

From the class CsvInputEstimateCalculationIT, method shouldCalculateCorrectEstimatesOnEmptyData.

// Checks that every estimate reported by CsvInput is zero when the node and
// relationship data were generated with zero for both numeric generateData arguments.
@Test
void shouldCalculateCorrectEstimatesOnEmptyData() throws Exception {
    // given
    Groups groups = new Groups();
    Collection<DataFactory> emptyNodeFiles = singletonList(
            generateData(defaultFormatNodeFileHeader(), new MutableLong(), 0, 0, ":ID", "nodes-1.csv", groups));
    Collection<DataFactory> emptyRelationshipFiles = singletonList(
            generateData(defaultFormatRelationshipFileHeader(), new MutableLong(), 0, 0, ":START_ID,:TYPE,:END_ID", "rels-1.csv", groups));
    Input csvInput = new CsvInput(
            emptyNodeFiles,
            defaultFormatNodeFileHeader(),
            emptyRelationshipFiles,
            defaultFormatRelationshipFileHeader(),
            IdType.INTEGER,
            COMMAS,
            CsvInput.NO_MONITOR,
            groups,
            INSTANCE);
    // when
    PropertyValueRecordSizeCalculator sizeCalculator = new PropertyValueRecordSizeCalculator(
            LATEST_RECORD_FORMATS.property().getRecordSize(NO_STORE_HEADER),
            GraphDatabaseInternalSettings.string_block_size.defaultValue(),
            0,
            GraphDatabaseInternalSettings.array_block_size.defaultValue(),
            0);
    Input.Estimates estimates = csvInput.calculateEstimates(sizeCalculator);
    // then: nothing was generated, so every estimate is zero
    assertEquals(0, estimates.numberOfNodes());
    assertEquals(0, estimates.numberOfRelationships());
    assertEquals(0, estimates.numberOfRelationshipProperties());
    assertEquals(0, estimates.numberOfNodeProperties());
    assertEquals(0, estimates.numberOfNodeLabels());
}
Also used : MutableLong(org.apache.commons.lang3.mutable.MutableLong) Input(org.neo4j.internal.batchimport.input.Input) Groups(org.neo4j.internal.batchimport.input.Groups) PropertyValueRecordSizeCalculator(org.neo4j.kernel.impl.store.PropertyValueRecordSizeCalculator) Test(org.junit.jupiter.api.Test)

Example 4 with Groups

use of org.neo4j.internal.batchimport.input.Groups in project neo4j by neo4j.

From the class CsvInputEstimateCalculationIT, method generateData.

// Builds a CsvInput over three generated node files and two generated relationship files.
// One MutableLong is threaded through all generateData(...) calls; the last file of each kind
// is sized as "total minus the counter's current value", so the call order below matters.
// NOTE(review): presumably the per-file generateData overload advances the counter — confirm there.
private Input generateData() throws IOException {
    Groups groups = new Groups();
    MutableLong counter = new MutableLong();

    List<DataFactory> nodeFiles = new ArrayList<>();
    nodeFiles.add(generateData(
            defaultFormatNodeFileHeader(), counter, NODE_COUNT / 3, NODE_COUNT,
            ":ID", "nodes-1.csv", groups));
    nodeFiles.add(generateData(
            defaultFormatNodeFileHeader(), counter, NODE_COUNT / 3, NODE_COUNT,
            ":ID,:LABEL,name:String,yearOfBirth:int", "nodes-2.csv", groups));
    nodeFiles.add(generateData(
            defaultFormatNodeFileHeader(), counter, NODE_COUNT - counter.longValue(), NODE_COUNT,
            ":ID,name:String,yearOfBirth:int,other", "nodes-3.csv", groups));

    // Reset the shared counter before generating the relationship files.
    counter.setValue(0);
    List<DataFactory> relationshipFiles = new ArrayList<>();
    relationshipFiles.add(generateData(
            defaultFormatRelationshipFileHeader(), counter, RELATIONSHIP_COUNT / 2, NODE_COUNT,
            ":START_ID,:TYPE,:END_ID", "relationships-1.csv", groups));
    relationshipFiles.add(generateData(
            defaultFormatRelationshipFileHeader(), counter, RELATIONSHIP_COUNT - counter.longValue(), NODE_COUNT,
            ":START_ID,:TYPE,:END_ID,prop1,prop2", "relationships-2.csv", groups));

    return new CsvInput(
            nodeFiles,
            defaultFormatNodeFileHeader(),
            relationshipFiles,
            defaultFormatRelationshipFileHeader(),
            IdType.INTEGER,
            COMMAS,
            CsvInput.NO_MONITOR,
            groups,
            INSTANCE);
}
Also used : MutableLong(org.apache.commons.lang3.mutable.MutableLong) Groups(org.neo4j.internal.batchimport.input.Groups) ArrayList(java.util.ArrayList)

Example 5 with Groups

use of org.neo4j.internal.batchimport.input.Groups in project neo4j by neo4j.

From the class CsvInputTest, method shouldHaveNodesBelongToGroupSpecifiedInHeader.

// Checks that nodes read from a CsvInput report the group named in the ID entry of the node header.
@Test
public void shouldHaveNodesBelongToGroupSpecifiedInHeader() throws Exception {
    // GIVEN: two node rows and a node header whose ID entry names "MyGroup"
    IdType idType = IdType.INTEGER;
    Iterable<DataFactory> nodeData = dataIterable(data("123,one\n" + "456,two"));
    Groups groups = new Groups();
    Group expectedGroup = groups.getOrCreate("MyGroup");
    Input csvInput = new CsvInput(
            nodeData,
            header(
                    entry(null, Type.ID, expectedGroup.name(), CsvInput.idExtractor(idType, extractors)),
                    entry("name", Type.PROPERTY, extractors.string())),
            datas(),
            defaultFormatRelationshipFileHeader(),
            idType,
            config(),
            NO_MONITOR,
            INSTANCE);
    // WHEN/THEN: both rows come back in the expected group, then the iterator is exhausted
    try (InputIterator nodeIterator = csvInput.nodes(EMPTY).iterator()) {
        assertNextNode(nodeIterator, expectedGroup, 123L, properties("name", "one"), labels());
        assertNextNode(nodeIterator, expectedGroup, 456L, properties("name", "two"), labels());
        assertFalse(readNext(nodeIterator));
    }
}
Also used : Group(org.neo4j.internal.batchimport.input.Group) InputIterator(org.neo4j.internal.batchimport.InputIterator) Input(org.neo4j.internal.batchimport.input.Input) Groups(org.neo4j.internal.batchimport.input.Groups) IdType(org.neo4j.internal.batchimport.input.IdType) Test(org.junit.Test)

Aggregations

Groups (org.neo4j.internal.batchimport.input.Groups): 7; Input (org.neo4j.internal.batchimport.input.Input): 5; IdType (org.neo4j.internal.batchimport.input.IdType): 4; Path (java.nio.file.Path): 3; Config (org.neo4j.configuration.Config): 3; Group (org.neo4j.internal.batchimport.input.Group): 3; ExecutionMonitor (org.neo4j.internal.batchimport.staging.ExecutionMonitor): 3; IndexImporterFactoryImpl (org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl): 3; JobScheduler (org.neo4j.scheduler.JobScheduler): 3; PrintStream (java.io.PrintStream): 2; LongAdder (java.util.concurrent.atomic.LongAdder): 2; MutableLong (org.apache.commons.lang3.mutable.MutableLong): 2; Test (org.junit.Test): 2; ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 2; MethodSource (org.junit.jupiter.params.provider.MethodSource): 2; DatabaseManagementService (org.neo4j.dbms.api.DatabaseManagementService): 2; GraphDatabaseService (org.neo4j.graphdb.GraphDatabaseService): 2; Transaction (org.neo4j.graphdb.Transaction): 2; FileSystemAbstraction (org.neo4j.io.fs.FileSystemAbstraction): 2; DefaultPageCacheTracer (org.neo4j.io.pagecache.tracing.DefaultPageCacheTracer): 2