Search in sources :

Example 16 with IdType

use of org.neo4j.internal.batchimport.input.IdType in project neo4j by neo4j.

The class QuickImport, method main.

/**
 * Command-line entry point: builds a {@link DataGeneratorInput} from the given options and hands it
 * to a {@link BatchImporter}.
 *
 * <p>Options read from {@code arguments}: {@code nodes}, {@code relationships}, {@code labels}
 * (default 4), {@code relationship-types} (default 4), {@code into}, {@code random-seed}
 * (default: current time in millis), {@code id-type} (default {@code INTEGER}), {@code db-config},
 * {@code high-io}, {@code pagecache-memory}, {@code processors}, {@code max-memory},
 * {@code factor-bad-node-data} (default 0), {@code factor-bad-relationship-data} (default 0),
 * {@code to-csv} and {@code v}.
 *
 * <p>With {@code to-csv} the input is consumed by a {@link CsvOutput} targeting the {@code into}
 * directory; otherwise the seed is printed and the highest-priority {@link BatchImporterFactory}
 * instantiates the importer for that directory.
 *
 * @param arguments raw command-line arguments, parsed with {@link Args#parse}.
 * @throws IOException declared by the method; thrown by the operations invoked here on I/O failure.
 */
public static void main(String[] arguments) throws IOException {
    final Args options = Args.parse(arguments);

    // Size and shape of the generated data set.
    final long nodeCount = parseLongWithUnit(options.get("nodes", null));
    final long relationshipCount = parseLongWithUnit(options.get("relationships", null));
    final int labelCount = options.getNumber("labels", 4).intValue();
    final int relationshipTypeCount = options.getNumber("relationship-types", 4).intValue();
    final Path dir = Path.of(options.get("into"));
    final long randomSeed = options.getNumber("random-seed", currentTimeMillis()).longValue();

    // CSV-level configuration and the headers describing the generated entities.
    final Configuration csvConfig = Configuration.COMMAS;
    final Extractors extractors = new Extractors(csvConfig.arrayDelimiter());
    final IdType idType = IdType.valueOf(options.get("id-type", IdType.INTEGER.name()));
    final Groups groups = new Groups();
    final Header nodeHeader = parseNodeHeader(options, idType, extractors, groups);
    final Header relationshipHeader = parseRelationshipHeader(options, idType, extractors, groups);

    // Database configuration: loaded from file when "db-config" is given, defaults otherwise.
    final String dbConfigFileName = options.get("db-config", null);
    final Config dbConfig = dbConfigFileName != null
            ? Config.newBuilder().fromFile(Path.of(dbConfigFileName)).build()
            : Config.defaults();

    // null means "not specified on the command line", in which case the base configuration decides.
    final Boolean highIoOverride;
    if (options.has("high-io")) {
        highIoOverride = options.getBoolean("high-io");
    } else {
        highIoOverride = null;
    }

    final LogProvider logging = NullLogProvider.getInstance();
    final long pageCacheMemorySetting = options
            .getNumber("pagecache-memory", org.neo4j.internal.batchimport.Configuration.MAX_PAGE_CACHE_MEMORY)
            .longValue();

    // Import configuration: the default configuration for the directory with command-line overrides.
    final org.neo4j.internal.batchimport.Configuration importConfig =
            new org.neo4j.internal.batchimport.Configuration.Overridden(defaultConfiguration(dir)) {

        @Override
        public int maxNumberOfProcessors() {
            return options.getNumber("processors", super.maxNumberOfProcessors()).intValue();
        }

        @Override
        public boolean highIO() {
            if (highIoOverride == null) {
                return super.highIO();
            }
            return highIoOverride;
        }

        @Override
        public long pageCacheMemory() {
            return pageCacheMemorySetting;
        }

        @Override
        public long maxMemoryUsage() {
            final String requested = options.get("max-memory", null);
            if (requested == null) {
                return super.maxMemoryUsage();
            }
            return parseMaxMemory(requested);
        }

        @Override
        public IndexConfig indexConfig() {
            return IndexConfig.create().withLabelIndex().withRelationshipTypeIndex();
        }
    };

    final float factorBadNodeData = options.getNumber("factor-bad-node-data", 0).floatValue();
    final float factorBadRelationshipData = options.getNumber("factor-bad-relationship-data", 0).floatValue();
    final Input input = new DataGeneratorInput(
            nodeCount,
            relationshipCount,
            idType,
            randomSeed,
            0,
            nodeHeader,
            relationshipHeader,
            labelCount,
            relationshipTypeCount,
            factorBadNodeData,
            factorBadRelationshipData);

    try (FileSystemAbstraction fileSystem = new DefaultFileSystemAbstraction();
            Lifespan life = new Lifespan()) {
        final BatchImporter consumer;
        if (!options.getBoolean("to-csv")) {
            System.out.println("Seed " + randomSeed);
            // The scheduler is added to the Lifespan opened by the enclosing try-with-resources.
            final JobScheduler jobScheduler = life.add(createScheduler());
            final boolean verbose = options.getBoolean("v");
            final ExecutionMonitor monitor;
            if (verbose) {
                monitor = new SpectrumExecutionMonitor(2, TimeUnit.SECONDS, System.out, 100);
            } else {
                monitor = defaultVisible();
            }
            consumer = BatchImporterFactory.withHighestPriority().instantiate(
                    DatabaseLayout.ofFlat(dir),
                    fileSystem,
                    PageCacheTracer.NULL,
                    importConfig,
                    new SimpleLogService(logging, logging),
                    monitor,
                    EMPTY,
                    dbConfig,
                    RecordFormatSelector.selectForConfig(dbConfig, logging),
                    NO_MONITOR,
                    jobScheduler,
                    Collector.EMPTY,
                    TransactionLogInitializer.getLogFilesInitializer(),
                    new IndexImporterFactoryImpl(dbConfig),
                    INSTANCE);
        } else {
            consumer = new CsvOutput(dir, nodeHeader, relationshipHeader, csvConfig);
        }
        consumer.doImport(input);
    }
}
Also used : DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) SpectrumExecutionMonitor(org.neo4j.internal.batchimport.staging.SpectrumExecutionMonitor) Configuration(org.neo4j.csv.reader.Configuration) Configuration.defaultConfiguration(org.neo4j.internal.batchimport.Configuration.defaultConfiguration) SimpleLogService(org.neo4j.logging.internal.SimpleLogService) Config(org.neo4j.configuration.Config) IndexConfig(org.neo4j.internal.batchimport.IndexConfig) DataGeneratorInput(org.neo4j.internal.batchimport.input.DataGeneratorInput) Input(org.neo4j.internal.batchimport.input.Input) BatchImporter(org.neo4j.internal.batchimport.BatchImporter) ParallelBatchImporter(org.neo4j.internal.batchimport.ParallelBatchImporter) Groups(org.neo4j.internal.batchimport.input.Groups) SpectrumExecutionMonitor(org.neo4j.internal.batchimport.staging.SpectrumExecutionMonitor) ExecutionMonitor(org.neo4j.internal.batchimport.staging.ExecutionMonitor) Path(java.nio.file.Path) JobScheduler(org.neo4j.scheduler.JobScheduler) Args(org.neo4j.internal.helpers.Args) DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) IdType(org.neo4j.internal.batchimport.input.IdType) LogProvider(org.neo4j.logging.LogProvider) NullLogProvider(org.neo4j.logging.NullLogProvider) Extractors(org.neo4j.csv.reader.Extractors) Header(org.neo4j.internal.batchimport.input.csv.Header) IndexImporterFactoryImpl(org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl) DataGeneratorInput(org.neo4j.internal.batchimport.input.DataGeneratorInput) Lifespan(org.neo4j.kernel.lifecycle.Lifespan)

Example 17 with IdType

use of org.neo4j.internal.batchimport.input.IdType in project neo4j by neo4j.

The class CsvInputTest, method shouldCloseDataIteratorsInTheEnd.

/**
 * Reads one entry from the node iterator and one from the relationship iterator of a
 * {@link CsvInput}, closes both iterators, and then asserts that the last data source captured
 * by each {@code CapturingDataFactories} has been closed.
 */
@Test
public void shouldCloseDataIteratorsInTheEnd() throws Exception {
    // GIVEN an input whose node and relationship data sources are captured as they are created
    IdType idType = IdType.STRING;
    CapturingDataFactories capturedNodeData =
            new CapturingDataFactories(ignored -> charReader("1"), NO_DECORATOR);
    CapturingDataFactories capturedRelationshipData = new CapturingDataFactories(
            ignored -> charReader("1,1"),
            InputEntityDecorators.defaultRelationshipType("TYPE"));
    Input input = new CsvInput(
            capturedNodeData,
            header(entry(null, Type.ID, CsvInput.idExtractor(idType, extractors))),
            capturedRelationshipData,
            header(
                    entry(null, Type.START_ID, CsvInput.idExtractor(idType, extractors)),
                    entry(null, Type.END_ID, CsvInput.idExtractor(idType, extractors))),
            idType,
            config(),
            NO_MONITOR,
            INSTANCE);

    // WHEN each iterator is read from once and then closed by try-with-resources
    try (InputIterator nodeIterator = input.nodes(EMPTY).iterator()) {
        readNext(nodeIterator);
    }
    try (InputIterator relationshipIterator = input.relationships(EMPTY).iterator()) {
        readNext(relationshipIterator);
    }

    // THEN the most recently captured data source of each kind must be closed
    assertClosed(capturedNodeData.last());
    assertClosed(capturedRelationshipData.last());
}
Also used : InputIterator(org.neo4j.internal.batchimport.InputIterator) Input(org.neo4j.internal.batchimport.input.Input) IdType(org.neo4j.internal.batchimport.input.IdType) Test(org.junit.Test)

Example 18 with IdType

use of org.neo4j.internal.batchimport.input.IdType in project neo4j by neo4j.

The class CsvInputTest, method shouldHaveNodesBelongToGroupSpecifiedInHeader.

/**
 * Builds a {@link CsvInput} whose node header names the group {@code "MyGroup"} on its ID entry
 * and asserts that both nodes read from the data are reported with that group.
 */
@Test
public void shouldHaveNodesBelongToGroupSpecifiedInHeader() throws Exception {
    // GIVEN two node lines and a header whose ID column carries the group name
    Groups groups = new Groups();
    Group expectedGroup = groups.getOrCreate("MyGroup");
    IdType idType = IdType.INTEGER;
    Iterable<DataFactory> nodeData = dataIterable(data("123,one\n" + "456,two"));
    Input input = new CsvInput(
            nodeData,
            header(
                    entry(null, Type.ID, expectedGroup.name(), CsvInput.idExtractor(idType, extractors)),
                    entry("name", Type.PROPERTY, extractors.string())),
            datas(),
            defaultFormatRelationshipFileHeader(),
            idType,
            config(),
            NO_MONITOR,
            INSTANCE);

    // WHEN/THEN both nodes come back in that group and nothing follows them
    try (InputIterator nodeIterator = input.nodes(EMPTY).iterator()) {
        assertNextNode(nodeIterator, expectedGroup, 123L, properties("name", "one"), labels());
        assertNextNode(nodeIterator, expectedGroup, 456L, properties("name", "two"), labels());
        assertFalse(readNext(nodeIterator));
    }
}
Also used : Group(org.neo4j.internal.batchimport.input.Group) InputIterator(org.neo4j.internal.batchimport.InputIterator) Input(org.neo4j.internal.batchimport.input.Input) Groups(org.neo4j.internal.batchimport.input.Groups) IdType(org.neo4j.internal.batchimport.input.IdType) Test(org.junit.Test)

Example 19 with IdType

use of org.neo4j.internal.batchimport.input.IdType in project neo4j by neo4j.

The class CsvInputTest, method shouldCalculateCorrectEstimatesForGZippedInputFile.

/**
 * Creates a node data file, gzips it, and asserts that the estimates calculated for the
 * compressed file match those for the uncompressed file within the tolerance passed to
 * {@code assertEstimatesEquals} (0.01).
 */
@Test
public void shouldCalculateCorrectEstimatesForGZippedInputFile() throws IOException {
    // GIVEN a plain data file and a gzipped copy that is strictly smaller on disk
    IdType idType = STRING;
    Path plainFile = createNodeInputDataFile(mebiBytes(10));
    Path gzippedFile = compressWithGZip(plainFile);
    long gzippedSize = Files.size(gzippedFile);
    long plainSize = Files.size(plainFile);
    Assertions.assertThat(gzippedSize).isLessThan(plainSize);

    // WHEN estimates are calculated for each file on its own
    Input.Estimates plainEstimates = calculateEstimatesOnSingleFileNodeData(idType, plainFile);
    Input.Estimates gzippedEstimates = calculateEstimatesOnSingleFileNodeData(idType, gzippedFile);

    // THEN the two sets of estimates should be _roughly_ equal. GZIP offers no reliable way of
    // learning the uncompressed size without decompressing everything, so the estimator instead
    // estimates the compression rate after reading a chunk of the data.
    assertEstimatesEquals(plainEstimates, gzippedEstimates, 0.01);
}
Also used : Path(java.nio.file.Path) Input(org.neo4j.internal.batchimport.input.Input) IdType(org.neo4j.internal.batchimport.input.IdType) Test(org.junit.Test)

Aggregations

IdType (org.neo4j.internal.batchimport.input.IdType)19 Test (org.junit.jupiter.api.Test)11 CharSeeker (org.neo4j.csv.reader.CharSeeker)10 Extractors (org.neo4j.csv.reader.Extractors)10 Input (org.neo4j.internal.batchimport.input.Input)9 Test (org.junit.Test)7 DataFactories.defaultFormatNodeFileHeader (org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader)6 DataFactories.defaultFormatRelationshipFileHeader (org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatRelationshipFileHeader)6 InputIterator (org.neo4j.internal.batchimport.InputIterator)5 Path (java.nio.file.Path)3 Groups (org.neo4j.internal.batchimport.input.Groups)3 ParallelBatchImporter (org.neo4j.internal.batchimport.ParallelBatchImporter)2 DataGeneratorInput (org.neo4j.internal.batchimport.input.DataGeneratorInput)2 Group (org.neo4j.internal.batchimport.input.Group)2 JobScheduler (org.neo4j.scheduler.JobScheduler)2 Config (org.neo4j.configuration.Config)1 Configuration (org.neo4j.csv.reader.Configuration)1 BatchImporter (org.neo4j.internal.batchimport.BatchImporter)1 Configuration (org.neo4j.internal.batchimport.Configuration)1 Configuration.defaultConfiguration (org.neo4j.internal.batchimport.Configuration.defaultConfiguration)1