Search in sources :

Example 16 with Input

use of org.neo4j.internal.batchimport.input.Input in project neo4j by neo4j.

the class CsvInputTest method shouldParseDatePropertyValues.

@Test
public void shouldParseDatePropertyValues() throws Exception {
    // GIVEN
    DataFactory data = data(":ID,name,date:Date\n" + "0,Mattias,2018-02-27\n" + "1,Johan,2018-03-01\n");
    Iterable<DataFactory> dataIterable = dataIterable(data);
    Input input = new CsvInput(dataIterable, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), ACTUAL, config(), NO_MONITOR, INSTANCE);
    // WHEN
    try (InputIterator nodes = input.nodes(EMPTY).iterator()) {
        // THEN
        assertNextNode(nodes, 0L, new Object[] { "name", "Mattias", "date", DateValue.date(2018, 2, 27) }, labels());
        assertNextNode(nodes, 1L, new Object[] { "name", "Johan", "date", DateValue.date(2018, 3, 1) }, labels());
        assertFalse(readNext(nodes));
    }
}
Also used : InputIterator(org.neo4j.internal.batchimport.InputIterator) Input(org.neo4j.internal.batchimport.input.Input) Test(org.junit.Test)

Example 17 with Input

use of org.neo4j.internal.batchimport.input.Input in project neo4j by neo4j.

the class CsvInputTest method shouldIgnoreEmptyExtraColumns.

@Test
public void shouldIgnoreEmptyExtraColumns() throws Exception {
    // GIVEN
    Iterable<DataFactory> data = datas(CsvInputTest.data(":ID,one\n" + "1,test,\n" + "2,test,,additional"));
    // WHEN
    Collector collector = mock(Collector.class);
    Input input = new CsvInput(data, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.INTEGER, config(), NO_MONITOR, INSTANCE);
    // THEN
    try (InputIterator nodes = input.nodes(collector).iterator()) {
        // THEN
        assertNextNode(nodes, 1L, properties("one", "test"), labels());
        assertNextNode(nodes, 2L, properties("one", "test"), labels());
        assertFalse(readNext(nodes));
    }
    verify(collector).collectExtraColumns(anyString(), eq(1L), eq(null));
    verify(collector).collectExtraColumns(anyString(), eq(2L), eq(null));
    verify(collector).collectExtraColumns(anyString(), eq(2L), eq("additional"));
}
Also used : InputIterator(org.neo4j.internal.batchimport.InputIterator) Input(org.neo4j.internal.batchimport.input.Input) Collector(org.neo4j.internal.batchimport.input.Collector) Test(org.junit.Test)

Example 18 with Input

use of org.neo4j.internal.batchimport.input.Input in project neo4j by neo4j.

the class CsvInputTest method shouldHandleMultipleInputGroups.

@Test
public void shouldHandleMultipleInputGroups() throws Exception {
    // GIVEN multiple input groups, each with their own, specific, header
    DataFactory group1 = data(":ID,name,kills:int,health:int\n" + "1,Jim,10,100\n" + "2,Abathur,0,200\n");
    DataFactory group2 = data(":ID,type\n" + "3,zergling\n" + "4,csv\n");
    Iterable<DataFactory> data = dataIterable(group1, group2);
    Input input = new CsvInput(data, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.STRING, config(), NO_MONITOR, INSTANCE);
    // WHEN iterating over them, THEN the expected data should come out
    try (InputIterator nodes = input.nodes(EMPTY).iterator()) {
        assertNextNode(nodes, "1", properties("name", "Jim", "kills", 10, "health", 100), labels());
        assertNextNode(nodes, "2", properties("name", "Abathur", "kills", 0, "health", 200), labels());
        assertNextNode(nodes, "3", properties("type", "zergling"), labels());
        assertNextNode(nodes, "4", properties("type", "csv"), labels());
        assertFalse(readNext(nodes));
    }
}
Also used : InputIterator(org.neo4j.internal.batchimport.InputIterator) Input(org.neo4j.internal.batchimport.input.Input) Test(org.junit.Test)

Example 19 with Input

use of org.neo4j.internal.batchimport.input.Input in project neo4j by neo4j.

the class ImportLogic method initialize.

public void initialize(Input input) throws IOException {
    log.info("Import starting");
    startTime = currentTimeMillis();
    this.input = input;
    PageCacheArrayFactoryMonitor numberArrayFactoryMonitor = new PageCacheArrayFactoryMonitor();
    numberArrayFactory = auto(neoStore.getPageCache(), pageCacheTracer, databaseDirectory, config.allowCacheAllocationOnHeap(), numberArrayFactoryMonitor, log, databaseName);
    // Some temporary caches and indexes in the import
    idMapper = instantiateIdMapper(input);
    nodeRelationshipCache = new NodeRelationshipCache(numberArrayFactory, dbConfig.get(GraphDatabaseSettings.dense_node_threshold), memoryTracker);
    Input.Estimates inputEstimates = input.calculateEstimates(neoStore.getPropertyStore().newValueEncodedSizeCalculator());
    // Sanity checking against estimates
    new EstimationSanityChecker(recordFormats, monitor).sanityCheck(inputEstimates);
    new HeapSizeSanityChecker(monitor).sanityCheck(inputEstimates, recordFormats, neoStore, NodeRelationshipCache.memoryEstimation(inputEstimates.numberOfNodes()), idMapper.memoryEstimation(inputEstimates.numberOfNodes()));
    dependencies.satisfyDependencies(inputEstimates, idMapper, neoStore, nodeRelationshipCache, numberArrayFactoryMonitor);
    if (neoStore.determineDoubleRelationshipRecordUnits(inputEstimates)) {
        monitor.doubleRelationshipRecordUnitsEnabled();
    }
    executionMonitor.initialize(dependencies);
}
Also used : NodeRelationshipCache(org.neo4j.internal.batchimport.cache.NodeRelationshipCache) Input(org.neo4j.internal.batchimport.input.Input) PageCacheArrayFactoryMonitor(org.neo4j.internal.batchimport.cache.PageCacheArrayFactoryMonitor) EstimationSanityChecker(org.neo4j.internal.batchimport.input.EstimationSanityChecker)

Example 20 with Input

use of org.neo4j.internal.batchimport.input.Input in project neo4j by neo4j.

the class QuickImport method main.

public static void main(String[] arguments) throws IOException {
    Args args = Args.parse(arguments);
    long nodeCount = parseLongWithUnit(args.get("nodes", null));
    long relationshipCount = parseLongWithUnit(args.get("relationships", null));
    int labelCount = args.getNumber("labels", 4).intValue();
    int relationshipTypeCount = args.getNumber("relationship-types", 4).intValue();
    Path dir = Path.of(args.get("into"));
    long randomSeed = args.getNumber("random-seed", currentTimeMillis()).longValue();
    Configuration config = Configuration.COMMAS;
    Extractors extractors = new Extractors(config.arrayDelimiter());
    IdType idType = IdType.valueOf(args.get("id-type", IdType.INTEGER.name()));
    Groups groups = new Groups();
    Header nodeHeader = parseNodeHeader(args, idType, extractors, groups);
    Header relationshipHeader = parseRelationshipHeader(args, idType, extractors, groups);
    Config dbConfig;
    String dbConfigFileName = args.get("db-config", null);
    if (dbConfigFileName != null) {
        dbConfig = Config.newBuilder().fromFile(Path.of(dbConfigFileName)).build();
    } else {
        dbConfig = Config.defaults();
    }
    Boolean highIo = args.has("high-io") ? args.getBoolean("high-io") : null;
    LogProvider logging = NullLogProvider.getInstance();
    long pageCacheMemory = args.getNumber("pagecache-memory", org.neo4j.internal.batchimport.Configuration.MAX_PAGE_CACHE_MEMORY).longValue();
    org.neo4j.internal.batchimport.Configuration importConfig = new org.neo4j.internal.batchimport.Configuration.Overridden(defaultConfiguration(dir)) {

        @Override
        public int maxNumberOfProcessors() {
            return args.getNumber("processors", super.maxNumberOfProcessors()).intValue();
        }

        @Override
        public boolean highIO() {
            return highIo != null ? highIo : super.highIO();
        }

        @Override
        public long pageCacheMemory() {
            return pageCacheMemory;
        }

        @Override
        public long maxMemoryUsage() {
            String custom = args.get("max-memory", null);
            return custom != null ? parseMaxMemory(custom) : super.maxMemoryUsage();
        }

        @Override
        public IndexConfig indexConfig() {
            return IndexConfig.create().withLabelIndex().withRelationshipTypeIndex();
        }
    };
    float factorBadNodeData = args.getNumber("factor-bad-node-data", 0).floatValue();
    float factorBadRelationshipData = args.getNumber("factor-bad-relationship-data", 0).floatValue();
    Input input = new DataGeneratorInput(nodeCount, relationshipCount, idType, randomSeed, 0, nodeHeader, relationshipHeader, labelCount, relationshipTypeCount, factorBadNodeData, factorBadRelationshipData);
    try (FileSystemAbstraction fileSystem = new DefaultFileSystemAbstraction();
        Lifespan life = new Lifespan()) {
        BatchImporter consumer;
        if (args.getBoolean("to-csv")) {
            consumer = new CsvOutput(dir, nodeHeader, relationshipHeader, config);
        } else {
            System.out.println("Seed " + randomSeed);
            final JobScheduler jobScheduler = life.add(createScheduler());
            boolean verbose = args.getBoolean("v");
            ExecutionMonitor monitor = verbose ? new SpectrumExecutionMonitor(2, TimeUnit.SECONDS, System.out, 100) : defaultVisible();
            consumer = BatchImporterFactory.withHighestPriority().instantiate(DatabaseLayout.ofFlat(dir), fileSystem, PageCacheTracer.NULL, importConfig, new SimpleLogService(logging, logging), monitor, EMPTY, dbConfig, RecordFormatSelector.selectForConfig(dbConfig, logging), NO_MONITOR, jobScheduler, Collector.EMPTY, TransactionLogInitializer.getLogFilesInitializer(), new IndexImporterFactoryImpl(dbConfig), INSTANCE);
        }
        consumer.doImport(input);
    }
}
Also used : DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) SpectrumExecutionMonitor(org.neo4j.internal.batchimport.staging.SpectrumExecutionMonitor) Configuration(org.neo4j.csv.reader.Configuration) Configuration.defaultConfiguration(org.neo4j.internal.batchimport.Configuration.defaultConfiguration) SimpleLogService(org.neo4j.logging.internal.SimpleLogService) Config(org.neo4j.configuration.Config) IndexConfig(org.neo4j.internal.batchimport.IndexConfig) DataGeneratorInput(org.neo4j.internal.batchimport.input.DataGeneratorInput) Input(org.neo4j.internal.batchimport.input.Input) BatchImporter(org.neo4j.internal.batchimport.BatchImporter) ParallelBatchImporter(org.neo4j.internal.batchimport.ParallelBatchImporter) Groups(org.neo4j.internal.batchimport.input.Groups) SpectrumExecutionMonitor(org.neo4j.internal.batchimport.staging.SpectrumExecutionMonitor) ExecutionMonitor(org.neo4j.internal.batchimport.staging.ExecutionMonitor) Path(java.nio.file.Path) JobScheduler(org.neo4j.scheduler.JobScheduler) Args(org.neo4j.internal.helpers.Args) DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) IdType(org.neo4j.internal.batchimport.input.IdType) LogProvider(org.neo4j.logging.LogProvider) NullLogProvider(org.neo4j.logging.NullLogProvider) Extractors(org.neo4j.csv.reader.Extractors) Header(org.neo4j.internal.batchimport.input.csv.Header) IndexImporterFactoryImpl(org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl) DataGeneratorInput(org.neo4j.internal.batchimport.input.DataGeneratorInput) Lifespan(org.neo4j.kernel.lifecycle.Lifespan)

Aggregations

Input (org.neo4j.internal.batchimport.input.Input)43 Test (org.junit.Test)36 InputIterator (org.neo4j.internal.batchimport.InputIterator)36 IdType (org.neo4j.internal.batchimport.input.IdType)8 Groups (org.neo4j.internal.batchimport.input.Groups)5 JobScheduler (org.neo4j.scheduler.JobScheduler)5 Matchers.containsString (org.hamcrest.Matchers.containsString)4 Test (org.junit.jupiter.api.Test)4 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)4 Config (org.neo4j.configuration.Config)3 Group (org.neo4j.internal.batchimport.input.Group)3 FileSystemAbstraction (org.neo4j.io.fs.FileSystemAbstraction)3 ThreadPoolJobScheduler (org.neo4j.test.scheduler.ThreadPoolJobScheduler)3 Path (java.nio.file.Path)2 ParallelBatchImporter (org.neo4j.internal.batchimport.ParallelBatchImporter)2 ExecutionMonitor (org.neo4j.internal.batchimport.staging.ExecutionMonitor)2 IndexImporterFactoryImpl (org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl)2 NullLogProvider (org.neo4j.logging.NullLogProvider)2 IOException (java.io.IOException)1 PrintStream (java.io.PrintStream)1