Search in sources :

Example 1 with Input

use of org.neo4j.internal.batchimport.input.Input in project neo4j by neo4j.

the class HumanUnderstandableExecutionMonitorIT method shouldReportProgressOfNodeImport.

/**
 * Runs an import of generated data through a {@link ParallelBatchImporter} that has a
 * {@link HumanUnderstandableExecutionMonitor} attached, then asks the capturing monitor
 * to assert that all progress reached the end.
 */
@Test
void shouldReportProgressOfNodeImport() throws Exception {
    // given: the monitor under test, reporting into a capturing sink
    CapturingMonitor capturedProgress = new CapturingMonitor();
    HumanUnderstandableExecutionMonitor executionMonitor = new HumanUnderstandableExecutionMonitor(capturedProgress);

    // ... generated input using integer ids and barebone headers
    IdType idType = IdType.INTEGER;
    Input generatedInput = new DataGeneratorInput(
            NODE_COUNT,
            RELATIONSHIP_COUNT,
            idType,
            random.seed(),
            0,
            bareboneNodeHeader(idType, new Extractors(';')),
            bareboneRelationshipHeader(idType, new Extractors(';')),
            1,
            1,
            0,
            0);

    // ... and the default configuration, except that pageCacheMemory() returns mebiBytes(8)
    Configuration importConfiguration = new Configuration.Overridden(Configuration.DEFAULT) {
        @Override
        public long pageCacheMemory() {
            return mebiBytes(8);
        }
    };

    // when
    try (JobScheduler scheduler = new ThreadPoolJobScheduler()) {
        ParallelBatchImporter importer = new ParallelBatchImporter(
                databaseLayout,
                fileSystem,
                NULL,
                importConfiguration,
                NullLogService.getInstance(),
                executionMonitor,
                EMPTY,
                defaults(),
                LATEST_RECORD_FORMATS,
                ImportLogic.NO_MONITOR,
                scheduler,
                Collector.EMPTY,
                LogFilesInitializer.NULL,
                IndexImporterFactory.EMPTY,
                EmptyMemoryTracker.INSTANCE);
        importer.doImport(generatedInput);

        // then
        capturedProgress.assertAllProgressReachedEnd();
    }
}
Also used : JobScheduler(org.neo4j.scheduler.JobScheduler) ThreadPoolJobScheduler(org.neo4j.test.scheduler.ThreadPoolJobScheduler) ParallelBatchImporter(org.neo4j.internal.batchimport.ParallelBatchImporter) DataGeneratorInput(org.neo4j.internal.batchimport.input.DataGeneratorInput) Input(org.neo4j.internal.batchimport.input.Input) Extractors(org.neo4j.csv.reader.Extractors) Configuration(org.neo4j.internal.batchimport.Configuration) IdType(org.neo4j.internal.batchimport.input.IdType) Test(org.junit.jupiter.api.Test)

Example 2 with Input

use of org.neo4j.internal.batchimport.input.Input in project neo4j by neo4j.

the class ImportPanicIT method shouldExitAndThrowExceptionOnPanic.

/**
 * There used to be a problem where some steps that panicked, in particular parallel CSV
 * input parsing, would hang the import entirely.
 *
 * This test imports node data obtained from {@code nodeCsvFileWithBrokenEntries()} and expects
 * the import to end with an {@link InputException} whose cause is a
 * {@link DataAfterQuoteException}.
 */
@Test
void shouldExitAndThrowExceptionOnPanic() throws Exception {
    try (JobScheduler scheduler = new ThreadPoolJobScheduler()) {
        BatchImporter batchImporter = new ParallelBatchImporter(
                databaseLayout,
                testDirectory.getFileSystem(),
                PageCacheTracer.NULL,
                Configuration.DEFAULT,
                NullLogService.getInstance(),
                ExecutionMonitor.INVISIBLE,
                AdditionalInitialIds.EMPTY,
                Config.defaults(),
                StandardV3_4.RECORD_FORMATS,
                ImportLogic.NO_MONITOR,
                scheduler,
                Collector.EMPTY,
                LogFilesInitializer.NULL,
                IndexImporterFactory.EMPTY,
                EmptyMemoryTracker.INSTANCE);

        // Node data comes from the broken CSV file; no relationship data is supplied.
        Iterable<DataFactory> brokenNodeData = DataFactories.datas(
                DataFactories.data(
                        InputEntityDecorators.NO_DECORATOR,
                        fileAsCharReadable(nodeCsvFileWithBrokenEntries())));
        Input brokenCsvInput = new CsvInput(
                brokenNodeData,
                DataFactories.defaultFormatNodeFileHeader(),
                DataFactories.datas(),
                DataFactories.defaultFormatRelationshipFileHeader(),
                IdType.ACTUAL,
                csvConfigurationWithLowBufferSize(),
                CsvInput.NO_MONITOR,
                INSTANCE);

        // The import must fail with an exception rather than hang.
        InputException failure = assertThrows(InputException.class, () -> batchImporter.doImport(brokenCsvInput));
        Throwable cause = failure.getCause();
        assertTrue(cause instanceof DataAfterQuoteException);
    }
}
Also used : JobScheduler(org.neo4j.scheduler.JobScheduler) ThreadPoolJobScheduler(org.neo4j.test.scheduler.ThreadPoolJobScheduler) CsvInput(org.neo4j.internal.batchimport.input.csv.CsvInput) Input(org.neo4j.internal.batchimport.input.Input) DataFactory(org.neo4j.internal.batchimport.input.csv.DataFactory) DataAfterQuoteException(org.neo4j.csv.reader.DataAfterQuoteException) Test(org.junit.jupiter.api.Test)

Example 3 with Input

use of org.neo4j.internal.batchimport.input.Input in project neo4j by neo4j.

the class CsvInputEstimateCalculationIT method shouldCalculateCorrectEstimates.

/**
 * Calculates estimates for generated input, imports that input with a
 * {@link ParallelBatchImporter}, and then compares the estimates against the resulting stores:
 * node count, relationship count and total property count must be roughly equal, and the
 * estimated property storage size must be within 10% of the measured size.
 *
 * The file system abstraction is opened in the try-with-resources statement so that it is
 * closed even when the import or an assertion fails.
 *
 * @throws Exception if data generation, the import, or opening/closing of resources fails
 */
@Test
void shouldCalculateCorrectEstimates() throws Exception {
    // given a couple of input files of various layouts
    Input input = generateData();
    RecordFormats format = LATEST_RECORD_FORMATS;
    Input.Estimates estimates = input.calculateEstimates(new PropertyValueRecordSizeCalculator(format.property().getRecordSize(NO_STORE_HEADER), GraphDatabaseInternalSettings.string_block_size.defaultValue(), 0, GraphDatabaseInternalSettings.array_block_size.defaultValue(), 0));
    // when
    Config config = Config.defaults();
    // Resources are closed in reverse order: the job scheduler first, then the file system.
    try (FileSystemAbstraction fs = new DefaultFileSystemAbstraction();
        JobScheduler jobScheduler = new ThreadPoolJobScheduler()) {
        new ParallelBatchImporter(databaseLayout, fs, PageCacheTracer.NULL, PBI_CONFIG, NullLogService.getInstance(), INVISIBLE, EMPTY, config, format, ImportLogic.NO_MONITOR, jobScheduler, Collector.EMPTY, LogFilesInitializer.NULL, IndexImporterFactory.EMPTY, EmptyMemoryTracker.INSTANCE).doImport(input);
        // then compare estimates with actual disk sizes
        SingleFilePageSwapperFactory swapperFactory = new SingleFilePageSwapperFactory(fs);
        try (PageCache pageCache = new MuninnPageCache(swapperFactory, jobScheduler, MuninnPageCache.config(1000));
            NeoStores stores = new StoreFactory(databaseLayout, config, new DefaultIdGeneratorFactory(fs, immediate(), databaseLayout.getDatabaseName()), pageCache, fs, NullLogProvider.getInstance(), PageCacheTracer.NULL, writable()).openAllNeoStores()) {
            assertRoughlyEqual(estimates.numberOfNodes(), stores.getNodeStore().getNumberOfIdsInUse());
            assertRoughlyEqual(estimates.numberOfRelationships(), stores.getRelationshipStore().getNumberOfIdsInUse());
            assertRoughlyEqual(estimates.numberOfNodeProperties() + estimates.numberOfRelationshipProperties(), calculateNumberOfProperties(stores));
        }
        long measuredPropertyStorage = propertyStorageSize();
        long estimatedPropertyStorage = estimates.sizeOfNodeProperties() + estimates.sizeOfRelationshipProperties();
        assertThat(estimatedPropertyStorage).as("Estimated property storage size of %s must be within 10%% of the measured size of %s.", bytesToString(estimatedPropertyStorage), bytesToString(measuredPropertyStorage)).isCloseTo(measuredPropertyStorage, withPercentage(10.0));
    }
}
Also used : JobScheduler(org.neo4j.scheduler.JobScheduler) ThreadPoolJobScheduler(org.neo4j.test.scheduler.ThreadPoolJobScheduler) DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) Config(org.neo4j.configuration.Config) DefaultIdGeneratorFactory(org.neo4j.internal.id.DefaultIdGeneratorFactory) StoreFactory(org.neo4j.kernel.impl.store.StoreFactory) ParallelBatchImporter(org.neo4j.internal.batchimport.ParallelBatchImporter) Input(org.neo4j.internal.batchimport.input.Input) RecordFormats(org.neo4j.kernel.impl.store.format.RecordFormats) MuninnPageCache(org.neo4j.io.pagecache.impl.muninn.MuninnPageCache) NeoStores(org.neo4j.kernel.impl.store.NeoStores) SingleFilePageSwapperFactory(org.neo4j.io.pagecache.impl.SingleFilePageSwapperFactory) PropertyValueRecordSizeCalculator(org.neo4j.kernel.impl.store.PropertyValueRecordSizeCalculator) PageCache(org.neo4j.io.pagecache.PageCache) Test(org.junit.jupiter.api.Test)

Example 4 with Input

use of org.neo4j.internal.batchimport.input.Input in project neo4j by neo4j.

the class CsvInputTest method shouldParseDurationPropertyValues.

/**
 * Feeds node CSV data with a {@code duration:Duration} column, one value unquoted and one
 * quoted, through {@link CsvInput} and checks that each node carries the expected
 * {@link DurationValue}.
 */
@Test
public void shouldParseDurationPropertyValues() throws Exception {
    // GIVEN
    String nodeCsv = ":ID,name,duration:Duration\n" + "0,Mattias,P3MT13H37M\n" + "1,Johan,\"P-1YT4H20M\"\n";
    Iterable<DataFactory> nodeData = dataIterable(data(nodeCsv));
    Input input = new CsvInput(
            nodeData,
            defaultFormatNodeFileHeader(),
            datas(),
            defaultFormatRelationshipFileHeader(),
            ACTUAL,
            config(),
            NO_MONITOR,
            INSTANCE);

    // WHEN
    try (InputIterator nodeIterator = input.nodes(EMPTY).iterator()) {
        // THEN
        Object[] mattiasProperties = { "name", "Mattias", "duration", DurationValue.duration(3, 0, 13 * 3600 + 37 * 60, 0) };
        assertNextNode(nodeIterator, 0L, mattiasProperties, labels());

        Object[] johanProperties = { "name", "Johan", "duration", DurationValue.duration(-12, 0, 4 * 3600 + 20 * 60, 0) };
        assertNextNode(nodeIterator, 1L, johanProperties, labels());

        // No further nodes are expected.
        assertFalse(readNext(nodeIterator));
    }
}
Also used : InputIterator(org.neo4j.internal.batchimport.InputIterator) Input(org.neo4j.internal.batchimport.input.Input) Test(org.junit.Test)

Example 5 with Input

use of org.neo4j.internal.batchimport.input.Input in project neo4j by neo4j.

the class CsvInputTest method shouldProvideDefaultRelationshipType.

/**
 * Reads three relationships through {@link CsvInput} with a default-relationship-type
 * decorator applied: one with an empty type column, one with a custom type and one that
 * names the default type explicitly. Checks the type reported for each.
 */
@Test
public void shouldProvideDefaultRelationshipType() throws Exception {
    // GIVEN
    String defaultType = "DEFAULT";
    String customType = "CUSTOM";
    String relationshipCsv = ":START_ID,:END_ID,:TYPE\n" + "0,1,\n" + "1,2," + customType + "\n" + "2,1," + defaultType;
    DataFactory relationshipData = data(relationshipCsv, InputEntityDecorators.defaultRelationshipType(defaultType));
    Iterable<DataFactory> relationshipDataIterable = dataIterable(relationshipData);
    Input input = new CsvInput(
            datas(),
            defaultFormatNodeFileHeader(),
            relationshipDataIterable,
            defaultFormatRelationshipFileHeader(),
            ACTUAL,
            config(),
            NO_MONITOR,
            INSTANCE);

    // WHEN/THEN
    try (InputIterator relationshipIterator = input.relationships(EMPTY).iterator()) {
        // The empty type column is expected to come back as the default type.
        assertNextRelationship(relationshipIterator, 0L, 1L, defaultType, InputEntity.NO_PROPERTIES);
        assertNextRelationship(relationshipIterator, 1L, 2L, customType, InputEntity.NO_PROPERTIES);
        assertNextRelationship(relationshipIterator, 2L, 1L, defaultType, InputEntity.NO_PROPERTIES);
        assertFalse(readNext(relationshipIterator));
    }
}
Also used : InputIterator(org.neo4j.internal.batchimport.InputIterator) Input(org.neo4j.internal.batchimport.input.Input) Matchers.containsString(org.hamcrest.Matchers.containsString) ArgumentMatchers.anyString(org.mockito.ArgumentMatchers.anyString) Test(org.junit.Test)

Aggregations

Input (org.neo4j.internal.batchimport.input.Input)43 Test (org.junit.Test)36 InputIterator (org.neo4j.internal.batchimport.InputIterator)36 IdType (org.neo4j.internal.batchimport.input.IdType)8 Groups (org.neo4j.internal.batchimport.input.Groups)5 JobScheduler (org.neo4j.scheduler.JobScheduler)5 Matchers.containsString (org.hamcrest.Matchers.containsString)4 Test (org.junit.jupiter.api.Test)4 ArgumentMatchers.anyString (org.mockito.ArgumentMatchers.anyString)4 Config (org.neo4j.configuration.Config)3 Group (org.neo4j.internal.batchimport.input.Group)3 FileSystemAbstraction (org.neo4j.io.fs.FileSystemAbstraction)3 ThreadPoolJobScheduler (org.neo4j.test.scheduler.ThreadPoolJobScheduler)3 Path (java.nio.file.Path)2 ParallelBatchImporter (org.neo4j.internal.batchimport.ParallelBatchImporter)2 ExecutionMonitor (org.neo4j.internal.batchimport.staging.ExecutionMonitor)2 IndexImporterFactoryImpl (org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl)2 NullLogProvider (org.neo4j.logging.NullLogProvider)2 IOException (java.io.IOException)1 PrintStream (java.io.PrintStream)1