Search in sources :

Example 6 with Collector

Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

In the class EncodingIdMapperTest, method shouldDetectCorrectDuplicateInputIdsWhereManyAccidentalInManyGroups.

/**
 * Feeds 5 groups of 20 ids each through an encoder that is forced to produce colliding
 * encoded values, and expects that no duplicate is reported to the collector and that every
 * input id still maps back to the node id it was put with.
 */
@Test
public void shouldDetectCorrectDuplicateInputIdsWhereManyAccidentalInManyGroups() throws Exception {
    // GIVEN
    final ControlledEncoder encoder = new ControlledEncoder(new LongEncoder());
    IdMapper mapper = mapper(encoder, Radix.LONG, NO_MONITOR);
    final int idsPerGroup = 20, groups = 5;
    // Updated as a side effect of iteration, so it always refers to the group of the id last fetched
    final AtomicReference<Group> group = new AtomicReference<>();
    Iterable<Object> source = new Iterable<Object>() {

        @Override
        public Iterator<Object> iterator() {
            return new PrefetchingIterator<Object>() {

                private int i;

                @Override
                protected Object fetchNextOrNull() {
                    final int position = i;
                    final int offsetInGroup = position % idsPerGroup;
                    // A new group starts every <idsPerGroup> ids
                    if (offsetInGroup == 0) {
                        int groupId = position / idsPerGroup;
                        if (groupId == groups) {
                            // All groups exhausted; the position is deliberately left untouched
                            return null;
                        }
                        group.set(new Group.Adapter(groupId, "Group " + groupId));
                    }
                    // From here on the position always advances, whatever happens below
                    i++;
                    // i.e. the first 10% of every group collide with the first 10% of every other group
                    if (offsetInGroup < 2) {
                        // These colliding values are also encoded into the same eId,
                        // so that they are definitely marked as collisions
                        encoder.useThisIdToEncodeNoMatterWhatComesIn(Long.valueOf(1234567));
                        return Long.valueOf(offsetInGroup);
                    }
                    // The remaining 90% are accidental collisions for something else
                    encoder.useThisIdToEncodeNoMatterWhatComesIn(Long.valueOf(123456 - group.get().id()));
                    return Long.valueOf(position);
                }
            };
        }
    };
    InputIterable<Object> ids = SimpleInputIteratorWrapper.wrap("source", source);
    // WHEN
    long nextNodeId = 0;
    for (Object inputId : ids) {
        mapper.put(inputId, nextNodeId, group.get());
        nextNodeId++;
    }
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, NONE);
    // THEN
    verifyNoMoreInteractions(collector);
    long expectedNodeId = 0;
    for (Object inputId : ids) {
        assertEquals(expectedNodeId, mapper.get(inputId, group.get()));
        expectedNodeId++;
    }
}
Also used : Group(org.neo4j.unsafe.impl.batchimport.input.Group) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) AtomicReference(java.util.concurrent.atomic.AtomicReference) ResourceIterator(org.neo4j.graphdb.ResourceIterator) PrimitiveLongIterator(org.neo4j.collection.primitive.PrimitiveLongIterator) SimpleInputIterator(org.neo4j.unsafe.impl.batchimport.input.SimpleInputIterator) PrefetchingIterator(org.neo4j.helpers.collection.PrefetchingIterator) InputIterator(org.neo4j.unsafe.impl.batchimport.InputIterator) Iterator(java.util.Iterator) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) Test(org.junit.Test)

Example 7 with Collector

Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

In the class EncodingIdMapperTest, method shouldHandleLargeAmountsOfDuplicateNodeIds.

/**
 * Puts every one of {@code high} distinct ids into the mapper twice and expects the collector
 * to be told about {@code high} duplicate nodes.
 */
@Test
public void shouldHandleLargeAmountsOfDuplicateNodeIds() throws Exception {
    // GIVEN
    IdMapper mapper = mapper(new LongEncoder(), Radix.LONG, NO_MONITOR);
    int high = 10;
    // one pass over all ids, alternating between the upper and the lower end of the range
    List<Object> singleRun = new ArrayList<>();
    for (long low = 0, top = high - 1; low < high / 2; low++, top--) {
        singleRun.add(top);
        singleRun.add(low);
    }
    // the full list of input ids is that same pass repeated twice
    List<Object> ids = new ArrayList<>();
    for (int run = 0; run < 2; run++) {
        ids.addAll(singleRun);
    }
    // fed to the IdMapper
    long nextNodeId = 0;
    for (Object inputId : ids) {
        mapper.put(inputId, nextNodeId, GLOBAL);
        nextNodeId++;
    }
    // WHEN
    Collector collector = mock(Collector.class);
    mapper.prepare(SimpleInputIteratorWrapper.wrap("source", ids), collector, NONE);
    // THEN
    verify(collector, times(high)).collectDuplicateNode(any(Object.class), anyLong(), anyString(), anyString(), anyString());
}
Also used : ArrayList(java.util.ArrayList) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) Test(org.junit.Test)

Example 8 with Collector

Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

In the class EncodingIdMapperTest, method shouldReportCollisionsForSameInputId.

/**
 * Puts the string ids "10", "9", "10" and expects exactly one duplicate report, for "10",
 * and no other interaction with the collector.
 */
@Test
public void shouldReportCollisionsForSameInputId() throws Exception {
    // GIVEN
    IdMapper mapper = mapper(new StringEncoder(), Radix.STRING, NO_MONITOR);
    InputIterable<Object> ids = wrap("source", Arrays.<Object>asList("10", "9", "10"));
    try (ResourceIterator<Object> iterator = ids.iterator()) {
        // node ids are handed out sequentially, starting at 0
        int nodeId = 0;
        while (iterator.hasNext()) {
            mapper.put(iterator.next(), nodeId, GLOBAL);
            nodeId++;
        }
    }
    // WHEN
    Collector collector = mock(Collector.class);
    mapper.prepare(ids, collector, NONE);
    // THEN
    verify(collector, times(1)).collectDuplicateNode("10", 2, GLOBAL.name(), "source:1", "source:3");
    verifyNoMoreInteractions(collector);
}
Also used : Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) IdMapper(org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper) Test(org.junit.Test)

Example 9 with Collector

Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

In the class ImportTool, method main.

/**
 * Runs the import tool given the supplied arguments.
 * <p>
 * A deprecation warning is always printed to standard error first. When no arguments are given,
 * or the arguments ask for usage, the usage text is printed to standard out and the method
 * returns. Otherwise the options are parsed, a CSV input is set up and handed to
 * {@code doImport}.
 *
 * @param incomingArguments arguments for specifying input and configuration for the import.
 * @param defaultSettingsSuitableForTests default configuration geared towards unit/integration
 * test environments, for example lower default buffer sizes.
 * @throws IOException declared by this method; an {@link IOException} raised while setting up
 * or running the import is passed to {@code andPrintError} as a "File error".
 */
public static void main(String[] incomingArguments, boolean defaultSettingsSuitableForTests) throws IOException {
    PrintStream out = System.out;
    PrintStream err = System.err;
    err.println("WARNING: neo4j-import is deprecated and support for it will be removed in a future\n"
            + "version of Neo4j; please use neo4j-admin import instead.\n");
    Args args = Args.parse(incomingArguments);
    if (ArrayUtil.isEmpty(incomingArguments) || asksForUsage(args)) {
        printUsage(out);
        return;
    }

    // These two outlive the try block: the finally clause needs them to decide whether to close the stream
    OutputStream badOutput = null;
    boolean success = false;
    int pageSize = UNSPECIFIED;
    try (FileSystemAbstraction fs = new DefaultFileSystemAbstraction()) {
        File storeDir = args.interpretOption(Options.STORE_DIR.key(), Converters.<File>mandatory(), Converters.toFile(), Validators.DIRECTORY_IS_WRITABLE, Validators.CONTAINS_NO_EXISTING_DATABASE);
        Config config = Config.defaults();
        config.augment(stringMap(GraphDatabaseSettings.neo4j_home.name(), storeDir.getAbsolutePath()));
        File logsDir = config.get(GraphDatabaseSettings.logs_directory);
        fs.mkdirs(logsDir);
        File badFile = new File(storeDir, BAD_FILE_NAME);
        badOutput = new BufferedOutputStream(fs.openAsOutputStream(badFile, false));

        // Input files
        Collection<Option<File[]>> nodesFiles = extractInputFiles(args, Options.NODE_DATA.key(), err);
        Collection<Option<File[]>> relationshipsFiles = extractInputFiles(args, Options.RELATIONSHIP_DATA.key(), err);
        validateInputFiles(nodesFiles, relationshipsFiles);

        // Remaining options
        boolean enableStacktrace = args.getBoolean(Options.STACKTRACE.key(), Boolean.FALSE, Boolean.TRUE);
        Number processors = args.getNumber(Options.PROCESSORS.key(), null);
        IdType idType = args.interpretOption(Options.ID_TYPE.key(), withDefault((IdType) Options.ID_TYPE.defaultValue()), TO_ID_TYPE);
        int badTolerance = parseNumberOrUnlimited(args, Options.BAD_TOLERANCE);
        Charset inputEncoding = Charset.forName(args.get(Options.INPUT_ENCODING.key(), defaultCharset().name()));
        boolean skipBadRelationships = args.getBoolean(Options.SKIP_BAD_RELATIONSHIPS.key(), (Boolean) Options.SKIP_BAD_RELATIONSHIPS.defaultValue(), true);
        boolean skipDuplicateNodes = args.getBoolean(Options.SKIP_DUPLICATE_NODES.key(), (Boolean) Options.SKIP_DUPLICATE_NODES.defaultValue(), true);
        boolean ignoreExtraColumns = args.getBoolean(Options.IGNORE_EXTRA_COLUMNS.key(), (Boolean) Options.IGNORE_EXTRA_COLUMNS.defaultValue(), true);

        // Collector, configuration and input
        Collector collector = badCollector(badOutput, badTolerance, collect(skipBadRelationships, skipDuplicateNodes, ignoreExtraColumns));
        Config dbConfig = loadDbConfig(args.interpretOption(Options.DATABASE_CONFIG.key(), Converters.<File>optional(), Converters.toFile(), Validators.REGEX_FILE_EXISTS));
        org.neo4j.unsafe.impl.batchimport.Configuration configuration = importConfiguration(processors, defaultSettingsSuitableForTests, dbConfig, pageSize);
        Input input = new CsvInput(nodeData(inputEncoding, nodesFiles), defaultFormatNodeFileHeader(), relationshipData(inputEncoding, relationshipsFiles), defaultFormatRelationshipFileHeader(), idType, csvConfiguration(args, defaultSettingsSuitableForTests), collector, configuration.maxNumberOfProcessors());

        doImport(out, err, storeDir, logsDir, badFile, fs, nodesFiles, relationshipsFiles, enableStacktrace, input, dbConfig, badOutput, configuration);
        success = true;
    } catch (IllegalArgumentException e) {
        throw andPrintError("Input error", e, false, err);
    } catch (IOException e) {
        throw andPrintError("File error", e, false, err);
    } finally {
        // doImport closes the stream itself, so only a run that did not complete needs closing here
        if (badOutput != null && !success) {
            badOutput.close();
        }
    }
}
Also used : DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) Config(org.neo4j.kernel.configuration.Config) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) CsvInput(org.neo4j.unsafe.impl.batchimport.input.csv.CsvInput) Input(org.neo4j.unsafe.impl.batchimport.input.Input) BadCollector(org.neo4j.unsafe.impl.batchimport.input.BadCollector) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) BufferedOutputStream(java.io.BufferedOutputStream) PrintStream(java.io.PrintStream) Args(org.neo4j.helpers.Args) DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) Charset.defaultCharset(java.nio.charset.Charset.defaultCharset) Charset(java.nio.charset.Charset) CsvInput(org.neo4j.unsafe.impl.batchimport.input.csv.CsvInput) IOException(java.io.IOException) IdType(org.neo4j.unsafe.impl.batchimport.input.csv.IdType) Option(org.neo4j.helpers.Args.Option) StoreFile(org.neo4j.kernel.impl.storemigration.StoreFile) File(java.io.File)

Example 10 with Collector

Use of org.neo4j.unsafe.impl.batchimport.input.Collector in project neo4j by neo4j.

In the class ImportTool, method doImport.

/**
 * Imports {@code input} into {@code storeDir} using a {@link ParallelBatchImporter} and
 * releases the resources involved afterwards, whether or not the import succeeded.
 * <p>
 * After the import attempt the input's bad collector and {@code badOutput} are closed, the
 * logging life cycle is shut down and, if the import did not succeed, the store files under
 * {@code storeDir} are deleted. Shutdown and store-file deletion are performed even if closing
 * the collector or {@code badOutput} fails.
 *
 * @param out stream receiving the import overview and the notice about skipped bad entries.
 * @param err stream receiving error output, including a failure to delete the store files.
 * @param storeDir directory to import into; its store files are deleted after a failed import.
 * @param logsDir directory in which the store log service is set up.
 * @param badFile file whose absolute path is reported when there were bad entries.
 * @param fs file system used by the log service, the importer and the store-file cleanup.
 * @param nodesFiles node input files, used for the printed overview.
 * @param relationshipsFiles relationship input files, used for the printed overview.
 * @param enableStacktrace whether stack traces are printed along with error messages.
 * @param input the input to import; its bad collector is queried and closed by this method.
 * @param dbConfig database configuration handed to the importer.
 * @param badOutput stream that is closed by this method once the import attempt is over.
 * @param configuration import configuration handed to the importer and the printed overview.
 * @throws IOException declared by this method; for instance closing {@code badOutput} may fail.
 */
public static void doImport(PrintStream out, PrintStream err, File storeDir, File logsDir, File badFile, FileSystemAbstraction fs, Collection<Option<File[]>> nodesFiles, Collection<Option<File[]>> relationshipsFiles, boolean enableStacktrace, Input input, Config dbConfig, OutputStream badOutput, org.neo4j.unsafe.impl.batchimport.Configuration configuration) throws IOException {
    LifeSupport life = new LifeSupport();
    LogService logService = life.add(StoreLogService.inLogsDirectory(fs, logsDir));
    life.start();
    //TODO: add file watcher here?
    BatchImporter importer = new ParallelBatchImporter(storeDir, fs, configuration, logService, ExecutionMonitors.defaultVisible(), dbConfig);
    printOverview(storeDir, nodesFiles, relationshipsFiles, configuration, out);
    boolean success = false;
    try {
        importer.doImport(input);
        success = true;
    } catch (Exception e) {
        throw andPrintError("Import error", e, enableStacktrace, err);
    } finally {
        try {
            Collector collector = input.badCollector();
            // Read the count before closing the collector
            int numberOfBadEntries = collector.badEntries();
            collector.close();
            badOutput.close();
            if (numberOfBadEntries > 0) {
                out.println("There were bad entries which were skipped and logged into " + badFile.getAbsolutePath());
            }
        } finally {
            // Nested finally: a failure while closing the collector or the bad-entries stream
            // must not prevent shutting down logging or removing a half-written store.
            life.shutdown();
            if (!success) {
                try {
                    StoreFile.fileOperation(FileOperation.DELETE, fs, storeDir, null, Iterables.<StoreFile, StoreFile>iterable(StoreFile.values()), false, ExistingTargetStrategy.FAIL, StoreFileType.values());
                } catch (IOException e) {
                    err.println("Unable to delete store files after an aborted import " + e);
                    if (enableStacktrace) {
                        // Send the trace to the same stream as the message above, not to System.err
                        e.printStackTrace(err);
                    }
                }
            }
        }
    }
}
Also used : ParallelBatchImporter(org.neo4j.unsafe.impl.batchimport.ParallelBatchImporter) BatchImporter(org.neo4j.unsafe.impl.batchimport.BatchImporter) ParallelBatchImporter(org.neo4j.unsafe.impl.batchimport.ParallelBatchImporter) BadCollector(org.neo4j.unsafe.impl.batchimport.input.BadCollector) Collector(org.neo4j.unsafe.impl.batchimport.input.Collector) Collectors.badCollector(org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector) LifeSupport(org.neo4j.kernel.lifecycle.LifeSupport) IOException(java.io.IOException) LogService(org.neo4j.kernel.impl.logging.LogService) StoreLogService(org.neo4j.kernel.impl.logging.StoreLogService) IllegalMultilineFieldException(org.neo4j.csv.reader.IllegalMultilineFieldException) MissingRelationshipDataException(org.neo4j.unsafe.impl.batchimport.input.MissingRelationshipDataException) DuplicateInputIdException(org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.DuplicateInputIdException) InputException(org.neo4j.unsafe.impl.batchimport.input.InputException) Exceptions.launderedException(org.neo4j.helpers.Exceptions.launderedException) IOException(java.io.IOException)

Aggregations

Collector (org.neo4j.unsafe.impl.batchimport.input.Collector)16 Test (org.junit.Test)12 Collectors.badCollector (org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector)9 IdMapper (org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper)7 InputNode (org.neo4j.unsafe.impl.batchimport.input.InputNode)6 Groups (org.neo4j.unsafe.impl.batchimport.input.Groups)4 IOException (java.io.IOException)3 Monitor (org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.EncodingIdMapper.Monitor)3 BadCollector (org.neo4j.unsafe.impl.batchimport.input.BadCollector)3 Input (org.neo4j.unsafe.impl.batchimport.input.Input)3 BufferedOutputStream (java.io.BufferedOutputStream)2 File (java.io.File)2 OutputStream (java.io.OutputStream)2 ArrayList (java.util.ArrayList)2 Matchers.anyString (org.mockito.Matchers.anyString)2 PrimitiveLongIterator (org.neo4j.collection.primitive.PrimitiveLongIterator)2 ProgressListener (org.neo4j.helpers.progress.ProgressListener)2 FileSystemAbstraction (org.neo4j.io.fs.FileSystemAbstraction)2 TaskExecutionPanicException (org.neo4j.unsafe.impl.batchimport.executor.TaskExecutionPanicException)2 Group (org.neo4j.unsafe.impl.batchimport.input.Group)2