Search in sources :

Example 21 with Extractors

Use of org.neo4j.csv.reader.Extractors in project neo4j by neo4j.

From the class QuickImport, method main.

/**
 * Command-line entry point. Builds a {@code DataGeneratorInput} from the supplied options and
 * feeds it to a {@code BatchImporter}: a {@code CsvOutput} when the {@code to-csv} option is set,
 * otherwise the highest-priority importer obtained from {@code BatchImporterFactory}.
 *
 * <p>Options read directly by this method: {@code nodes}, {@code relationships}, {@code labels},
 * {@code relationship-types}, {@code into}, {@code random-seed}, {@code id-type},
 * {@code db-config}, {@code high-io}, {@code pagecache-memory}, {@code processors},
 * {@code max-memory}, {@code factor-bad-node-data}, {@code factor-bad-relationship-data},
 * {@code to-csv} and {@code v}. The header parsing helpers also receive the parsed options.
 *
 * @param arguments raw command-line arguments, parsed with {@code Args.parse}.
 * @throws IOException declared by this method; propagated from the calls made here.
 */
public static void main(String[] arguments) throws IOException {
    Args options = Args.parse(arguments);

    // Size and shape of the generated data set, plus where it goes.
    long nodeCount = parseLongWithUnit(options.get("nodes", null));
    long relationshipCount = parseLongWithUnit(options.get("relationships", null));
    int labelCount = options.getNumber("labels", 4).intValue();
    int relationshipTypeCount = options.getNumber("relationship-types", 4).intValue();
    Path targetDirectory = Path.of(options.get("into"));
    long seed = options.getNumber("random-seed", currentTimeMillis()).longValue();

    // CSV-level settings and the headers describing the generated columns.
    // Both headers are parsed against the same Groups instance, node header first.
    Configuration csvConfig = Configuration.COMMAS;
    Extractors extractors = new Extractors(csvConfig.arrayDelimiter());
    IdType idType = IdType.valueOf(options.get("id-type", IdType.INTEGER.name()));
    Groups groups = new Groups();
    Header nodeHeader = parseNodeHeader(options, idType, extractors, groups);
    Header relationshipHeader = parseRelationshipHeader(options, idType, extractors, groups);

    // Database configuration: from file when "db-config" is given, defaults otherwise.
    String dbConfigFileName = options.get("db-config", null);
    Config dbConfig = dbConfigFileName == null
            ? Config.defaults()
            : Config.newBuilder().fromFile(Path.of(dbConfigFileName)).build();

    // null means "high-io was not specified": fall back to the wrapped configuration.
    final Boolean highIoOverride;
    if (options.has("high-io")) {
        highIoOverride = options.getBoolean("high-io");
    } else {
        highIoOverride = null;
    }

    LogProvider logProvider = NullLogProvider.getInstance();
    long configuredPageCacheMemory = options
            .getNumber("pagecache-memory", org.neo4j.internal.batchimport.Configuration.MAX_PAGE_CACHE_MEMORY)
            .longValue();

    // Importer configuration: the default for the target directory, with command-line overrides.
    org.neo4j.internal.batchimport.Configuration importConfig =
            new org.neo4j.internal.batchimport.Configuration.Overridden(defaultConfiguration(targetDirectory)) {

        @Override
        public int maxNumberOfProcessors() {
            return options.getNumber("processors", super.maxNumberOfProcessors()).intValue();
        }

        @Override
        public boolean highIO() {
            if (highIoOverride != null) {
                return highIoOverride;
            }
            return super.highIO();
        }

        @Override
        public long pageCacheMemory() {
            return configuredPageCacheMemory;
        }

        @Override
        public long maxMemoryUsage() {
            String custom = options.get("max-memory", null);
            if (custom != null) {
                return parseMaxMemory(custom);
            }
            return super.maxMemoryUsage();
        }

        @Override
        public IndexConfig indexConfig() {
            return IndexConfig.create().withLabelIndex().withRelationshipTypeIndex();
        }
    };

    float factorBadNodeData = options.getNumber("factor-bad-node-data", 0).floatValue();
    float factorBadRelationshipData = options.getNumber("factor-bad-relationship-data", 0).floatValue();
    Input input = new DataGeneratorInput(
            nodeCount,
            relationshipCount,
            idType,
            seed,
            0,
            nodeHeader,
            relationshipHeader,
            labelCount,
            relationshipTypeCount,
            factorBadNodeData,
            factorBadRelationshipData);

    try (FileSystemAbstraction fileSystem = new DefaultFileSystemAbstraction();
        Lifespan life = new Lifespan()) {
        BatchImporter consumer;
        if (!options.getBoolean("to-csv")) {
            // The seed is printed only on this (non-CSV) path.
            System.out.println("Seed " + seed);
            final JobScheduler jobScheduler = life.add(createScheduler());
            boolean verbose = options.getBoolean("v");
            ExecutionMonitor monitor;
            if (verbose) {
                monitor = new SpectrumExecutionMonitor(2, TimeUnit.SECONDS, System.out, 100);
            } else {
                monitor = defaultVisible();
            }
            consumer = BatchImporterFactory.withHighestPriority().instantiate(
                    DatabaseLayout.ofFlat(targetDirectory),
                    fileSystem,
                    PageCacheTracer.NULL,
                    importConfig,
                    new SimpleLogService(logProvider, logProvider),
                    monitor,
                    EMPTY,
                    dbConfig,
                    RecordFormatSelector.selectForConfig(dbConfig, logProvider),
                    NO_MONITOR,
                    jobScheduler,
                    Collector.EMPTY,
                    TransactionLogInitializer.getLogFilesInitializer(),
                    new IndexImporterFactoryImpl(dbConfig),
                    INSTANCE);
        } else {
            consumer = new CsvOutput(targetDirectory, nodeHeader, relationshipHeader, csvConfig);
        }
        consumer.doImport(input);
    }
}
Also used : DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) FileSystemAbstraction(org.neo4j.io.fs.FileSystemAbstraction) SpectrumExecutionMonitor(org.neo4j.internal.batchimport.staging.SpectrumExecutionMonitor) Configuration(org.neo4j.csv.reader.Configuration) Configuration.defaultConfiguration(org.neo4j.internal.batchimport.Configuration.defaultConfiguration) SimpleLogService(org.neo4j.logging.internal.SimpleLogService) Config(org.neo4j.configuration.Config) IndexConfig(org.neo4j.internal.batchimport.IndexConfig) DataGeneratorInput(org.neo4j.internal.batchimport.input.DataGeneratorInput) Input(org.neo4j.internal.batchimport.input.Input) BatchImporter(org.neo4j.internal.batchimport.BatchImporter) ParallelBatchImporter(org.neo4j.internal.batchimport.ParallelBatchImporter) Groups(org.neo4j.internal.batchimport.input.Groups) SpectrumExecutionMonitor(org.neo4j.internal.batchimport.staging.SpectrumExecutionMonitor) ExecutionMonitor(org.neo4j.internal.batchimport.staging.ExecutionMonitor) Path(java.nio.file.Path) JobScheduler(org.neo4j.scheduler.JobScheduler) Args(org.neo4j.internal.helpers.Args) DefaultFileSystemAbstraction(org.neo4j.io.fs.DefaultFileSystemAbstraction) IdType(org.neo4j.internal.batchimport.input.IdType) LogProvider(org.neo4j.logging.LogProvider) NullLogProvider(org.neo4j.logging.NullLogProvider) Extractors(org.neo4j.csv.reader.Extractors) Header(org.neo4j.internal.batchimport.input.csv.Header) IndexImporterFactoryImpl(org.neo4j.kernel.impl.index.schema.IndexImporterFactoryImpl) DataGeneratorInput(org.neo4j.internal.batchimport.input.DataGeneratorInput) Lifespan(org.neo4j.kernel.lifecycle.Lifespan)

Example 22 with Extractors

Use of org.neo4j.csv.reader.Extractors in project neo4j by neo4j.

From the class CsvInputParser, method next.

/**
 * Reads one line of input and reports its fields to {@code visitor}, one header entry at a time.
 *
 * <p>For each header entry the next field is sought and, depending on the entry type, extracted
 * and passed to the matching visitor method. Processing of header entries stops when a visitor
 * call returns {@code false}, when the header is exhausted, or when the end of the line is
 * reached. Any fields left on the line are then consumed; while the visitor still wants data
 * they are reported to {@code badCollector} as extra columns. Finally
 * {@code visitor.endOfEntity()} is called.
 *
 * @param visitor receiver of the ids, types, labels and properties found on the line.
 * @return {@code false} if no field could be sought for the first header entry, otherwise
 *         {@code true} once the entity has been reported.
 * @throws IOException declared by this method; propagated from the calls made here.
 * @throws InputException for any {@link RuntimeException} raised while parsing, with a message
 *         describing the data source, field, header and raw field value.
 */
boolean next(InputEntityVisitor visitor) throws IOException {
    lineNumber++;
    // Declared outside the try block so the catch block can report where parsing failed.
    int column = 0;
    Header.Entry entry = null;
    Header.Entry[] columns = header.entries();
    try {
        boolean proceed = true;
        for (column = 0; column < columns.length && proceed; column++) {
            entry = columns[column];
            if (!seeker.seek(mark, delimiter)) {
                if (column == 0) {
                    // We're just at the end
                    return false;
                }
                throw new UnexpectedEndOfInputException("Near " + mark);
            }

            switch (entry.type()) {
                case ID: {
                    if (!seeker.tryExtract(mark, entry.extractor())) {
                        break;
                    }
                    switch (idType) {
                        case STRING:
                        case INTEGER:
                            Object id = entry.extractor().value();
                            proceed = visitor.id(id, entry.group());
                            // NOTE(review): for a named id column the result of visitor.id(...)
                            // above is overwritten by visitor.property(...) - confirm intended.
                            if (entry.name() != null) {
                                proceed = visitor.property(entry.name(), id);
                            }
                            break;
                        case ACTUAL:
                            proceed = visitor.id(((LongExtractor) entry.extractor()).longValue());
                            break;
                        default:
                            throw new IllegalArgumentException(idType.name());
                    }
                    break;
                }
                case START_ID: {
                    if (!seeker.tryExtract(mark, entry.extractor())) {
                        break;
                    }
                    switch (idType) {
                        case STRING:
                        case INTEGER:
                            proceed = visitor.startId(entry.extractor().value(), entry.group());
                            break;
                        case ACTUAL:
                            proceed = visitor.startId(((LongExtractor) entry.extractor()).longValue());
                            break;
                        default:
                            throw new IllegalArgumentException(idType.name());
                    }
                    break;
                }
                case END_ID: {
                    if (!seeker.tryExtract(mark, entry.extractor())) {
                        break;
                    }
                    switch (idType) {
                        case STRING:
                        case INTEGER:
                            proceed = visitor.endId(entry.extractor().value(), entry.group());
                            break;
                        case ACTUAL:
                            proceed = visitor.endId(((LongExtractor) entry.extractor()).longValue());
                            break;
                        default:
                            throw new IllegalArgumentException(idType.name());
                    }
                    break;
                }
                case TYPE: {
                    if (seeker.tryExtract(mark, entry.extractor())) {
                        proceed = visitor.type((String) entry.extractor().value());
                    }
                    break;
                }
                case PROPERTY: {
                    if (!seeker.tryExtract(mark, entry.extractor(), entry.optionalParameter())) {
                        break;
                    }
                    // TODO since PropertyStore#encodeValue takes Object there's no point splitting up
                    // into different primitive types
                    Object propertyValue = entry.extractor().value();
                    // Empty arrays are not reported as properties.
                    if (!isEmptyArray(propertyValue)) {
                        proceed = visitor.property(entry.name(), propertyValue);
                    }
                    break;
                }
                case LABEL: {
                    if (!seeker.tryExtract(mark, entry.extractor())) {
                        break;
                    }
                    // The extracted value is either an array of labels or a single label.
                    Object extracted = entry.extractor().value();
                    String[] labels = extracted.getClass().isArray()
                            ? (String[]) extracted
                            : new String[] { (String) extracted };
                    proceed = visitor.labels(labels);
                    break;
                }
                case IGNORE:
                    break;
                default:
                    throw new IllegalArgumentException(entry.type().toString());
            }

            if (mark.isEndOfLine()) {
                // We're at the end of the line, break and return an entity with what we have.
                break;
            }
        }

        // Consume whatever is left on the line beyond the header entries handled above.
        // NOTE(review): entry is still null here if the header has no entries - confirm that
        // cannot happen.
        while (!mark.isEndOfLine()) {
            seeker.seek(mark, delimiter);
            if (!proceed) {
                continue;
            }
            seeker.tryExtract(mark, stringExtractor, entry.optionalParameter());
            badCollector.collectExtraColumns(seeker.sourceDescription(), lineNumber, stringExtractor.value());
        }
        visitor.endOfEntity();
        return true;
    } catch (final RuntimeException e) {
        // Best effort: try to re-read the offending field as a plain string for the error message.
        String rawValue = null;
        try {
            Extractors fallback = new Extractors('?');
            if (seeker.tryExtract(mark, fallback.string(), entry.optionalParameter())) {
                rawValue = fallback.string().value();
            }
        } catch (Exception ignored) {
            // OK
        }

        String fieldDescription = entry + ":" + (column + 1);
        String shownValue = rawValue != null ? rawValue : "??";
        String message = format(
                "ERROR in input"
                        + "%n  data source: %s"
                        + "%n  in field: %s"
                        + "%n  for header: %s"
                        + "%n  raw field value: %s"
                        + "%n  original error: %s",
                seeker,
                fieldDescription,
                header,
                shownValue,
                e.getMessage());
        if (!(e instanceof InputException)) {
            throw new InputException(message, e);
        }
        // Already an InputException: rethrow it with the enriched message.
        throw Exceptions.withMessage(e, message);
    }
}
Also used : UnexpectedEndOfInputException(org.neo4j.internal.batchimport.input.UnexpectedEndOfInputException) IOException(java.io.IOException) InputException(org.neo4j.internal.batchimport.input.InputException) UnexpectedEndOfInputException(org.neo4j.internal.batchimport.input.UnexpectedEndOfInputException) Extractors(org.neo4j.csv.reader.Extractors) LongExtractor(org.neo4j.csv.reader.Extractors.LongExtractor) InputException(org.neo4j.internal.batchimport.input.InputException) UnexpectedEndOfInputException(org.neo4j.internal.batchimport.input.UnexpectedEndOfInputException)

Aggregations

Extractors (org.neo4j.csv.reader.Extractors)22 CharSeeker (org.neo4j.csv.reader.CharSeeker)18 Test (org.junit.jupiter.api.Test)11 IdType (org.neo4j.internal.batchimport.input.IdType)10 Test (org.junit.Test)8 DataFactories.defaultFormatNodeFileHeader (org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader)8 DataFactories.defaultFormatRelationshipFileHeader (org.neo4j.internal.batchimport.input.csv.DataFactories.defaultFormatRelationshipFileHeader)8 DataFactories.defaultFormatNodeFileHeader (org.neo4j.unsafe.impl.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader)6 IOException (java.io.IOException)3 ParallelBatchImporter (org.neo4j.internal.batchimport.ParallelBatchImporter)2 DataGeneratorInput (org.neo4j.internal.batchimport.input.DataGeneratorInput)2 Input (org.neo4j.internal.batchimport.input.Input)2 JobScheduler (org.neo4j.scheduler.JobScheduler)2 DuplicateHeaderException (org.neo4j.unsafe.impl.batchimport.input.DuplicateHeaderException)2 Reader (java.io.Reader)1 StringReader (java.io.StringReader)1 Path (java.nio.file.Path)1 Config (org.neo4j.configuration.Config)1 CharReadable (org.neo4j.csv.reader.CharReadable)1 Configuration (org.neo4j.csv.reader.Configuration)1