Use of org.neo4j.csv.reader.Extractors in project neo4j by neo4j.
Class QuickImport, method main.
/**
 * Command-line entry point which generates random node and relationship data and feeds it to a
 * {@link BatchImporter}: either a {@link CsvOutput} (when {@code to-csv} is set) or the
 * highest-priority importer provided by {@link BatchImporterFactory}.
 *
 * <p>Arguments read here: {@code nodes}, {@code relationships}, {@code labels},
 * {@code relationship-types}, {@code into}, {@code random-seed}, {@code id-type}, {@code db-config},
 * {@code high-io}, {@code pagecache-memory}, {@code processors}, {@code max-memory},
 * {@code factor-bad-node-data}, {@code factor-bad-relationship-data}, {@code to-csv} and {@code v}.
 *
 * @param arguments raw command-line arguments, parsed with {@link Args#parse}.
 * @throws IOException declared by this method; which of the calls below raise it is not visible here.
 */
public static void main(String[] arguments) throws IOException {
    Args args = Args.parse(arguments);

    // Size and shape of the data set to generate.
    long numberOfNodes = parseLongWithUnit(args.get("nodes", null));
    long numberOfRelationships = parseLongWithUnit(args.get("relationships", null));
    int numberOfLabels = args.getNumber("labels", 4).intValue();
    int numberOfRelationshipTypes = args.getNumber("relationship-types", 4).intValue();
    Path targetDirectory = Path.of(args.get("into"));
    long seed = args.getNumber("random-seed", currentTimeMillis()).longValue();

    // CSV dialect and headers describing the generated entities.
    Configuration csvConfig = Configuration.COMMAS;
    Extractors extractors = new Extractors(csvConfig.arrayDelimiter());
    IdType idType = IdType.valueOf(args.get("id-type", IdType.INTEGER.name()));
    Groups groups = new Groups();
    Header nodeHeader = parseNodeHeader(args, idType, extractors, groups);
    Header relationshipHeader = parseRelationshipHeader(args, idType, extractors, groups);

    // Database configuration: loaded from file if one was given, otherwise defaults.
    String dbConfigFileName = args.get("db-config", null);
    Config dbConfig = dbConfigFileName != null
            ? Config.newBuilder().fromFile(Path.of(dbConfigFileName)).build()
            : Config.defaults();

    // null means "not specified", in which case the default configuration decides.
    final Boolean highIo;
    if (args.has("high-io")) {
        highIo = args.getBoolean("high-io");
    } else {
        highIo = null;
    }
    LogProvider logging = NullLogProvider.getInstance();
    long pageCacheMemory = args.getNumber("pagecache-memory", org.neo4j.internal.batchimport.Configuration.MAX_PAGE_CACHE_MEMORY).longValue();

    // Import configuration where command-line arguments take precedence over the defaults.
    org.neo4j.internal.batchimport.Configuration importConfig =
            new org.neo4j.internal.batchimport.Configuration.Overridden(defaultConfiguration(targetDirectory)) {
                @Override
                public int maxNumberOfProcessors() {
                    return args.getNumber("processors", super.maxNumberOfProcessors()).intValue();
                }

                @Override
                public boolean highIO() {
                    if (highIo != null) {
                        return highIo;
                    }
                    return super.highIO();
                }

                @Override
                public long pageCacheMemory() {
                    return pageCacheMemory;
                }

                @Override
                public long maxMemoryUsage() {
                    String custom = args.get("max-memory", null);
                    if (custom != null) {
                        return parseMaxMemory(custom);
                    }
                    return super.maxMemoryUsage();
                }

                @Override
                public IndexConfig indexConfig() {
                    return IndexConfig.create().withLabelIndex().withRelationshipTypeIndex();
                }
            };

    float badNodeDataFactor = args.getNumber("factor-bad-node-data", 0).floatValue();
    float badRelationshipDataFactor = args.getNumber("factor-bad-relationship-data", 0).floatValue();
    Input input = new DataGeneratorInput(
            numberOfNodes,
            numberOfRelationships,
            idType,
            seed,
            0,
            nodeHeader,
            relationshipHeader,
            numberOfLabels,
            numberOfRelationshipTypes,
            badNodeDataFactor,
            badRelationshipDataFactor);

    try (FileSystemAbstraction fileSystem = new DefaultFileSystemAbstraction();
            Lifespan life = new Lifespan()) {
        BatchImporter consumer;
        if (!args.getBoolean("to-csv")) {
            System.out.println("Seed " + seed);
            final JobScheduler jobScheduler = life.add(createScheduler());
            boolean verbose = args.getBoolean("v");
            ExecutionMonitor monitor;
            if (verbose) {
                monitor = new SpectrumExecutionMonitor(2, TimeUnit.SECONDS, System.out, 100);
            } else {
                monitor = defaultVisible();
            }
            BatchImporterFactory importerFactory = BatchImporterFactory.withHighestPriority();
            DatabaseLayout layout = DatabaseLayout.ofFlat(targetDirectory);
            consumer = importerFactory.instantiate(
                    layout,
                    fileSystem,
                    PageCacheTracer.NULL,
                    importConfig,
                    new SimpleLogService(logging, logging),
                    monitor,
                    EMPTY,
                    dbConfig,
                    RecordFormatSelector.selectForConfig(dbConfig, logging),
                    NO_MONITOR,
                    jobScheduler,
                    Collector.EMPTY,
                    TransactionLogInitializer.getLogFilesInitializer(),
                    new IndexImporterFactoryImpl(dbConfig),
                    INSTANCE);
        } else {
            consumer = new CsvOutput(targetDirectory, nodeHeader, relationshipHeader, csvConfig);
        }
        consumer.doImport(input);
    }
}
Use of org.neo4j.csv.reader.Extractors in project neo4j by neo4j.
Class CsvInputParser, method next.
/**
 * Reads the next line of input and feeds its fields to {@code visitor}, one header entry at a time.
 *
 * <p>Every header entry is paired with the next value located by the seeker and dispatched on the
 * entry's type. Processing of header entries stops early when the visitor returns {@code false} from
 * a callback or when the end of the line is reached. Any values remaining on the line afterwards are
 * consumed; they are reported to the bad collector as extra columns only if the visitor has not
 * asked to stop.
 *
 * @param visitor receiver of the ids, types, labels and properties found on the line.
 * @return {@code true} if a line was processed and {@code visitor.endOfEntity()} was called,
 * {@code false} if there was no more input before the first field of a line.
 * @throws IOException declared by this method; presumably raised by the underlying seeker (confirm).
 * @throws InputException for any {@link RuntimeException} raised while processing the line; the message
 * carries the data source, the header entry and its 1-based position, the header, the raw field value
 * (when it could be re-read) and the original error message.
 */
boolean next(InputEntityVisitor visitor) throws IOException {
    lineNumber++;
    // i and entry are declared outside the try block because the error message below reports them.
    int i = 0;
    Header.Entry entry = null;
    Header.Entry[] entries = header.entries();
    try {
        boolean doContinue = true;
        for (i = 0; i < entries.length && doContinue; i++) {
            entry = entries[i];
            if (!seeker.seek(mark, delimiter)) {
                if (i > 0) {
                    // Input ended after at least one field of this line had been read.
                    throw new UnexpectedEndOfInputException("Near " + mark);
                }
                // We're just at the end
                return false;
            }
            switch (entry.type()) {
                case ID:
                    if (seeker.tryExtract(mark, entry.extractor())) {
                        switch (idType) {
                            case STRING:
                            case INTEGER:
                                Object idValue = entry.extractor().value();
                                doContinue = visitor.id(idValue, entry.group());
                                if (entry.name() != null) {
                                    // A named id column is also stored as a property. Combine the two
                                    // results (non-short-circuit, so property() is still called) instead
                                    // of overwriting, otherwise a stop request from id() would be lost.
                                    doContinue &= visitor.property(entry.name(), idValue);
                                }
                                break;
                            case ACTUAL:
                                doContinue = visitor.id(((LongExtractor) entry.extractor()).longValue());
                                break;
                            default:
                                throw new IllegalArgumentException(idType.name());
                        }
                    }
                    break;
                case START_ID:
                    if (seeker.tryExtract(mark, entry.extractor())) {
                        switch (idType) {
                            case STRING:
                            case INTEGER:
                                doContinue = visitor.startId(entry.extractor().value(), entry.group());
                                break;
                            case ACTUAL:
                                doContinue = visitor.startId(((LongExtractor) entry.extractor()).longValue());
                                break;
                            default:
                                throw new IllegalArgumentException(idType.name());
                        }
                    }
                    break;
                case END_ID:
                    if (seeker.tryExtract(mark, entry.extractor())) {
                        switch (idType) {
                            case STRING:
                            case INTEGER:
                                doContinue = visitor.endId(entry.extractor().value(), entry.group());
                                break;
                            case ACTUAL:
                                doContinue = visitor.endId(((LongExtractor) entry.extractor()).longValue());
                                break;
                            default:
                                throw new IllegalArgumentException(idType.name());
                        }
                    }
                    break;
                case TYPE:
                    if (seeker.tryExtract(mark, entry.extractor())) {
                        doContinue = visitor.type((String) entry.extractor().value());
                    }
                    break;
                case PROPERTY:
                    if (seeker.tryExtract(mark, entry.extractor(), entry.optionalParameter())) {
                        // TODO since PropertyStore#encodeValue takes Object there's no point splitting up
                        // into different primitive types
                        Object value = entry.extractor().value();
                        if (!isEmptyArray(value)) {
                            doContinue = visitor.property(entry.name(), value);
                        }
                    }
                    break;
                case LABEL:
                    if (seeker.tryExtract(mark, entry.extractor())) {
                        // The extracted labels are either a String[] or a single String.
                        Object labelsValue = entry.extractor().value();
                        if (labelsValue.getClass().isArray()) {
                            doContinue = visitor.labels((String[]) labelsValue);
                        } else {
                            doContinue = visitor.labels(new String[] {(String) labelsValue});
                        }
                    }
                    break;
                case IGNORE:
                    break;
                default:
                    throw new IllegalArgumentException(entry.type().toString());
            }
            if (mark.isEndOfLine()) {
                // We're at the end of the line, break and return an entity with what we have.
                break;
            }
        }

        // Consume whatever is left on the line. If the visitor asked to stop these are simply skipped,
        // otherwise they are values without a header entry and get reported as extra columns.
        while (!mark.isEndOfLine()) {
            seeker.seek(mark, delimiter);
            if (doContinue) {
                seeker.tryExtract(mark, stringExtractor, entry.optionalParameter());
                badCollector.collectExtraColumns(seeker.sourceDescription(), lineNumber, stringExtractor.value());
            }
        }
        visitor.endOfEntity();
        return true;
    } catch (final RuntimeException e) {
        String stringValue = null;
        try {
            Extractors extractors = new Extractors('?');
            if (seeker.tryExtract(mark, extractors.string(), entry.optionalParameter())) {
                stringValue = extractors.string().value();
            }
        } catch (Exception ignored) {
            // Re-reading the raw value is best-effort, purely to enrich the error message below.
            // On failure the message shows "??" instead.
        }
        String message = format("ERROR in input" + "%n data source: %s" + "%n in field: %s" + "%n for header: %s" + "%n raw field value: %s" + "%n original error: %s", seeker, entry + ":" + (i + 1), header, stringValue != null ? stringValue : "??", e.getMessage());
        if (e instanceof InputException) {
            throw Exceptions.withMessage(e, message);
        }
        throw new InputException(message, e);
    }
}
Aggregations