Use of org.neo4j.internal.batchimport.input.IdType in project neo4j by neo4j.
Class QuickImport, method main.
/**
 * Command-line entry point: builds a {@code DataGeneratorInput} from the supplied options and
 * hands it to either a {@code CsvOutput} (when {@code to-csv} is set) or the highest-priority
 * {@code BatchImporter}.
 *
 * <p>Options read here: {@code nodes}, {@code relationships}, {@code labels},
 * {@code relationship-types}, {@code into}, {@code random-seed}, {@code id-type},
 * {@code db-config}, {@code high-io}, {@code pagecache-memory}, {@code processors},
 * {@code max-memory}, {@code factor-bad-node-data}, {@code factor-bad-relationship-data},
 * {@code to-csv} and {@code v}.
 *
 * @param arguments raw command-line arguments, parsed with {@code Args.parse}
 * @throws IOException propagated from the operations performed in this method
 */
public static void main(String[] arguments) throws IOException {
    Args args = Args.parse(arguments);

    // Size and shape of the data set to generate.
    long nodeCount = parseLongWithUnit(args.get("nodes", null));
    long relationshipCount = parseLongWithUnit(args.get("relationships", null));
    int labelCount = args.getNumber("labels", 4).intValue();
    int relationshipTypeCount = args.getNumber("relationship-types", 4).intValue();
    Path targetDirectory = Path.of(args.get("into"));
    long seed = args.getNumber("random-seed", currentTimeMillis()).longValue();

    // Headers describing the generated node and relationship entities.
    Configuration csvConfig = Configuration.COMMAS;
    Extractors extractors = new Extractors(csvConfig.arrayDelimiter());
    IdType idType = IdType.valueOf(args.get("id-type", IdType.INTEGER.name()));
    Groups groups = new Groups();
    Header nodeHeader = parseNodeHeader(args, idType, extractors, groups);
    Header relationshipHeader = parseRelationshipHeader(args, idType, extractors, groups);

    // Database configuration: taken from a file if one was given, otherwise defaults.
    String dbConfigFileName = args.get("db-config", null);
    Config dbConfig = dbConfigFileName != null
            ? Config.newBuilder().fromFile(Path.of(dbConfigFileName)).build()
            : Config.defaults();

    // null means "not specified on the command line"; the importer default is used in that case.
    Boolean highIo;
    if (args.has("high-io")) {
        highIo = args.getBoolean("high-io");
    } else {
        highIo = null;
    }

    LogProvider logProvider = NullLogProvider.getInstance();
    long pageCacheMemory = args.getNumber(
            "pagecache-memory",
            org.neo4j.internal.batchimport.Configuration.MAX_PAGE_CACHE_MEMORY).longValue();

    // Importer configuration: defaults for the target directory, overridden by command-line options.
    org.neo4j.internal.batchimport.Configuration importConfig =
            new org.neo4j.internal.batchimport.Configuration.Overridden(defaultConfiguration(targetDirectory)) {
                @Override
                public int maxNumberOfProcessors() {
                    return args.getNumber("processors", super.maxNumberOfProcessors()).intValue();
                }

                @Override
                public boolean highIO() {
                    if (highIo == null) {
                        return super.highIO();
                    }
                    return highIo;
                }

                @Override
                public long pageCacheMemory() {
                    return pageCacheMemory;
                }

                @Override
                public long maxMemoryUsage() {
                    String custom = args.get("max-memory", null);
                    if (custom == null) {
                        return super.maxMemoryUsage();
                    }
                    return parseMaxMemory(custom);
                }

                @Override
                public IndexConfig indexConfig() {
                    return IndexConfig.create().withLabelIndex().withRelationshipTypeIndex();
                }
            };

    float factorBadNodeData = args.getNumber("factor-bad-node-data", 0).floatValue();
    float factorBadRelationshipData = args.getNumber("factor-bad-relationship-data", 0).floatValue();
    Input input = new DataGeneratorInput(
            nodeCount,
            relationshipCount,
            idType,
            seed,
            0,
            nodeHeader,
            relationshipHeader,
            labelCount,
            relationshipTypeCount,
            factorBadNodeData,
            factorBadRelationshipData);

    try (FileSystemAbstraction fileSystem = new DefaultFileSystemAbstraction();
            Lifespan lifespan = new Lifespan()) {
        BatchImporter importer;
        if (!args.getBoolean("to-csv")) {
            System.out.println("Seed " + seed);
            final JobScheduler jobScheduler = lifespan.add(createScheduler());
            boolean verbose = args.getBoolean("v");
            ExecutionMonitor monitor;
            if (verbose) {
                monitor = new SpectrumExecutionMonitor(2, TimeUnit.SECONDS, System.out, 100);
            } else {
                monitor = defaultVisible();
            }
            importer = BatchImporterFactory.withHighestPriority().instantiate(
                    DatabaseLayout.ofFlat(targetDirectory),
                    fileSystem,
                    PageCacheTracer.NULL,
                    importConfig,
                    new SimpleLogService(logProvider, logProvider),
                    monitor,
                    EMPTY,
                    dbConfig,
                    RecordFormatSelector.selectForConfig(dbConfig, logProvider),
                    NO_MONITOR,
                    jobScheduler,
                    Collector.EMPTY,
                    TransactionLogInitializer.getLogFilesInitializer(),
                    new IndexImporterFactoryImpl(dbConfig),
                    INSTANCE);
        } else {
            // CSV mode: write the generated data to files instead of importing it.
            importer = new CsvOutput(targetDirectory, nodeHeader, relationshipHeader, csvConfig);
        }
        importer.doImport(input);
    }
}
Use of org.neo4j.internal.batchimport.input.IdType in project neo4j by neo4j.
Class CsvInputTest, method shouldCloseDataIteratorsInTheEnd.
/**
 * Reads one entity from the node iterator and one from the relationship iterator of a
 * {@code CsvInput}, closes both iterators, and then asserts that the last data source
 * captured for nodes and for relationships has been closed.
 */
@Test
public void shouldCloseDataIteratorsInTheEnd() throws Exception {
    // GIVEN: capturing factories, so the most recently created data source can be inspected
    CapturingDataFactories nodeFactories =
            new CapturingDataFactories(config -> charReader("1"), NO_DECORATOR);
    CapturingDataFactories relationshipFactories = new CapturingDataFactories(
            config -> charReader("1,1"),
            InputEntityDecorators.defaultRelationshipType("TYPE"));
    IdType idType = IdType.STRING;
    var nodeHeader = header(entry(null, Type.ID, CsvInput.idExtractor(idType, extractors)));
    var relationshipHeader = header(
            entry(null, Type.START_ID, CsvInput.idExtractor(idType, extractors)),
            entry(null, Type.END_ID, CsvInput.idExtractor(idType, extractors)));
    Input input = new CsvInput(
            nodeFactories,
            nodeHeader,
            relationshipFactories,
            relationshipHeader,
            idType,
            config(),
            NO_MONITOR,
            INSTANCE);

    // WHEN: each iterator is opened, read from once and closed by try-with-resources
    try (InputIterator nodes = input.nodes(EMPTY).iterator()) {
        readNext(nodes);
    }
    try (InputIterator relationships = input.relationships(EMPTY).iterator()) {
        readNext(relationships);
    }

    // THEN
    assertClosed(nodeFactories.last());
    assertClosed(relationshipFactories.last());
}
Use of org.neo4j.internal.batchimport.input.IdType in project neo4j by neo4j.
Class CsvInputTest, method shouldHaveNodesBelongToGroupSpecifiedInHeader.
/**
 * Feeds two node lines through a {@code CsvInput} whose ID header entry names the group
 * "MyGroup", and asserts that both nodes are reported with that group and that no third
 * node follows.
 */
@Test
public void shouldHaveNodesBelongToGroupSpecifiedInHeader() throws Exception {
    // GIVEN: two nodes and a header whose ID column carries the group name
    IdType idType = IdType.INTEGER;
    Iterable<DataFactory> nodeData = dataIterable(data("123,one\n" + "456,two"));
    Groups groups = new Groups();
    Group myGroup = groups.getOrCreate("MyGroup");
    var nodeHeader = header(
            entry(null, Type.ID, myGroup.name(), CsvInput.idExtractor(idType, extractors)),
            entry("name", Type.PROPERTY, extractors.string()));
    Input input = new CsvInput(
            nodeData,
            nodeHeader,
            datas(),
            defaultFormatRelationshipFileHeader(),
            idType,
            config(),
            NO_MONITOR,
            INSTANCE);

    // WHEN/THEN
    try (InputIterator nodeIterator = input.nodes(EMPTY).iterator()) {
        assertNextNode(nodeIterator, myGroup, 123L, properties("name", "one"), labels());
        assertNextNode(nodeIterator, myGroup, 456L, properties("name", "two"), labels());
        assertFalse(readNext(nodeIterator));
    }
}
Use of org.neo4j.internal.batchimport.input.IdType in project neo4j by neo4j.
Class CsvInputTest, method shouldCalculateCorrectEstimatesForGZippedInputFile.
/**
 * Compares the input estimates calculated for a node data file with those calculated for a
 * GZIP-compressed copy of the same file, and expects them to match within the tolerance
 * passed to {@code assertEstimatesEquals}.
 */
@Test
public void shouldCalculateCorrectEstimatesForGZippedInputFile() throws IOException {
    // GIVEN: a node data file and its gzipped counterpart, which must be the smaller of the two
    Path plainFile = createNodeInputDataFile(mebiBytes(10));
    Path gzippedFile = compressWithGZip(plainFile);
    long gzippedSize = Files.size(gzippedFile);
    long plainSize = Files.size(plainFile);
    Assertions.assertThat(gzippedSize).isLessThan(plainSize);

    // WHEN
    Input.Estimates plainEstimates = calculateEstimatesOnSingleFileNodeData(STRING, plainFile);
    Input.Estimates gzippedEstimates = calculateEstimatesOnSingleFileNodeData(STRING, gzippedFile);

    // THEN: the two sets of estimates should be _roughly_ equal. With GZIP there is no reliable
    // way to learn the uncompressed size without decompressing everything, so the estimator
    // instead estimates the compression rate after reading a chunk of the file.
    assertEstimatesEquals(plainEstimates, gzippedEstimates, 0.01);
}
Aggregations