Example use of org.neo4j.internal.batchimport.input.Input in project neo4j: class CsvInputTest, method shouldParseDatePropertyValues.
@Test
public void shouldParseDatePropertyValues() throws Exception {
    // GIVEN a node source whose header declares a typed ":Date" column
    DataFactory nodeData = data(":ID,name,date:Date\n" + "0,Mattias,2018-02-27\n" + "1,Johan,2018-03-01\n");
    Iterable<DataFactory> nodeSources = dataIterable(nodeData);
    Input csvInput = new CsvInput(nodeSources, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), ACTUAL, config(), NO_MONITOR, INSTANCE);
    // WHEN reading the nodes, THEN each date cell is materialized as a DateValue
    try (InputIterator nodeIterator = csvInput.nodes(EMPTY).iterator()) {
        assertNextNode(nodeIterator, 0L, new Object[] { "name", "Mattias", "date", DateValue.date(2018, 2, 27) }, labels());
        assertNextNode(nodeIterator, 1L, new Object[] { "name", "Johan", "date", DateValue.date(2018, 3, 1) }, labels());
        // no further nodes expected
        assertFalse(readNext(nodeIterator));
    }
}
Example use of org.neo4j.internal.batchimport.input.Input in project neo4j: class CsvInputTest, method shouldIgnoreEmptyExtraColumns.
@Test
public void shouldIgnoreEmptyExtraColumns() throws Exception {
    // GIVEN rows carrying more columns than the ":ID,one" header declares
    Iterable<DataFactory> nodeSources = datas(CsvInputTest.data(":ID,one\n" + "1,test,\n" + "2,test,,additional"));
    Collector extraColumnCollector = mock(Collector.class);
    Input csvInput = new CsvInput(nodeSources, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.INTEGER, config(), NO_MONITOR, INSTANCE);
    // WHEN reading the nodes, THEN only the declared column becomes a property...
    try (InputIterator nodeIterator = csvInput.nodes(extraColumnCollector).iterator()) {
        assertNextNode(nodeIterator, 1L, properties("one", "test"), labels());
        assertNextNode(nodeIterator, 2L, properties("one", "test"), labels());
        assertFalse(readNext(nodeIterator));
    }
    // ...and every surplus cell, empty or not, is reported to the collector
    verify(extraColumnCollector).collectExtraColumns(anyString(), eq(1L), eq(null));
    verify(extraColumnCollector).collectExtraColumns(anyString(), eq(2L), eq(null));
    verify(extraColumnCollector).collectExtraColumns(anyString(), eq(2L), eq("additional"));
}
Example use of org.neo4j.internal.batchimport.input.Input in project neo4j: class CsvInputTest, method shouldHandleMultipleInputGroups.
@Test
public void shouldHandleMultipleInputGroups() throws Exception {
    // GIVEN two node sources, each declaring its own, different header
    DataFactory heroGroup = data(":ID,name,kills:int,health:int\n" + "1,Jim,10,100\n" + "2,Abathur,0,200\n");
    DataFactory unitGroup = data(":ID,type\n" + "3,zergling\n" + "4,csv\n");
    Iterable<DataFactory> nodeSources = dataIterable(heroGroup, unitGroup);
    Input csvInput = new CsvInput(nodeSources, defaultFormatNodeFileHeader(), datas(), defaultFormatRelationshipFileHeader(), IdType.STRING, config(), NO_MONITOR, INSTANCE);
    // WHEN iterating over all groups, THEN each node carries the properties of its own group's header
    try (InputIterator nodeIterator = csvInput.nodes(EMPTY).iterator()) {
        assertNextNode(nodeIterator, "1", properties("name", "Jim", "kills", 10, "health", 100), labels());
        assertNextNode(nodeIterator, "2", properties("name", "Abathur", "kills", 0, "health", 200), labels());
        assertNextNode(nodeIterator, "3", properties("type", "zergling"), labels());
        assertNextNode(nodeIterator, "4", properties("type", "csv"), labels());
        // both groups exhausted
        assertFalse(readNext(nodeIterator));
    }
}
Example use of org.neo4j.internal.batchimport.input.Input in project neo4j: class ImportLogic, method initialize.
/**
 * Prepares this import for the given {@link Input}: records the start time, creates the
 * number-array factory, the id mapper and the node-relationship cache, computes input size
 * estimates and sanity-checks them, then publishes the created components as dependencies
 * and initializes the execution monitor.
 *
 * @param input source of nodes/relationships to be imported; kept in a field for later stages
 * @throws IOException presumably propagated from {@code input.calculateEstimates(...)} — TODO confirm
 */
public void initialize(Input input) throws IOException {
log.info("Import starting");
startTime = currentTimeMillis();
this.input = input;
// Factory for large numeric arrays; "auto" picks heap/off-heap/page-cache backing,
// observed through the monitor created here.
PageCacheArrayFactoryMonitor numberArrayFactoryMonitor = new PageCacheArrayFactoryMonitor();
numberArrayFactory = auto(neoStore.getPageCache(), pageCacheTracer, databaseDirectory, config.allowCacheAllocationOnHeap(), numberArrayFactoryMonitor, log, databaseName);
// Some temporary caches and indexes in the import
idMapper = instantiateIdMapper(input);
nodeRelationshipCache = new NodeRelationshipCache(numberArrayFactory, dbConfig.get(GraphDatabaseSettings.dense_node_threshold), memoryTracker);
// Estimate node/relationship/property volumes up front, using the property store's
// value-size calculator to approximate encoded property sizes.
Input.Estimates inputEstimates = input.calculateEstimates(neoStore.getPropertyStore().newValueEncodedSizeCalculator());
// Sanity checking against estimates
new EstimationSanityChecker(recordFormats, monitor).sanityCheck(inputEstimates);
// Heap check also accounts for the projected memory of the relationship cache and id mapper.
new HeapSizeSanityChecker(monitor).sanityCheck(inputEstimates, recordFormats, neoStore, NodeRelationshipCache.memoryEstimation(inputEstimates.numberOfNodes()), idMapper.memoryEstimation(inputEstimates.numberOfNodes()));
// Expose the components created above so later import stages can resolve them.
dependencies.satisfyDependencies(inputEstimates, idMapper, neoStore, nodeRelationshipCache, numberArrayFactoryMonitor);
// Switch to double record units for relationships if the estimates warrant it.
if (neoStore.determineDoubleRelationshipRecordUnits(inputEstimates)) {
monitor.doubleRelationshipRecordUnitsEnabled();
}
executionMonitor.initialize(dependencies);
}
Example use of org.neo4j.internal.batchimport.input.Input in project neo4j: class QuickImport, method main.
/**
 * Command-line entry point that generates synthetic graph data (via {@link DataGeneratorInput})
 * and either runs a full batch import into a store directory or, with {@code --to-csv},
 * writes the generated data out as CSV instead.
 *
 * Recognized arguments include: nodes, relationships, labels, relationship-types, into,
 * random-seed, id-type, db-config, high-io, pagecache-memory, processors, max-memory,
 * factor-bad-node-data, factor-bad-relationship-data, to-csv, v.
 *
 * @param arguments raw command-line arguments, parsed with {@link Args}
 * @throws IOException if the import or CSV output fails
 */
public static void main(String[] arguments) throws IOException {
Args args = Args.parse(arguments);
// Sizes accept unit suffixes via parseLongWithUnit; "nodes"/"relationships" default to null
// here, so they are presumably required — TODO confirm parseLongWithUnit's null handling.
long nodeCount = parseLongWithUnit(args.get("nodes", null));
long relationshipCount = parseLongWithUnit(args.get("relationships", null));
int labelCount = args.getNumber("labels", 4).intValue();
int relationshipTypeCount = args.getNumber("relationship-types", 4).intValue();
Path dir = Path.of(args.get("into"));
// Seed defaults to the current time; it is printed later so a run can be reproduced.
long randomSeed = args.getNumber("random-seed", currentTimeMillis()).longValue();
Configuration config = Configuration.COMMAS;
Extractors extractors = new Extractors(config.arrayDelimiter());
IdType idType = IdType.valueOf(args.get("id-type", IdType.INTEGER.name()));
Groups groups = new Groups();
// Node/relationship headers describe the shape of the generated data.
Header nodeHeader = parseNodeHeader(args, idType, extractors, groups);
Header relationshipHeader = parseRelationshipHeader(args, idType, extractors, groups);
Config dbConfig;
String dbConfigFileName = args.get("db-config", null);
if (dbConfigFileName != null) {
dbConfig = Config.newBuilder().fromFile(Path.of(dbConfigFileName)).build();
} else {
dbConfig = Config.defaults();
}
// Boxed Boolean on purpose: null means "not specified, fall back to the config default".
Boolean highIo = args.has("high-io") ? args.getBoolean("high-io") : null;
LogProvider logging = NullLogProvider.getInstance();
long pageCacheMemory = args.getNumber("pagecache-memory", org.neo4j.internal.batchimport.Configuration.MAX_PAGE_CACHE_MEMORY).longValue();
// Import configuration: defaults for the target directory, overridden by CLI arguments.
org.neo4j.internal.batchimport.Configuration importConfig = new org.neo4j.internal.batchimport.Configuration.Overridden(defaultConfiguration(dir)) {
@Override
public int maxNumberOfProcessors() {
return args.getNumber("processors", super.maxNumberOfProcessors()).intValue();
}
@Override
public boolean highIO() {
return highIo != null ? highIo : super.highIO();
}
@Override
public long pageCacheMemory() {
return pageCacheMemory;
}
@Override
public long maxMemoryUsage() {
String custom = args.get("max-memory", null);
return custom != null ? parseMaxMemory(custom) : super.maxMemoryUsage();
}
@Override
public IndexConfig indexConfig() {
return IndexConfig.create().withLabelIndex().withRelationshipTypeIndex();
}
};
// Fractions of deliberately bad rows to inject into the generated data.
float factorBadNodeData = args.getNumber("factor-bad-node-data", 0).floatValue();
float factorBadRelationshipData = args.getNumber("factor-bad-relationship-data", 0).floatValue();
Input input = new DataGeneratorInput(nodeCount, relationshipCount, idType, randomSeed, 0, nodeHeader, relationshipHeader, labelCount, relationshipTypeCount, factorBadNodeData, factorBadRelationshipData);
// Lifespan manages the job scheduler's lifecycle; both close when the try block exits.
try (FileSystemAbstraction fileSystem = new DefaultFileSystemAbstraction();
Lifespan life = new Lifespan()) {
BatchImporter consumer;
if (args.getBoolean("to-csv")) {
// Dump the generated input as CSV files instead of importing it.
consumer = new CsvOutput(dir, nodeHeader, relationshipHeader, config);
} else {
System.out.println("Seed " + randomSeed);
final JobScheduler jobScheduler = life.add(createScheduler());
boolean verbose = args.getBoolean("v");
// Verbose mode swaps the default progress monitor for a detailed spectrum monitor.
ExecutionMonitor monitor = verbose ? new SpectrumExecutionMonitor(2, TimeUnit.SECONDS, System.out, 100) : defaultVisible();
consumer = BatchImporterFactory.withHighestPriority().instantiate(DatabaseLayout.ofFlat(dir), fileSystem, PageCacheTracer.NULL, importConfig, new SimpleLogService(logging, logging), monitor, EMPTY, dbConfig, RecordFormatSelector.selectForConfig(dbConfig, logging), NO_MONITOR, jobScheduler, Collector.EMPTY, TransactionLogInitializer.getLogFilesInitializer(), new IndexImporterFactoryImpl(dbConfig), INSTANCE);
}
consumer.doImport(input);
}
}
Aggregations