Use of org.neo4j.internal.batchimport.input.InputEntity in project neo4j by neo4j.
In the class CsvInputBatchImportIT, the method nodeDataAsFile:
private Path nodeDataAsFile(List<InputEntity> nodeData) throws IOException {
    Path file = testDirectory.file("nodes.csv");
    try (Writer writer = fileSystem.openAsWriter(file, StandardCharsets.UTF_8, false)) {
        // Header
        println(writer, "id:ID,name,pointA:Point{crs:WGS-84},pointB:Point,date:Date,time:Time,dateTime:DateTime,dateTime2:DateTime,localTime:LocalTime,"
                + "localDateTime:LocalDateTime,duration:Duration,floatArray:float[],dateArray:date[],pointArray:point[],some-labels:LABEL");
        // Data
        for (InputEntity node : nodeData) {
            String csvLabels = csvLabels(node.labels());
            StringBuilder sb = new StringBuilder().append(node.id()).append(',');
            for (int i = 0; i < node.propertyCount(); i++) {
                sb.append(serializePropertyValue(node.propertyValue(i))).append(',');
            }
            if (csvLabels != null && !csvLabels.isEmpty()) {
                sb.append(csvLabels);
            }
            println(writer, sb.toString());
        }
    }
    return file;
}
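The helpers csvLabels and serializePropertyValue are defined elsewhere in the test class and not shown here. As a rough illustration only (the real implementations may differ), csvLabels could join the label names with the ';' array delimiter that the CSV import format uses by default:

// Hypothetical sketch of the label serialization, assuming the default ';' array delimiter.
private static String csvLabels(String[] labels) {
    return labels == null ? "" : String.join(";", labels);
}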
Use of org.neo4j.internal.batchimport.input.InputEntity in project neo4j by neo4j.
In the class CsvInput, the method sample:
private long[] sample(Iterable<DataFactory> dataFactories, Header.Factory headerFactory,
        PropertySizeCalculator valueSizeCalculator, ToIntFunction<InputEntity> additionalCalculator) throws IOException {
    // [entity count, property count, property size, labels (for nodes only)]
    long[] estimates = new long[4];
    try (CsvInputChunkProxy chunk = new CsvInputChunkProxy()) {
        int groupId = 0;
        for (DataFactory dataFactory : dataFactories) { // one input group
            groupId++;
            Header header = null;
            Data data = dataFactory.create(config);
            RawIterator<CharReadable, IOException> sources = data.stream();
            while (sources.hasNext()) {
                try (CharReadable source = sources.next()) {
                    if (header == null) {
                        // Extract the header from the first file in this group.
                        // This is the only place we monitor type normalization, because it's before import and it touches all headers.
                        header = extractHeader(source, headerFactory, idType, config, groups, monitor);
                    }
                    try (CsvInputIterator iterator = new CsvInputIterator(source, data.decorator(), header, config, idType,
                                EMPTY, CsvGroupInputIterator.extractors(config), groupId);
                            InputEntity entity = new InputEntity()) {
                        int entities = 0;
                        int properties = 0;
                        int propertySize = 0;
                        int additional = 0;
                        while (iterator.position() < ESTIMATE_SAMPLE_SIZE && iterator.next(chunk)) {
                            for (; chunk.next(entity); entities++) {
                                properties += entity.propertyCount();
                                propertySize += Inputs.calculatePropertySize(entity, valueSizeCalculator, NULL, memoryTracker);
                                additional += additionalCalculator.applyAsInt(entity);
                            }
                        }
                        if (entities > 0) {
                            long position = iterator.position();
                            double compressionRatio = iterator.compressionRatio();
                            double actualFileSize = source.length() / compressionRatio;
                            long entityCountInSource = (long) ((actualFileSize / position) * entities);
                            estimates[0] += entityCountInSource;
                            estimates[1] += ((double) properties / entities) * entityCountInSource;
                            estimates[2] += ((double) propertySize / entities) * entityCountInSource;
                            estimates[3] += ((double) additional / entities) * entityCountInSource;
                        }
                    }
                }
            }
        }
    }
    return estimates;
}
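The estimation only reads the first ESTIMATE_SAMPLE_SIZE characters of each source and then extrapolates linearly: counts observed in the sampled prefix are scaled by the ratio of the file's logical (decompressed) size to the sampled position. A standalone sketch of that arithmetic, with illustrative names that are not Neo4j API:

// Sketch of the extrapolation above. Dividing the on-disk length by the
// compression ratio approximates the logical character count of the source.
// Example: 10_000 entities seen in the first 1 MB of a source whose logical
// size is 100 MB extrapolates to roughly 1_000_000 entities.
static long extrapolateEntityCount(long sampledEntities, long sampledChars,
        long fileSizeOnDisk, double compressionRatio) {
    double logicalSize = fileSizeOnDisk / compressionRatio;
    return (long) ((logicalSize / sampledChars) * sampledEntities);
}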
Use of org.neo4j.internal.batchimport.input.InputEntity in project neo4j by neo4j.
In the class CsvOutput, the method consume:
private void consume(String name, InputIterator entities, Header header, Deserializer deserializer) throws IOException {
    try (PrintStream out = file(name + "-header.csv")) {
        serialize(out, header);
    }
    try {
        int threads = Runtime.getRuntime().availableProcessors();
        ExecutorService executor = Executors.newFixedThreadPool(threads);
        for (int i = 0; i < threads; i++) {
            int id = i;
            executor.submit((Callable<Void>) () -> {
                StringDeserialization deserialization = new StringDeserialization(config);
                try (PrintStream out = file(name + "-" + id + ".csv");
                        InputChunk chunk = entities.newChunk()) {
                    InputEntity entity = new InputEntity();
                    while (entities.next(chunk)) {
                        while (chunk.next(entity)) {
                            out.println(deserializer.apply(entity, deserialization, header));
                        }
                    }
                }
                return null;
            });
        }
        executor.shutdown();
        executor.awaitTermination(10, TimeUnit.MINUTES);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
}
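The parallelization here leans on the batch importer's input contract: next(chunk) on a shared InputIterator is designed to be called from multiple threads, while each InputChunk and InputEntity stays confined to one worker. A trimmed sketch of the per-thread pull loop (process is a hypothetical per-entity callback, not part of the snippet):

// One chunk and one reusable entity per worker thread; only next(chunk) is shared.
Callable<Void> worker = () -> {
    try (InputChunk chunk = entities.newChunk()) {
        InputEntity entity = new InputEntity();
        while (entities.next(chunk)) {      // claim the next slice of input
            while (chunk.next(entity)) {    // iterate rows within the claimed slice
                process(entity);            // hypothetical per-entity consumer
            }
        }
    }
    return null;
};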
Use of org.neo4j.internal.batchimport.input.InputEntity in project neo4j by neo4j.
In the class CsvInputEstimateCalculationIT, the method generateData:
private DataFactory generateData(Header.Factory factory, MutableLong start, long count, long nodeCount,
        String headerString, String fileName, Groups groups) throws IOException {
    Path file = testDirectory.file(fileName);
    Header header = factory.create(charSeeker(wrap(headerString), COMMAS, false), COMMAS, IdType.INTEGER, groups);
    Distribution<String> distribution = new Distribution<>(new String[] { "Token" });
    Deserialization<String> deserialization = new StringDeserialization(COMMAS);
    try (PrintWriter out = new PrintWriter(Files.newBufferedWriter(file));
            RandomEntityDataGenerator generator = new RandomEntityDataGenerator(nodeCount, count, toIntExact(count),
                    random.seed(), start.longValue(), header, distribution, distribution, 0, 0, 5);
            InputChunk chunk = generator.newChunk();
            InputEntity entity = new InputEntity()) {
        out.println(headerString);
        while (generator.next(chunk)) {
            while (chunk.next(entity)) {
                out.println(convert(entity, deserialization, header));
            }
        }
    }
    start.add(count);
    return DataFactories.data(InputEntityDecorators.NO_DECORATOR, StandardCharsets.UTF_8, file);
}
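The nested next(chunk)/next(entity) loop is the same pull idiom that runs through every snippet on this page; factored out on its own it looks like this (a generic sketch, not Neo4j API):

// Generic sketch of the chunk-then-entity pull idiom used throughout these examples.
static void drain(InputIterator source, Consumer<InputEntity> sink) throws IOException {
    try (InputChunk chunk = source.newChunk()) {
        InputEntity entity = new InputEntity();     // one instance, overwritten per row
        while (source.next(chunk)) {                // fetch the next chunk of raw input
            while (chunk.next(entity)) {            // decode rows into the reusable entity
                sink.accept(entity);
            }
        }
    }
}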
Use of org.neo4j.internal.batchimport.input.InputEntity in project neo4j by neo4j.
In the class ParallelBatchImporterTest, the method verifyData:
private void verifyData(int nodeCount, int relationshipCount, GraphDatabaseService db, Transaction tx,
        IdGroupDistribution groups, long nodeRandomSeed, long relationshipRandomSeed) throws IOException {
    // Read all nodes, relationships and properties and verify them against the input data.
    LongAdder propertyCount = new LongAdder();
    try (InputIterator nodes = nodes(nodeRandomSeed, nodeCount, config.batchSize(), inputIdGenerator, groups, propertyCount).iterator();
            InputIterator relationships = relationships(relationshipRandomSeed, relationshipCount, config.batchSize(),
                    inputIdGenerator, groups, propertyCount, new LongAdder()).iterator();
            ResourceIterator<Node> dbNodes = tx.getAllNodes().iterator()) {
        // Nodes
        Map<String, Node> nodeByInputId = new HashMap<>(nodeCount);
        while (dbNodes.hasNext()) {
            Node node = dbNodes.next();
            String id = (String) node.getProperty("id");
            assertNull(nodeByInputId.put(id, node));
        }
        int verifiedNodes = 0;
        long allNodesScanLabelCount = 0;
        InputChunk chunk = nodes.newChunk();
        InputEntity input = new InputEntity();
        while (nodes.next(chunk)) {
            while (chunk.next(input)) {
                String iid = uniqueId(input.idGroup, input.objectId);
                Node node = nodeByInputId.get(iid);
                assertNodeEquals(input, node);
                verifiedNodes++;
                assertDegrees(node);
                allNodesScanLabelCount += Iterables.count(node.getLabels());
            }
        }
        assertEquals(nodeCount, verifiedNodes);
        // Labels
        long labelScanStoreEntryCount = stream(tx.getAllLabels()).flatMap(l -> tx.findNodes(l).stream()).count();
        assertEquals(allNodesScanLabelCount, labelScanStoreEntryCount,
                format("Expected label scan store and node store to have the same number of labels. But %n"
                        + "#labelsInNodeStore=%d%n" + "#labelsInLabelScanStore=%d%n",
                        allNodesScanLabelCount, labelScanStoreEntryCount));
        // Relationships
        chunk = relationships.newChunk();
        Map<String, Relationship> relationshipByName = new HashMap<>();
        for (Relationship relationship : tx.getAllRelationships()) {
            relationshipByName.put((String) relationship.getProperty("id"), relationship);
        }
        int verifiedRelationships = 0;
        while (relationships.next(chunk)) {
            while (chunk.next(input)) {
                if (!inputIdGenerator.isMiss(input.objectStartId) && !inputIdGenerator.isMiss(input.objectEndId)) {
                    // Only verify relationships whose start and end nodes both exist. The InputIdGenerator
                    // is expected to generate some (very few) references to missing nodes; those are skipped.
                    String name = (String) propertyOf(input, "id");
                    Relationship relationship = relationshipByName.get(name);
                    assertNotNull(relationship, "Expected there to be a relationship with name '" + name + "'");
                    assertEquals(nodeByInputId.get(uniqueId(input.startIdGroup, input.objectStartId)), relationship.getStartNode());
                    assertEquals(nodeByInputId.get(uniqueId(input.endIdGroup, input.objectEndId)), relationship.getEndNode());
                    assertRelationshipEquals(input, relationship);
                }
                verifiedRelationships++;
            }
        }
        assertEquals(relationshipCount, verifiedRelationships);
    }
}
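The uniqueId helper builds a lookup key that disambiguates ids across id groups; it is not shown above, but a minimal hypothetical version could simply concatenate the group name and the id:

// Hypothetical sketch: make an id unique across groups by prefixing the group name.
private static String uniqueId(Group group, Object id) {
    return group.name() + "_" + id;
}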