use of ldbc.snb.datagen.entities.dynamic.person.IP in project ldbc_snb_datagen_spark by ldbc.
the class CommentGenerator method createComments.
/**
 * Builds the stream of comments (each paired with its stream of likes) that reply to
 * {@code post} or to previously generated comments of the same post.
 *
 * <p>The stream is driven by {@code Iterators.forIterator} with a counter running from 0 while
 * it is below {@code numComments}. Each step picks a parent message, picks an author among the
 * forum memberships that overlap the parent, generates content, dates, country/IP and finally
 * the comment itself. Steps that cannot produce a valid comment return {@code CONTINUE()} or
 * {@code BREAK()} instead of a value, so fewer than {@code numComments} comments may be emitted.
 *
 * <p>NOTE: the order of the random draws below is part of the generated data; draws for content
 * happen before the date checks, so they are consumed even when the step ends in
 * {@code CONTINUE()}.
 *
 * @param randomFarm  source of the per-aspect random generators used for every random decision
 * @param forum       forum whose memberships provide the candidate comment authors
 * @param post        root post; it is the first (and initially only) parent candidate
 * @param numComments upper bound of the step counter
 * @param idIterator  supplies one id (via {@code next()}) for each comment that is constructed
 * @param blockId     passed to {@code SN.formId} when composing the comment id
 * @return stream of pairs (comment, likes of that comment); the like stream is empty unless the
 *         comment was selected for likes
 */
public Stream<Pair<Comment, Stream<Like>>> createComments(RandomGeneratorFarm randomFarm, final Forum forum, final Post post, long numComments, Iterator<Long> idIterator, long blockId) {
// Messages that can still be replied to; grows as non-short comments are generated.
List<Message> parentCandidates = new ArrayList<>();
parentCandidates.add(post);
// Properties handed to the text generator for every generated (non-short) comment.
Properties prop = new Properties();
prop.setProperty("type", "comment");
// each iteration adds a new leaf node, for the first iteration this is a child of root Post
return Streams.stream(Iterators.forIterator(0, i -> i < numComments, i -> ++i, i -> {
// pick from parent candidates
int parentIndex = randomFarm.get(RandomGeneratorFarm.Aspect.REPLY_TO).nextInt(parentCandidates.size());
Message parentMessage = parentCandidates.get(parentIndex);
// memberships that overlap with the existence of the parent message:
// kept when the parent's creation date OR the parent's deletion date lies strictly inside
// the membership's (creationDate, deletionDate) interval.
// NOTE(review): a membership that starts after the parent was created and ends before the
// parent is deleted satisfies neither clause and is therefore excluded — confirm this is intended.
List<ForumMembership> validMemberships = new ArrayList<>();
for (ForumMembership membership : forum.getMemberships()) {
if ((membership.getCreationDate() < parentMessage.getCreationDate() && membership.getDeletionDate() > parentMessage.getCreationDate()) || membership.getCreationDate() < parentMessage.getDeletionDate() && membership.getDeletionDate() > parentMessage.getDeletionDate()) {
validMemberships.add(membership);
}
}
if (validMemberships.size() == 0) {
// no valid membership for this parent
// NOTE(review): this returns BREAK(), not CONTINUE(); presumably that ends the whole
// comment stream rather than skipping only this step — verify against Iterators.ForIterator.
return Iterators.ForIterator.BREAK();
}
// get random membership from valid memberships - picking who created the comment
int membershipIndex = randomFarm.get(RandomGeneratorFarm.Aspect.MEMBERSHIP_INDEX).nextInt(validMemberships.size());
ForumMembership membership = validMemberships.get(membershipIndex);
TreeSet<Integer> tags = new TreeSet<>();
String content;
boolean isShort = false;
// When the draw exceeds 0.6666 a tagged, generated text is produced; otherwise one of the
// canned short comments is used and the comment carries no tags.
if (randomFarm.get(RandomGeneratorFarm.Aspect.REDUCED_TEXT).nextDouble() > 0.6666) {
List<Integer> currentTags = new ArrayList<>();
// Keep each parent tag when its draw exceeds 0.5; remember all parent tags for the next loop.
for (Integer tag : parentMessage.getTags()) {
if (randomFarm.get(RandomGeneratorFarm.Aspect.TAG).nextDouble() > 0.5) {
tags.add(tag);
}
currentTags.add(tag);
}
// Add ceil(#parentTags / 2) tags related to randomly chosen parent tags. With no parent tags
// the bound is 0, so nextInt(0) on the empty list is never reached.
for (int j = 0; j < (int) Math.ceil(parentMessage.getTags().size() / 2.0); ++j) {
int randomTag = currentTags.get(randomFarm.get(RandomGeneratorFarm.Aspect.TAG).nextInt(currentTags.size()));
tags.add(Dictionaries.tagMatrix.getRandomRelated(randomFarm.get(RandomGeneratorFarm.Aspect.TOPIC), randomTag));
}
content = this.generator.generateText(membership.getPerson(), tags, prop);
} else {
isShort = true;
int index = randomFarm.get(RandomGeneratorFarm.Aspect.TEXT_SIZE).nextInt(shortComments.length);
content = shortComments[index];
}
// creation date: no earlier than delta after both the parent and the membership exist, and no
// later than the earliest of membership deletion, parent deletion and simulation end
long minCreationDate = Math.max(parentMessage.getCreationDate(), membership.getCreationDate()) + DatagenParams.delta;
long maxCreationDate = Collections.min(Arrays.asList(membership.getDeletionDate(), parentMessage.getDeletionDate(), Dictionaries.dates.getSimulationEnd()));
if (maxCreationDate <= minCreationDate) {
// empty window: give up on this step
return Iterators.ForIterator.CONTINUE();
}
// power-law distribution; only the lower bound is passed in, so the upper bound is checked afterwards
long creationDate = Dictionaries.dates.powerLawCommDateDay(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), minCreationDate);
if (creationDate > maxCreationDate) {
return Iterators.ForIterator.CONTINUE();
}
long deletionDate;
boolean isExplicitlyDeleted;
// if person is a deleter and selected for delete
if (membership.getPerson().isMessageDeleter() && randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_COMM).nextDouble() < DatagenParams.probCommentDeleted) {
isExplicitlyDeleted = true;
// explicit deletion must fall at least delta after creation and before the parent, the
// membership or the simulation ends
long minDeletionDate = creationDate + DatagenParams.delta;
long maxDeletionDate = Collections.min(Arrays.asList(parentMessage.getDeletionDate(), membership.getDeletionDate(), Dictionaries.dates.getSimulationEnd()));
if (maxDeletionDate <= minDeletionDate) {
return Iterators.ForIterator.CONTINUE();
}
deletionDate = Dictionaries.dates.powerLawDeleteDate(randomFarm.get(RandomGeneratorFarm.Aspect.DATE), minDeletionDate, maxDeletionDate);
} else {
// not explicitly deleted: the comment's deletion date is the earlier of the parent's and the
// membership's deletion dates (simulation end is not part of this minimum)
isExplicitlyDeleted = false;
deletionDate = Collections.min(Arrays.asList(parentMessage.getDeletionDate(), membership.getDeletionDate()));
}
// country and IP default to the author's own; when changeUsualCountry(...) says so, a random
// country and an IP for that country are used instead
int country = membership.getPerson().getCountry();
IP ip = membership.getPerson().getIpAddress();
Random random = randomFarm.get(RandomGeneratorFarm.Aspect.DIFF_IP_FOR_TRAVELER);
if (PersonBehavior.changeUsualCountry(random, creationDate)) {
random = randomFarm.get(RandomGeneratorFarm.Aspect.COUNTRY);
country = Dictionaries.places.getRandomCountryUniform(random);
random = randomFarm.get(RandomGeneratorFarm.Aspect.IP);
ip = Dictionaries.ips.getIP(random, country);
}
// idIterator.next() is only consumed here, i.e. only for steps that actually produce a comment
Comment comment = new Comment(SN.formId(SN.composeId(idIterator.next(), creationDate), blockId), creationDate, deletionDate, membership.getPerson(), forum.getId(), content, new ArrayList<>(tags), country, ip, Dictionaries.browsers.getPostBrowserId(randomFarm.get(RandomGeneratorFarm.Aspect.DIFF_BROWSER), randomFarm.get(RandomGeneratorFarm.Aspect.BROWSER), membership.getPerson().getBrowserId()), post.getMessageId(), parentMessage.getMessageId(), isExplicitlyDeleted);
// only non-short comments become reply targets; a copy (new Comment(comment)) is stored
if (!isShort)
parentCandidates.add(new Comment(comment));
// likes are generated only for content longer than 10 characters and when the draw is <= 0.1
Stream<Like> likeStream = comment.getContent().length() > 10 && randomFarm.get(RandomGeneratorFarm.Aspect.NUM_LIKE).nextDouble() <= 0.1 ? likeGenerator.generateLikes(randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_LIKES), randomFarm.get(RandomGeneratorFarm.Aspect.NUM_LIKE), forum, comment, Like.LikeType.COMMENT) : Stream.empty();
return Iterators.ForIterator.RETURN(new Pair<>(comment, likeStream));
}));
}
use of ldbc.snb.datagen.entities.dynamic.person.IP in project ldbc_snb_datagen_spark by ldbc.
the class PostGenerator method createPosts.
/**
 * Builds, for every given forum membership, the stream of posts written by that member, each
 * bundled with its stream of likes and its stream of comments (which in turn carry their likes).
 *
 * <p>NOTE: the order of the random draws below is part of the generated data.
 *
 * @param randomFarm      source of the per-aspect random generators used for every random decision
 * @param forum           forum the posts belong to; supplies the forum id and language
 * @param memberships     members for which posts are generated; also the divisor used to spread
 *                        {@code numPostsInForum} over members
 * @param numPostsInForum total number of posts to distribute over {@code memberships}
 * @param idIterator      supplies one id (via {@code next()}) per created post; the same iterator
 *                        is handed to the comment generator
 * @param blockId         passed to {@code SN.formId} when composing the post id
 * @return stream of triplets (post, likes of the post, comments of the post with their likes)
 */
public Stream<Triplet<Post, Stream<Like>, Stream<Pair<Comment, Stream<Like>>>>> createPosts(RandomGeneratorFarm randomFarm, final Forum forum, final List<ForumMembership> memberships, long numPostsInForum, Iterator<Long> idIterator, long blockId) {
// Properties handed to the text generator for every post.
Properties properties = new Properties();
properties.setProperty("type", "post");
return memberships.stream().flatMap(member -> {
// generate number of posts by this member
double numPostsPerMember = numPostsInForum / (double) memberships.size();
if (numPostsPerMember < 1.0) {
// fewer posts than members: this member gets one post when the draw falls below the
// fractional share; otherwise the value stays below 1 and the int cast below yields 0
double prob = randomFarm.get(RandomGeneratorFarm.Aspect.NUM_POST).nextDouble();
if (prob < numPostsPerMember)
numPostsPerMember = 1.0;
} else {
numPostsPerMember = Math.ceil(numPostsPerMember);
}
final int numPostsPerMemberInt = (int) numPostsPerMember;
// drawn once per member with bound DatagenParams.maxNumComments + 1, so every post of this
// member uses the same comment count
int numComments = randomFarm.get(RandomGeneratorFarm.Aspect.NUM_COMMENT).nextInt(DatagenParams.maxNumComments + 1);
return Streams.stream(Iterators.forIterator(0, i -> i < numPostsPerMemberInt, i -> ++i, i -> {
// create post core
PostCore postCore = generatePostInfo(randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_POST), randomFarm.get(RandomGeneratorFarm.Aspect.TAG), randomFarm.get(RandomGeneratorFarm.Aspect.DATE), forum, member, numComments);
// a null core means no post could be generated for this step; no id is consumed
if (postCore == null)
return Iterators.ForIterator.CONTINUE();
// create content, country, ip - sometimes randomise
String content = this.generator.generateText(member.getPerson(), postCore.getTags(), properties);
int country = member.getPerson().getCountry();
IP ip = member.getPerson().getIpAddress();
// when changeUsualCountry(...) says so, replace the member's own country/IP with a random
// country and an IP for that country
Random random = randomFarm.get(RandomGeneratorFarm.Aspect.DIFF_IP_FOR_TRAVELER);
if (PersonBehavior.changeUsualCountry(random, postCore.getCreationDate())) {
random = randomFarm.get(RandomGeneratorFarm.Aspect.COUNTRY);
country = Dictionaries.places.getRandomCountryUniform(random);
random = randomFarm.get(RandomGeneratorFarm.Aspect.IP);
ip = Dictionaries.ips.getIP(random, country);
}
Post post = new Post();
// create post with above information and from post info
post.initialize(SN.formId(SN.composeId(idIterator.next(), postCore.getCreationDate()), blockId), postCore.getCreationDate(), postCore.getDeletionDate(), member.getPerson(), forum.getId(), content, new ArrayList<>(postCore.getTags()), country, ip, Dictionaries.browsers.getPostBrowserId(randomFarm.get(RandomGeneratorFarm.Aspect.DIFF_BROWSER), randomFarm.get(RandomGeneratorFarm.Aspect.BROWSER), member.getPerson().getBrowserId()), forum.getLanguage(), postCore.isExplicitlyDeleted());
// likes are generated only when the draw is <= 0.1; otherwise the like stream is empty
Stream<Like> likeStream = randomFarm.get(RandomGeneratorFarm.Aspect.NUM_LIKE).nextDouble() <= 0.1 ? likeGenerator.generateLikes(randomFarm.get(RandomGeneratorFarm.Aspect.DELETION_LIKES), randomFarm.get(RandomGeneratorFarm.Aspect.NUM_LIKE), forum, post, Like.LikeType.POST) : Stream.empty();
// comments share the id iterator and block id with the posts
Stream<Pair<Comment, Stream<Like>>> commentStream = commentGenerator.createComments(randomFarm, forum, post, numComments, idIterator, blockId);
return Iterators.ForIterator.RETURN(new Triplet<>(post, likeStream, commentStream));
}));
});
}
use of ldbc.snb.datagen.entities.dynamic.person.IP in project ldbc_snb_datagen_spark by ldbc.
the class IPTest method testIPLogic.
@Test
public void testIPLogic() {
    // Two addresses that differ only in the last octet, both built with a trailing argument
    // of 24, are expected to report the same network value 0xC0A80100 (192.168.1.0).
    IP lowHost = new IP(192, 168, 1, 1, 24);
    IP highHost = new IP(192, 168, 1, 100, 24);
    final int expectedNetwork = 0xC0A80100;
    assertTrue(expectedNetwork == lowHost.getNetwork());
    assertTrue(expectedNetwork == highHost.getNetwork());
}
use of ldbc.snb.datagen.entities.dynamic.person.IP in project ldbc_snb_datagen_spark by ldbc.
the class IPAddressDictionary method load.
/**
 * Loads the dictionary.
 *
 * <p>First reads the country mapping resource (one {@code abbreviation SEPARATOR_COUNTRY name}
 * entry per line) and then, for every country known to the place dictionary, reads up to
 * {@code MAX_IP_COUNTRY} network entries from {@code baseIPdir/<abbreviation>.zone} into
 * {@code ipsByCountry}, keyed by the country's place id.
 *
 * @param mappingFileName The abbreviations per country.
 * @param baseIPdir       The base directory where ip files are found.
 * @throws RuntimeException     wrapping any {@link IOException} raised while reading a resource
 * @throws NullPointerException if one of the resources cannot be found on the classpath
 */
private void load(String mappingFileName, String baseIPdir) {
    Map<String, String> countryAbbreMap = new HashMap<>();
    try {
        // try-with-resources closes the reader even when a line fails to parse
        try (BufferedReader mappingFile = openUtf8Resource(mappingFileName)) {
            String line;
            while ((line = mappingFile.readLine()) != null) {
                String[] data = line.split(SEPARATOR_COUNTRY);
                String abbr = data[0];
                // place names are looked up with underscores instead of spaces
                String countryName = data[1].trim().replace(" ", "_");
                countryAbbreMap.put(countryName, abbr);
            }
        }
        for (int countryId : placeDictionary.getCountries()) {
            ipsByCountry.put(countryId, new ArrayList<>());
            // Get the name of file
            String fileName = baseIPdir + "/" + countryAbbreMap.get(placeDictionary.getPlaceName(countryId)) + ".zone";
            try (BufferedReader ipZoneFile = openUtf8Resource(fileName)) {
                String line;
                int loaded = 0;
                while (loaded < MAX_IP_COUNTRY && (line = ipZoneFile.readLine()) != null) {
                    // the last of the SEPARATOR_IP-separated fields also carries the mask,
                    // split off with SEPARATOR_MASK
                    String[] data = line.split(SEPARATOR_IP);
                    String[] maskData = data[3].split(SEPARATOR_MASK);
                    int byte1 = Integer.parseInt(data[0]);
                    int byte2 = Integer.parseInt(data[1]);
                    int byte3 = Integer.parseInt(data[2]);
                    int byte4 = Integer.parseInt(maskData[0]);
                    int maskNum = Integer.parseInt(maskData[1]);
                    // Store under the country's place id (the key used by put above and by
                    // lookups), not under the position of the country in the list.
                    ipsByCountry.get(countryId).add(new IP(byte1, byte2, byte3, byte4, maskNum));
                    loaded++;
                }
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

/** Opens a classpath resource as a UTF-8 reader; fails with a descriptive NPE when it is missing. */
private BufferedReader openUtf8Resource(String resourceName) throws IOException {
    return new BufferedReader(new InputStreamReader(
            java.util.Objects.requireNonNull(getClass().getResourceAsStream(resourceName), "Resource not found: " + resourceName),
            "UTF-8"));
}
use of ldbc.snb.datagen.entities.dynamic.person.IP in project ldbc_snb_datagen_spark by ldbc.
the class IPAddressDictionary method getIP.
/**
 * Produces a random IP for the given place.
 *
 * <p>The place id is first resolved to its enclosing country by following
 * {@code placeDictionary.belongsTo} until a place of type {@code Place.COUNTRY} is reached.
 * One of that country's stored entries is then picked at random, and the bits not covered by
 * its mask are filled with random bits.
 *
 * @param random    generator used for both the entry choice and the random bits (in that order)
 * @param countryId id of a country, or of a place located inside one
 * @return a new IP combining the chosen entry's network value with random unmasked bits
 */
public IP getIP(Random random, int countryId) {
    // Climb the place hierarchy until we are at country level.
    int placeId = countryId;
    while (!placeDictionary.getType(placeId).equals(Place.COUNTRY)) {
        placeId = placeDictionary.belongsTo(placeId);
    }

    List<IP> candidates = ipsByCountry.get(placeId);
    IP base = candidates.get(random.nextInt(candidates.size()));

    int mask = base.getMask();
    int network = base.getNetwork();
    // Keep the network bits, randomise only the bits outside the mask.
    int randomUnmaskedBits = ~mask & random.nextInt();
    return new IP(network | randomUnmaskedBits, mask);
}
Aggregations