Use of com.biglybt.core.content.RelatedContentManager.DownloadInfo in the BiglyBT project by BiglySoftware.
From class RelatedContentSearcher: method updateKeyBloom.
/**
 * Rebuilds the two keyword bloom filters and publishes them to
 * {@code key_bloom_with_local} and {@code key_bloom_without_local}.
 *
 * <p>Words are obtained by calling {@code getDHTWords} on every entry of three
 * sources: the shared transient info cache, the related content held by
 * {@code cc}, and the non-local DHT values returned by {@code getDHTInfos}.
 * Words found in either of the first two sources are "non-DHT" words; words
 * found only in DHT values are "DHT-only" words.
 *
 * <ul>
 *   <li>{@code key_bloom_with_local} receives non-DHT words first, then
 *       DHT-only words.</li>
 *   <li>{@code key_bloom_without_local} receives non-DHT words only.</li>
 * </ul>
 *
 * <p>Each filter is sized at {@code KEY_BLOOM_LOAD_FACTOR} bits per word,
 * clamped to {@code [KEY_BLOOM_MIN_BITS, KEY_BLOOM_MAX_BITS]}, and neither
 * filter is given more than {@code KEY_BLOOM_MAX_ENTRIES} entries. The whole
 * rebuild runs while holding {@code manager.rcm_lock}.
 *
 * @param cc content cache whose {@code related_content} entries contribute
 *           non-DHT words
 */
protected void updateKeyBloom(RelatedContentManager.ContentCache cc) {
    synchronized (manager.rcm_lock) {
        List<DownloadInfo> dht_infos = getDHTInfos(SEARCH_CVS_ONLY_DEFAULT);

        // Pass 1: every word from the transient cache and from the supplied cache.
        Set<String> non_dht_words = new HashSet<>();
        Iterator<DownloadInfo> it_transient = RelatedContentManager.transient_info_cache.values().iterator();
        while (it_transient.hasNext()) {
            List<String> words = getDHTWords(it_transient.next());
            non_dht_words.addAll(words);
        }
        Iterator<DownloadInfo> it_rc = cc.related_content.values().iterator();
        while (it_rc.hasNext()) {
            List<String> words = getDHTWords(it_rc.next());
            non_dht_words.addAll(words);
        }

        // Pass 2: words that appear in DHT values but in neither of the sources above.
        // This must run after pass 1 so that non_dht_words is complete when consulted.
        Set<String> dht_only_words = new HashSet<>();
        for (DownloadInfo di : dht_infos) {
            List<String> words = getDHTWords(di);
            for (String word : words) {
                if (!non_dht_words.contains(word)) {
                    dht_only_words.add(word);
                }
            }
        }

        int all_desired_bits = (dht_only_words.size() + non_dht_words.size()) * KEY_BLOOM_LOAD_FACTOR;
        all_desired_bits = Math.min(Math.max(all_desired_bits, KEY_BLOOM_MIN_BITS), KEY_BLOOM_MAX_BITS);
        BloomFilter all_bloom = BloomFilterFactory.createAddOnly(all_desired_bits);

        int non_dht_desired_bits = non_dht_words.size() * KEY_BLOOM_LOAD_FACTOR;
        non_dht_desired_bits = Math.min(Math.max(non_dht_desired_bits, KEY_BLOOM_MIN_BITS), KEY_BLOOM_MAX_BITS);
        BloomFilter non_dht_bloom = BloomFilterFactory.createAddOnly(non_dht_desired_bits);

        // Shuffled so that, when the entry cap cuts the list short, the words that
        // make it into the filters are a random subset rather than a fixed one.
        List<String> non_dht_words_rand = new ArrayList<>(non_dht_words);
        Collections.shuffle(non_dht_words_rand);
        for (String word : non_dht_words_rand) {
            // Check the cap BEFORE adding so that every non-DHT word that goes into
            // all_bloom also goes into non_dht_bloom (previously the word that hit
            // the cap was added to all_bloom only).
            if (all_bloom.getEntryCount() >= KEY_BLOOM_MAX_ENTRIES) {
                break;
            }
            try {
                byte[] bytes = word.getBytes(java.nio.charset.StandardCharsets.UTF_8);
                all_bloom.add(bytes);
                if (non_dht_bloom.getEntryCount() < KEY_BLOOM_MAX_ENTRIES) {
                    non_dht_bloom.add(bytes);
                }
            } catch (Throwable e) {
                // Best effort: one bad word must not abort the rebuild, but do not hide it.
                logSearch("key bloom: failed to add non-dht word: " + e);
            }
        }

        List<String> dht_only_words_rand = new ArrayList<>(dht_only_words);
        Collections.shuffle(dht_only_words_rand);
        for (String word : dht_only_words_rand) {
            // Cap checked first: if the non-DHT words already filled the filter,
            // no DHT-only word is added (previously one extra entry slipped in).
            if (all_bloom.getEntryCount() >= KEY_BLOOM_MAX_ENTRIES) {
                break;
            }
            try {
                all_bloom.add(word.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            } catch (Throwable e) {
                // Best effort, as above.
                logSearch("key bloom: failed to add dht-only word: " + e);
            }
        }

        logSearch("blooms=" + all_bloom.getSize() + "/" + all_bloom.getEntryCount() + ", " + non_dht_bloom.getSize() + "/" + non_dht_bloom.getEntryCount() + ": rcm=" + cc.related_content.size() + ", trans=" + RelatedContentManager.transient_info_cache.size() + ", dht=" + dht_infos.size());

        key_bloom_with_local = all_bloom;
        key_bloom_without_local = non_dht_bloom;
        last_key_bloom_update = SystemTime.getMonotonousTime();
    }
}
Use of com.biglybt.core.content.RelatedContentManager.DownloadInfo in the BiglyBT project by BiglySoftware.
From class RelatedContentSearcher: method getDHTInfos.
/**
 * Decodes the non-local values currently held by the DHT plugin into
 * {@code DownloadInfo} entries.
 *
 * <p>Only values whose raw bytes look like the expected bencoded payload
 * (start with {@code d1:d}, end with {@code ee}, and contain {@code 1:h20:})
 * are passed to the decoder; everything else is skipped cheaply. Values that
 * fail to decode, or for which {@code manager.decodeInfo} returns
 * {@code null}, are omitted.
 *
 * @param search_cvs_only when {@code true} and the plugin is a
 *        {@code DHTPlugin}, only values from the {@code DHTPlugin.NW_AZ_CVS}
 *        network are read; otherwise all values the plugin reports are read
 * @return the decoded entries; never {@code null}, possibly empty
 */
private List<DownloadInfo> getDHTInfos(boolean search_cvs_only) {
    List<DHTPluginValue> vals;
    if (search_cvs_only && dht_plugin instanceof DHTPlugin) {
        vals = ((DHTPlugin) dht_plugin).getValues(DHTPlugin.NW_AZ_CVS, false);
    } else {
        vals = dht_plugin.getValues();
    }

    // Shared across every decodeInfo call for this scan.
    // NOTE(review): presumably lets decodeInfo de-duplicate entries - confirm against decodeInfo.
    Set<String> unique_keys = new HashSet<>();
    List<DownloadInfo> dht_infos = new ArrayList<>();

    for (DHTPluginValue val : vals) {
        if (val.isLocal()) {
            continue;
        }
        byte[] bytes = val.getValue();
        if (bytes == null) {
            // Defensive: a value without a payload cannot be a candidate, and must
            // not abort the scan of the remaining values.
            continue;
        }
        // The payload is binary bencoded data, so decode it byte-for-char with
        // ISO-8859-1. The previous platform-default decoding made this sniff test
        // depend on the JVM's default charset.
        String test = new String(bytes, java.nio.charset.StandardCharsets.ISO_8859_1);
        if (!(test.startsWith("d1:d") && test.endsWith("ee") && test.contains("1:h20:"))) {
            continue;
        }
        try {
            Map map = BDecoder.decode(bytes);
            DownloadInfo info = manager.decodeInfo(map, null, 1, false, unique_keys);
            if (info != null) {
                dht_infos.add(info);
            }
        } catch (Throwable ignored) {
            // Best effort: the sniff test above is only a heuristic, so a value may
            // still fail to decode. Skip it and carry on with the rest.
        }
    }
    return (dht_infos);
}
Use of com.biglybt.core.content.RelatedContentManager.DownloadInfo in the BiglyBT project by BiglySoftware.
From class RelatedContentSearcher: method matchContent.
/**
 * Finds known content whose title (or tags, or hash) matches {@code term}.
 *
 * <p>Candidates are drawn, in this order, from the non-local DHT values
 * ({@code getDHTInfos}), a snapshot of the transient info cache, and
 * {@code manager.getRelatedContentAsList()}. Results are keyed by the Base32
 * hash (or by {@code manager.getPrivateInfoKey} when there is no hash), so a
 * later source replaces an earlier one that has the same key.
 *
 * <p>Term syntax:
 * <ul>
 *   <li>{@code hash:<base32>} matches the entry with exactly that hash.</li>
 *   <li>A non-blank term equal to the title (ignoring case) always matches.</li>
 *   <li>Otherwise the term is split on spaces into bits and ALL bits must be
 *       satisfied. A leading {@code -} negates a bit; a leading {@code +} is
 *       accepted and has no effect.</li>
 *   <li>A bit of the form {@code (regex)} is compiled as a case-insensitive
 *       regular expression. A bit containing {@code |} (and no {@code tag:})
 *       is likewise compiled, so {@code a|b} means "a or b". A regex that
 *       {@code RegExUtil.mightBeEvil} rejects, or that fails to compile, falls
 *       back to plain substring matching of the bit text.</li>
 *   <li>{@code tag:xyz} is satisfied when any tag of the entry starts with
 *       {@code xyz}.</li>
 * </ul>
 *
 * @param term            search term, see above
 * @param min_seeds       entries reporting fewer seeds are skipped
 * @param min_leechers    entries reporting fewer leechers are skipped
 * @param is_local        selects the result limit: local popularity searches
 *                        are limited to {@code MAX_LOCAL_POPULAR_RESULTS},
 *                        other local searches are unlimited, and non-local
 *                        searches are limited to {@code MAX_REMOTE_SEARCH_RESULTS}
 * @param search_cvs_only passed through to {@code getDHTInfos}
 * @return the matching entries; only when more entries match than the limit
 *         allows is the list sorted (best first) and truncated to the limit
 */
List<RelatedContent> matchContent(final String term, int min_seeds, int min_leechers, boolean is_local, boolean search_cvs_only) {
    final boolean is_popularity = isPopularity(term);

    // term is made up of space separated bits - all bits must match
    // each bit can be prefixed by + or -, a leading - means 'bit doesn't match'. + doesn't mean anything
    // each bit (with prefix removed) can be "(" regexp ")"
    // if bit isn't regexp but has "|" in it it is turned into a regexp so a|b means 'a or b'
    String[] bits = Constants.PAT_SPLIT_SPACE.split(term.toLowerCase());
    int[] bit_types = new int[bits.length]; // 0 = plain, 1 = '+' prefix, 2 = '-' prefix (negated)
    Pattern[] bit_patterns = new Pattern[bits.length];

    for (int i = 0; i < bits.length; i++) {
        String bit = bits[i] = bits[i].trim();
        if (bit.length() == 0) {
            continue;
        }
        char c = bit.charAt(0);
        if (c == '+') {
            bit_types[i] = 1;
            bit = bits[i] = bit.substring(1);
        } else if (c == '-') {
            bit_types[i] = 2;
            bit = bits[i] = bit.substring(1);
        }

        String regex = null;
        if (bit.startsWith("(") && bit.endsWith(")")) {
            regex = bit.substring(1, bit.length() - 1);
        } else if (bit.contains("|") && !bit.contains("tag:")) {
            regex = bit;
        }
        if (regex != null) {
            try {
                if (!RegExUtil.mightBeEvil(regex)) {
                    bit_patterns[i] = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
                }
            } catch (Throwable ignored) {
                // Not a usable regex: leave the pattern null so the bit is matched
                // as plain text further down.
            }
        }
    }

    // Loop-invariant parts of the two "direct match" checks, computed once.
    final String hash_term = term.startsWith("hash:") ? term.substring(5) : null;
    final boolean term_not_blank = term.trim().length() > 0;

    Map<String, RelatedContent> result = new HashMap<>();

    Iterator<DownloadInfo> it1 = getDHTInfos(search_cvs_only).iterator();
    Iterator<DownloadInfo> it2;
    synchronized (manager.rcm_lock) {
        // Snapshot under the lock so iteration below can run without holding it.
        it2 = new ArrayList<>(RelatedContentManager.transient_info_cache.values()).iterator();
    }
    Iterator<DownloadInfo> it3 = manager.getRelatedContentAsList().iterator();

    for (Iterator<DownloadInfo> it : new Iterator[] { it1, it2, it3 }) {
        while (it.hasNext()) {
            DownloadInfo c = it.next();
            if (c.getSeeds() < min_seeds || c.getLeechers() < min_leechers) {
                continue;
            }

            String title = c.getTitle();
            String lc_title = title.toLowerCase();
            boolean match = true;
            boolean at_least_one = false;
            byte[] hash = c.getHash();

            if (hash_term != null && hash != null && hash_term.equals(Base32.encode(hash))) {
                // direct hash based match
                at_least_one = true;
            } else if (title.equalsIgnoreCase(term) && term_not_blank) {
                // pick up a direct match regardless of anything else
                at_least_one = true;
            } else {
                for (int i = 0; i < bits.length; i++) {
                    String bit = bits[i];
                    if (bit.length() == 0) {
                        continue;
                    }

                    boolean hit;
                    if (bit_patterns[i] == null) {
                        // Plain bit: any one of its '|'-separated alternatives may hit.
                        hit = false;
                        for (String sub_bit : bit.split("\\|")) {
                            if (sub_bit.startsWith("tag:")) {
                                hit = false;
                                String[] tags = c.getTags();
                                if (tags != null && tags.length > 0) {
                                    String target_tag = sub_bit.substring(4).toLowerCase(Locale.US);
                                    target_tag = unescapeTag(target_tag);
                                    target_tag = manager.truncateTag(target_tag);
                                    for (String t : tags) {
                                        if (t.startsWith(target_tag)) {
                                            hit = true;
                                            break;
                                        }
                                    }
                                }
                            } else {
                                hit = lc_title.contains(sub_bit);
                            }
                            if (hit) {
                                break;
                            }
                        }
                    } else {
                        hit = bit_patterns[i].matcher(lc_title).find();
                    }

                    // A negated bit is satisfied by a miss; any other bit by a hit.
                    boolean negated = bit_types[i] == 2;
                    if (hit == negated) {
                        match = false;
                        break;
                    }
                    at_least_one = true;
                }
            }

            if (match && at_least_one) {
                String key;
                if (hash != null) {
                    key = Base32.encode(hash);
                } else {
                    key = manager.getPrivateInfoKey(c);
                }
                result.put(key, c);
            }
        }
    }

    List<RelatedContent> list = new ArrayList<>(result.values());
    int max = is_local ? (is_popularity ? MAX_LOCAL_POPULAR_RESULTS : Integer.MAX_VALUE) : MAX_REMOTE_SEARCH_RESULTS;

    if (list.size() > max) {
        Collections.sort(list, new Comparator<RelatedContent>() {
            @Override
            public int compare(RelatedContent o1, RelatedContent o2) {
                // All comparisons are descending (o2 before o1) and use
                // Integer/Long.compare rather than subtraction, which can overflow
                // and yield an inconsistent ordering.
                if (!is_popularity) {
                    return (Integer.compare(o2.getRank(), o1.getRank()));
                }
                int v1 = o1.getVersion();
                int v2 = o2.getVersion();
                if (v1 != v2) {
                    return (Integer.compare(v2, v1));
                }
                // Widen before adding so the sum itself cannot overflow.
                long sl1 = (long) o1.getSeeds() + o1.getLeechers();
                long sl2 = (long) o2.getSeeds() + o2.getLeechers();
                return (Long.compare(sl2, sl1));
            }
        });
        list = list.subList(0, max);
    }
    return (list);
}
Use of com.biglybt.core.content.RelatedContentManager.DownloadInfo in the BiglyBT project by BiglySoftware.
From class RelatedContentSearcher: method testKeyBloom.
/**
 * Diagnostic for the keyword bloom filter. Currently switched off: the
 * unconditional {@code if (true) return} below makes the method return
 * immediately, so nothing after it runs.
 *
 * <p>When enabled it rebuilds the filter via {@code updateKeyBloom}, collects
 * every word produced by {@code getDHTWords} for the transient cache, the
 * loaded related content and the DHT values, and then, per word, prints
 * whether the filter contains the word and whether it also contains the word
 * with a random number appended. It finishes by printing totals, including
 * how many words produced no {@code matchContent} result.
 */
private void testKeyBloom() {
    // Kill switch for this diagnostic; delete to enable.
    if (true) {
        return;
    }

    System.out.println("test key bloom");

    try {
        // word -> flags, one slot per source: [0] transient, [1] related content, [2] DHT
        Map<String, int[]> words_by_source = new HashMap<>();

        synchronized (manager.rcm_lock) {
            ContentCache loaded = manager.loadRelatedContent();
            List<DownloadInfo> from_dht = getDHTInfos(false);

            // The iterators are obtained before the filter is rebuilt, as before.
            Iterator<DownloadInfo> dht_iter = from_dht.iterator();
            Iterator<DownloadInfo> transient_iter = RelatedContentManager.transient_info_cache.values().iterator();
            Iterator<DownloadInfo> cache_iter = loaded.related_content.values().iterator();

            updateKeyBloom(loaded);

            Iterator[] sources = { transient_iter, cache_iter, dht_iter };
            for (int slot = 0; slot < sources.length; slot++) {
                Iterator<DownloadInfo> source = (Iterator<DownloadInfo>) sources[slot];
                while (source.hasNext()) {
                    DownloadInfo info = source.next();
                    List<String> info_words = getDHTWords(info);
                    for (String w : info_words) {
                        int[] flags = words_by_source.get(w);
                        if (flags == null) {
                            flags = new int[3];
                            words_by_source.put(w, flags);
                        }
                        flags[slot] = 1;
                    }
                }
            }
        }

        BloomFilter filter = getKeyBloom(true);
        Random rng = new Random();

        int word_count = 0;
        int clash_count = 0;
        int miss_count = 0;
        int no_match_count = 0;

        for (Map.Entry<String, int[]> e : words_by_source.entrySet()) {
            String w = e.getKey();
            int[] flags = e.getValue();

            boolean present = filter.contains(w.getBytes("UTF-8"));
            boolean mangled_present = filter.contains((w + rng.nextLong()).getBytes("UTF-8"));
            System.out.println(w + " -> " + present + "/" + mangled_present);

            word_count++;
            if (!present) {
                miss_count++;
            } else if (mangled_present) {
                clash_count++;
            }

            List<RelatedContent> found = matchContent(w, SEARCH_MIN_SEEDS_DEFAULT, SEARCH_MIN_LEECHERS_DEFAULT, true, false);
            if (found.isEmpty()) {
                // The identical search is run a second time and its result is unused.
                // NOTE(review): presumably a spot for a debugger breakpoint - confirm.
                found = matchContent(w, SEARCH_MIN_SEEDS_DEFAULT, SEARCH_MIN_LEECHERS_DEFAULT, true, false);
                no_match_count++;
            }
        }

        System.out.println("total=" + word_count + ", clash=" + clash_count + ", miss=" + miss_count + ", fails=" + no_match_count + ", bloom=" + filter.getString());
    } catch (Throwable e) {
        e.printStackTrace();
    }
}
Aggregations