diff --git a/mods/core/download_mods.rpy b/mods/core/download_mods.rpy
index 8cc6714..5d75626 100644
--- a/mods/core/download_mods.rpy
+++ b/mods/core/download_mods.rpy
@@ -153,6 +153,46 @@ init python:
 
     modconfig.steam_modlist_preloader.load()
 
+    import modmenu_search
+    import time
+
+    def set_query(value):  # "changed" callback of the mod search input: stores the text in the screen scope and re-runs the search
+        curr_screen_scope = renpy.current_screen().scope
+        curr_screen_scope["query"] = value
+        search_modlist(value, curr_screen_scope["author_query"])
+        return
+
+    def set_author_query(value):  # "changed" callback of the author search input: stores the text in the screen scope and re-runs the search
+        curr_screen_scope = renpy.current_screen().scope
+        curr_screen_scope["author_query"] = value
+        search_modlist(curr_screen_scope["query"], value)
+        return
+
+
+    def search_modlist(query, author_query=""):  # Reorders the screen's modlist for the given search texts and refreshes the current page
+        print "searching with {}, {}".format(query, author_query)
+
+        # The input "changed" callbacks above only receive the typed text, so everything else the search needs is read from the current screen's scope.
+        curr_screen_scope = renpy.current_screen().scope
+        modlist = curr_screen_scope["contents"]
+        page = curr_screen_scope["current_page"]
+        page_size = curr_screen_scope["PAGE_SIZE"]
+        use_steam = curr_screen_scope["use_steam"]
+
+        s_time = time.time()
+        if query.strip() or author_query.strip(): # Only reorder when at least one search box contains text.
+            reordered_modlist = modmenu_search.sort_best(query, modlist, author_query=author_query)
+        else:
+            reordered_modlist = curr_screen_scope["contents"]
+        print "Search took: {:.5}".format(time.time() - s_time) # Elapsed seconds; hopefully this never goes above 0.3
+
+        curr_screen_scope["search_order_contents"] = reordered_modlist
+        _refresh_modlist_page(page, page_size, reordered_modlist, use_steam)
+        renpy.restart_interaction()
+
+        return
+
+
 init -1 python:
     import sys
     import math
@@ -393,6 +433,10 @@ screen modmenu_paged(contents, use_steam):
     $ MIN_PAGE = 1 # Do note, modpage numbers are 1-indexed
     $ MAX_PAGE = int(math.ceil(len(contents) / float(PAGE_SIZE)))
 
+    default search_order_contents = contents
+    default query = ""
+    default author_query = ""
+
     frame id "modmenu_paged" at alpha_dissolve:
         add "image/ui/ingame_menu_bg3.png"
 
@@ -440,7 +484,7 @@ screen modmenu_paged(contents, use_steam):
                 ycenter 0.5
                 # Tried to bind this to shift+scroll, but it didn't work...
                 action [SetScreenVariable("current_page", max(current_page-5, MIN_PAGE)),
-                        Function(_refresh_modlist_page, max(current_page-5, MIN_PAGE), PAGE_SIZE, contents, use_steam=use_steam)
+                        Function(_refresh_modlist_page, max(current_page-5, MIN_PAGE), PAGE_SIZE, search_order_contents, use_steam=use_steam)
                         ]
                 sensitive (current_page > 1)
 
@@ -449,7 +493,7 @@ screen modmenu_paged(contents, use_steam):
                 ycenter 0.5
                 keysym "mousedown_4"
                 action [SetScreenVariable("current_page", current_page-1),
-                        Function(_refresh_modlist_page, current_page-1, PAGE_SIZE, contents, use_steam=use_steam)
+                        Function(_refresh_modlist_page, current_page-1, PAGE_SIZE, search_order_contents, use_steam=use_steam)
                         ]
                 sensitive (current_page > 1)
 
@@ -464,7 +508,7 @@ screen modmenu_paged(contents, use_steam):
                 ycenter 0.5
                 keysym "mousedown_5"
                 action [SetScreenVariable("current_page", current_page+1),
-                        Function(_refresh_modlist_page, current_page+1, PAGE_SIZE, contents, use_steam=use_steam)
+                        Function(_refresh_modlist_page, current_page+1, PAGE_SIZE, search_order_contents, use_steam=use_steam)
                         ]
                 sensitive (current_page < MAX_PAGE)
 
@@ -473,14 +517,102 @@ screen modmenu_paged(contents, use_steam):
                 ycenter 0.5
                 # Also tried to bind this to shift+scroll, but it didn't work...
                 action [SetScreenVariable("current_page", min(current_page+5, MAX_PAGE)),
-                        Function(_refresh_modlist_page, min(current_page+5, MAX_PAGE), PAGE_SIZE, contents, use_steam=use_steam)
+                        Function(_refresh_modlist_page, min(current_page+5, MAX_PAGE), PAGE_SIZE, search_order_contents, use_steam=use_steam)
                         ]
                 sensitive (current_page < MAX_PAGE)
 
 
-        on "show" action [Function(_refresh_modlist_page, current_page, PAGE_SIZE, contents, use_steam=use_steam),
-                          Function(_preload_mod_images, contents, None)]
-        on "hide" action [Function(mod_image_preloader.clear)]
+        hbox:
+            xpos 65
+            ypos 10
+            xanchor 0.0
+            yanchor 0.0
+            xsize 425
+            ysize 70
+
+            spacing 10
+
+            vbox:
+                xalign 0.0
+                ycenter 0.5
+                xsize 75
+                spacing 6
+
+                # Action-less textbuttons serve as the row labels: 'label' and 'text' components came out ever so slightly too large, which made everything look misaligned.
+                textbutton "author:":
+                    background "#00000000"
+                    text_size 24
+                    ysize 32
+                    xalign 0.0
+
+                textbutton "mod:":
+                    background "#00000000"
+                    text_size 24
+                    ysize 32
+                    xalign 0.0
+
+            vbox:
+                xalign 0.0
+                ycenter 0.5
+                spacing 6
+
+                default focus_query_input = False
+                default focus_author_query_input = False
+
+                # Input components capture focus so aggressively that more than one of them cannot normally be used in a single screen.
+                # Each input is therefore wrapped in a button: a button is a container that can hold focus itself, so it can intercept that behaviour.
+                button:
+                    background If(focus_author_query_input, "#FFFFFFCD", "#000000CD")
+                    hover_background If(focus_author_query_input, "#BFBFFFCD", "#000040CD")
+                    activate_sound None
+                    key_events focus_author_query_input
+                    action [ToggleScreenVariable("focus_author_query_input"), SetScreenVariable("focus_query_input", False)]
+                    xfill True
+                    ysize 32
+                    xpadding 0
+
+                    input:
+                        color If(focus_author_query_input, "#000", "#FF7F00")
+                        xalign 0.0
+                        ycenter 0.5
+                        size 24
+                        pixel_width 320 # The horizontal space is supposed to be 340, but the inputs tend to drop down a row, so some slack is left.
+                        changed set_author_query
+
+
+                button:
+                    background If(focus_query_input, "#FFFFFFCD", "#000000CD")
+                    hover_background If(focus_query_input, "#BFBFFFCD", "#000040CD")
+                    activate_sound None
+                    key_events focus_query_input
+                    action [ToggleScreenVariable("focus_query_input"), SetScreenVariable("focus_author_query_input", False)]
+                    xfill True
+                    ysize 32
+                    xpadding 0
+
+                    input:
+                        color If(focus_query_input, "#000", "#FFFF00")
+                        xalign 0.0
+                        ycenter 0.5
+                        size 24
+                        pixel_width 320
+                        changed set_query
+        key "K_ESCAPE" action [SetScreenVariable("focus_query_input", False), SetScreenVariable("focus_author_query_input", False)]
+        key "K_TAB" action [ToggleScreenVariable("focus_author_query_input"),
+                            If(focus_author_query_input,
+                               ToggleScreenVariable("focus_query_input"),
+                               SetScreenVariable("focus_query_input", False))]
+
+
+        on "show" action [Function(_refresh_modlist_page, current_page, PAGE_SIZE, contents, use_steam=use_steam),
+                          Function(_preload_mod_images, contents, None),
+                          Function(im.cache.clear) # Precaution: this menu tended to produce 'out of memory' errors
+                          ]
+
+        on "hide" action [Function(mod_image_preloader.clear), # Clean up after ourselves
+                          Function(im.cache.clear),
+                          Function(modmenu_search.clear_cache),
+                          ]
 
 
 
diff --git a/mods/core/modules/modmenu_search.py b/mods/core/modules/modmenu_search.py
new file mode 100644
index 0000000..52030bb
--- /dev/null
+++ 
# mods/core/modules/modmenu_search.py
"""Fuzzy search helpers for the mod menu.

Mods are ranked against a free-text query (matched word by word against the
mod name and description) and an optional author query, using Jaro
similarity.  Results are memoized; call ``clear_cache()`` to release them.

The code is written to behave the same on Python 2 and Python 3.
"""

import functools


def cache(function):
    """Memoize ``function`` on its positional arguments.

    The arguments must be hashable.  The returned wrapper exposes the
    memo dict as ``results`` and a ``clear_cache()`` callable that empties it.
    """
    results = {}

    @functools.wraps(function)
    def inner(*args):
        try:
            return results[args]
        except KeyError:
            value = results[args] = function(*args)
            return value

    def clear_cache():
        results.clear()

    inner.results = results
    inner.clear_cache = clear_cache
    return inner


# Adapted from nltk (https://www.nltk.org/_modules/nltk/metrics/distance.html#jaro_similarity)
@cache
def jaro_similarity(s1, s2):
    """Compute the Jaro similarity between two sequences.

    Matthew A. Jaro (1989). Advances in record linkage methodology
    as applied to the 1985 census of Tampa Florida. Journal of the
    American Statistical Association. 84 (406): 414-20.

    The formula, from
    https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance , is

    ``jaro_sim = 0 if m = 0 else 1/3 * (m/|s_1| + m/|s_2| + (m-t)/m)``

    where
        - ``|s_i|`` is the length of string ``s_i``
        - ``m`` is the number of matching characters
        - ``t`` is half the number of transpositions.

    :returns: a float between 0.0 (nothing matches, or either input is
        empty) and 1.0 (identical sequences).
    """
    len_s1, len_s2 = len(s1), len(s2)

    # Furthest distance between two positions that may still count as a match.
    # The one change from the nltk original: clamping to 0 lets two identical
    # one-character words match each other.
    match_bound = max(max(len_s1, len_s2) // 2 - 1, 0)

    flagged_1 = []  # positions in s1 matched to some character in s2
    flagged_2 = []  # positions in s2 matched to some character in s1
    taken_2 = set()  # same positions as flagged_2, for O(1) membership tests

    for i in range(len_s1):
        lowerbound = max(0, i - match_bound)
        upperbound = min(i + match_bound, len_s2 - 1)
        for j in range(lowerbound, upperbound + 1):
            if s1[i] == s2[j] and j not in taken_2:
                flagged_1.append(i)
                flagged_2.append(j)
                taken_2.add(j)
                break

    if not flagged_1:
        return 0.0

    # Matched characters that appear in a different order are transpositions.
    flagged_2.sort()
    transpositions = sum(1 for i, j in zip(flagged_1, flagged_2) if s1[i] != s2[j])

    matches = float(len(flagged_1))
    return (
        1 / 3.0
        * (
            matches / len_s1
            + matches / len_s2
            + (matches - transpositions // 2) / matches
        )
    )


# (query_word, target1, target2) -> (best similarity in target1, best similarity in target2)
_jaro_best_match_cache = {}


def _best_word_match(query_word, target_words):
    """Return the highest Jaro similarity between query_word and any target word (0.0 if none)."""
    best = 0.0
    for target_word in target_words:
        best = max(best, jaro_similarity(query_word, target_word))
    return best


def jaro_set_similarity(query_set, target1, target2):
    """Find the approximate similarity between a set of query words and two target strings.

    Each word in ``query_set`` is compared against every (lower-cased,
    whitespace-separated) word of ``target1`` and of ``target2`` to find its
    best match in each; the similarity to a target is the average of those
    best matches.  The words in ``query_set`` are compared as given, so the
    caller is expected to lower-case them.

    Per-word results are cached under ``(query_word, target1, target2)``, so
    two mods that share a name but differ in description do not share scores.

    :returns: tuple ``(similarity to target1, similarity to target2)``;
        ``(0.0, 0.0)`` when ``query_set`` is empty.
    """
    if not query_set:
        return (0.0, 0.0)

    word_sets = None  # built only if at least one query word misses the cache
    total_1 = total_2 = 0.0
    for query_word in query_set:
        key = (query_word, target1, target2)
        best = _jaro_best_match_cache.get(key)
        if best is None:
            if word_sets is None:
                word_sets = (set(target1.lower().split()), set(target2.lower().split()))
            best = (_best_word_match(query_word, word_sets[0]),
                    _best_word_match(query_word, word_sets[1]))
            _jaro_best_match_cache[key] = best
        total_1 += best[0]
        total_2 += best[1]

    n_values = float(len(query_set))
    return (total_1 / n_values, total_2 / n_values)


def jaro_split_compare(query, modlist):
    """Compare the modlist to the query using Jaro similarity on each word.

    ``modlist`` entries are unpacked as 5-item sequences with the mod name at
    index 1 and the description at index 3.

    :returns: dict from mod name to a tuple holding the similarity of the
        query to the mod name, then to the mod description.  Mods that share
        a name share one entry (the last one wins).
    """
    query_words = set(query.lower().split())
    return {name: jaro_set_similarity(query_words, name, desc)
            for _, name, _, desc, _ in modlist}


def jaro_author_compare(author_query, modlist):
    """Compare the author of every mod to ``author_query`` as whole lower-cased strings.

    ``modlist`` entries are unpacked as 5-item sequences with the mod name at
    index 1 and the author at index 2.

    :returns: dict from mod name to Jaro similarity.  Mods that share a name
        share one entry (the last one wins).
    """
    author_query = author_query.lower()
    return {name: jaro_similarity(author_query, author.lower())
            for _, name, author, _, _ in modlist}


def _rank(score):
    """Collapse a (name, description, author) similarity triple into one sort key.

    The better of the name/description matches is the base value.  Strong
    name matches (> 0.9) and strong author matches (> 0.7) are added on top,
    because the description otherwise tends to dominate.  Max gave better
    results than sum for the base value.
    """
    name_sim, desc_sim, author_sim = score
    rank = max(name_sim, desc_sim)
    if name_sim > 0.9:
        rank += name_sim
    if author_sim > 0.7:
        rank += author_sim
    return rank


def sort_best(query, modlist, author_query="", return_score=False):
    """Sort mods by best match to ``query`` and ``author_query``.

    ``modlist`` entries are unpacked as 5-item sequences with the mod name at
    index 1, the author at index 2 and the description at index 3.  Mods are
    tracked by position rather than by name, so every entry of ``modlist``
    appears exactly once in the result even when names repeat, and equally
    ranked mods keep their original relative order.

    :param query: free text matched word by word against name and description;
        ignored when blank.
    :param author_query: text matched against the author; ignored when blank.
    :param return_score: when true, return ``(mod, score)`` pairs instead of
        mods.  ``score`` is the ``(name, description, author)`` similarity
        triple, or ``0.0`` when reordering was suppressed.
    :returns: a new list.  If even the best mod ranks at or below 0.3 the
        original order is kept.  An empty ``modlist`` gives an empty list.
    """
    mods = list(modlist)
    if not mods:
        return []

    query_words = set(query.lower().split())  # empty set when the query is blank
    author_needle = author_query.lower() if author_query.strip() else None

    scores = []
    for _, name, author, desc, _ in mods:
        name_sim, desc_sim = jaro_set_similarity(query_words, name, desc)
        if author_needle is None:
            author_sim = 0.0
        else:
            author_sim = jaro_similarity(author_needle, author.lower())
        scores.append((name_sim, desc_sim, author_sim))

    ranks = [_rank(score) for score in scores]
    # sorted() is stable, also with reverse=True, so ties keep modlist order.
    order = sorted(range(len(mods)), key=ranks.__getitem__, reverse=True)

    if ranks[order[0]] <= 0.3:  # All bad matches, don't reorder
        print("No good matches. reordering suppressed")
        if return_score:
            return [(mod, 0.0) for mod in mods]
        return mods

    if return_score:
        return [(mods[i], scores[i]) for i in order]
    return [mods[i] for i in order]


def clear_cache():
    """Drop every memoized similarity held by this module."""
    _jaro_best_match_cache.clear()
    jaro_similarity.clear_cache()