wordHunt/app.py at main · CodeRafay/wordHunt · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
from __future__ import annotations

from datetime import date

import numpy as np
import streamlit as st
from sentence_transformers import SentenceTransformer

from utils import find_rank_and_score, load_faiss_index, load_words, rank_to_color

# Name of the sentence-transformers model that get_model() instantiates.
# NOTE(review): presumably this must be the same model that produced the
# vectors stored in the search index — confirm against the index build step.
MODEL_NAME = "all-MiniLM-L6-v2"


@st.cache_resource
def get_words() -> list[str]:
    """Return the game's word list as produced by ``load_words``.

    Decorated with ``st.cache_resource`` so the loader is not invoked again
    on every Streamlit rerun.
    """
    vocabulary = load_words()
    return vocabulary


@st.cache_resource
def get_index():
    """Return the search index as produced by ``load_faiss_index``.

    Decorated with ``st.cache_resource`` so the index is not reloaded on
    every Streamlit rerun.
    """
    search_index = load_faiss_index()
    return search_index


@st.cache_resource
def get_model() -> SentenceTransformer:
    """Return a ``SentenceTransformer`` built from ``MODEL_NAME``.

    Decorated with ``st.cache_resource`` so the model is not re-instantiated
    on every Streamlit rerun.
    """
    encoder = SentenceTransformer(MODEL_NAME)
    return encoder


def normalize_query(word: str) -> str:
    """Return *word* with surrounding whitespace removed, in lowercase."""
    trimmed = word.strip()
    return trimmed.lower()


def embed_query(model: SentenceTransformer, word: str) -> np.ndarray:
    """Encode *word* with *model* and L2-normalise the resulting row.

    Args:
        model: Encoder exposing ``encode(list_of_str, ...)``.
        word: The single text to embed.

    Returns:
        A float32 array with one row per encoded text (one row here), each
        row scaled to unit Euclidean length.
    """
    encoded = model.encode(
        [word],
        show_progress_bar=False,
        convert_to_numpy=True,
    )
    vectors = encoded.astype(np.float32)
    row_lengths = np.linalg.norm(vectors, axis=1, keepdims=True)
    # Clamp the divisor so an all-zero embedding does not divide by zero.
    safe_lengths = np.maximum(row_lengths, 1e-12)
    return vectors / safe_lengths


def evaluate_guess(guess: str, target_index: int) -> tuple[int, float]:
    """Score *guess* against the word stored at *target_index*.

    The guess is embedded, every vector in the index is ranked against it,
    and the ranked ids and scores are handed to ``find_rank_and_score``
    together with *target_index*.

    Returns:
        The ``(rank, score)`` pair produced by ``find_rank_and_score``.
        ``main`` treats a rank of 1 as a correct guess.
    """
    encoder = get_model()
    search_index = get_index()

    guess_vector = embed_query(encoder, guess)
    # k == ntotal: ask for the complete ordering, not just the top few.
    scores, neighbor_ids = search_index.search(
        guess_vector, k=search_index.ntotal)
    return find_rank_and_score(neighbor_ids, scores, target_index)


def get_hint_candidate(
    target_index: int,
    words: list[str],
    low_rank: int,
    high_rank: int,
    fallback_low: int,
    fallback_high: int,
) -> tuple[str, int] | None:
    """Pick a hint word whose neighbor rank to the target lies in a window.

    The target's stored vector is reconstructed from the index and used as
    the query, so every indexed word is ordered by closeness to the target.
    Positions in that ordering are 1-based.

    Args:
        target_index: Index id of the hidden word.
        words: Word list; ``words[i]`` is the word for index id ``i``.
        low_rank: First position (inclusive) of the preferred window.
        high_rank: Last position (inclusive) of the preferred window.
        fallback_low: First position (inclusive) of the fallback window.
        fallback_high: Last position (inclusive) of the fallback window.

    Returns:
        ``(word, position)`` for the first usable candidate in the preferred
        window, otherwise the first usable one in the fallback window,
        otherwise ``None``.
    """
    index = get_index()
    target_vector = np.asarray(
        index.reconstruct(target_index), dtype=np.float32).reshape(1, -1)
    _, indices = index.search(target_vector, k=index.ntotal)
    ranked_ids = indices[0]
    total = len(ranked_ids)

    # Try the preferred window first, then the wider fallback window.
    for window_low, window_high in (
        (low_rank, high_rank),
        (fallback_low, fallback_high),
    ):
        # Jump straight to the window instead of walking from position 1;
        # clamp to the positions that actually exist.
        first_pos = max(window_low, 1)
        last_pos = min(window_high, total)
        for pos in range(first_pos, last_pos + 1):
            candidate_index = int(ranked_ids[pos - 1])
            if candidate_index == target_index:
                continue
            # Assuming a FAISS index: unfilled result slots are reported as
            # -1, which would otherwise silently resolve to words[-1].
            if candidate_index < 0:
                continue
            return words[candidate_index], pos

    return None


def render_guess(word: str, rank: int, score: float) -> None:
    """Render one guess as a coloured HTML row via ``st.markdown``.

    Args:
        word: The guessed word; HTML-escaped before being embedded.
        rank: Rank of the guess; selects the background through
            ``rank_to_color`` and the text colour below.
        score: Similarity value, shown with five decimal places.
    """
    # Function-scope import so the block does not depend on a file-level one.
    from html import escape

    background_color = rank_to_color(rank)
    # Ranks 1..1000 get white text, everything else gets dark text.
    text_color = "#ffffff" if 1 <= rank <= 1000 else "#111111"
    # The markup is rendered with unsafe_allow_html=True, so the word must
    # never be able to inject tags, whatever validation the caller applies.
    safe_word = escape(word)

    st.markdown(
        (
            f"<div style='background:{background_color};color:{text_color};padding:10px;border-radius:8px;margin-bottom:8px;'>"
            f"<b>{safe_word}</b> — Rank: <b>{rank}</b> — Similarity: <b>{score:.5f}</b>"
            "</div>"
        ),
        unsafe_allow_html=True,
    )


def _ensure_session_defaults() -> None:
    """Seed ``st.session_state`` with every per-session key that is missing."""
    state = st.session_state
    # "solved" is derived from "guesses", so "guesses" must be seeded first.
    defaults = (
        ("guess_count", lambda: 0),
        ("guesses", list),
        ("word_offset", lambda: 0),
        ("solved", lambda: any(
            entry["rank"] == 1 for entry in state.guesses)),
        ("hints_used", lambda: 0),
        ("hint_messages", list),
    )
    for key, make_default in defaults:
        if key not in state:
            state[key] = make_default()


def _reset_round() -> None:
    """Move on to the next word, drop the finished round's state, rerun."""
    state = st.session_state
    state.word_offset += 1
    for stale_key in ("guess_count", "guesses", "hints_used", "hint_messages", "solved"):
        if stale_key in state:
            del state[stale_key]
    st.rerun()


def _compose_hint(
    hint_number: int,
    target_index: int,
    target_word: str,
    words: list[str],
) -> str:
    """Build the message for hint *hint_number*; any number past 2 reveals the word."""
    if hint_number == 1:
        found = get_hint_candidate(
            target_index,
            words,
            low_rank=5001,
            high_rank=10000,
            fallback_low=4000,
            fallback_high=12000,
        )
        if found is None:
            return "Hint 1: No candidate in 5000-10000 range, but you are looking for a mid-distance semantic neighbor."
        hint_word, hint_rank = found
        return f"Hint 1: Try '{hint_word}' (its rank is around {hint_rank}, near the 5000-10000 zone)."

    if hint_number == 2:
        found = get_hint_candidate(
            target_index,
            words,
            low_rank=501,
            high_rank=5000,
            fallback_low=300,
            fallback_high=6000,
        )
        if found is None:
            return "Hint 2: No candidate in 500-5000 range, so focus on words that are clearly closer in meaning."
        hint_word, hint_rank = found
        return f"Hint 2: Try '{hint_word}' (its rank is around {hint_rank}, near the 500-5000 zone)."

    return f"Hint 3: The hidden word is '{target_word}'."


def _handle_hint_request(
    target_index: int,
    target_word: str,
    words: list[str],
) -> None:
    """Record the next hint for this round and rerun the script."""
    state = st.session_state
    if state.hints_used >= 3:
        st.info("No hints remaining.")
        return

    next_hint = state.hints_used + 1
    message = _compose_hint(next_hint, target_index, target_word, words)
    state.hint_messages.append(message)
    if next_hint not in (1, 2):
        # The final hint reveals the word, which ends the round.
        state.solved = True

    state.hints_used = next_hint
    st.rerun()


def _handle_guess(
    raw_guess: str,
    word_set: set[str],
    target_index: int,
) -> None:
    """Validate a submitted guess and, if acceptable, score and store it."""
    state = st.session_state
    guess = normalize_query(raw_guess)

    if not guess:
        st.info("Please enter a word.")
        return
    if not guess.isalpha():
        st.warning("Use alphabetic words only.")
        return
    if guess not in word_set:
        st.warning("That word is not in the game dictionary.")
        return
    if any(entry["word"] == guess for entry in state.guesses):
        st.info("You already guessed that word.")
        return

    rank, score = evaluate_guess(guess, target_index)
    state.guess_count += 1
    state.guesses.append({"word": guess, "rank": rank, "score": score})
    if rank == 1:
        state.solved = True
        # Rerun so the page is redrawn in its solved state.
        st.rerun()


def _render_summary() -> None:
    """Draw the statistics, the collected hints and the guess history."""
    state = st.session_state
    ranks = [entry["rank"] for entry in state.guesses]
    best_rank = min(ranks) if ranks else "N/A"

    st.subheader("Statistics")
    st.write(f"Total guesses: {state.guess_count}")
    st.write(f"Best rank: {best_rank}")
    st.write(f"Hints used: {state.hints_used}/3")

    if state.hint_messages:
        st.subheader("Hints")
        for message in state.hint_messages:
            st.info(message)

    st.subheader("Guess History")
    # Best (lowest) rank first.
    for entry in sorted(state.guesses, key=lambda item: item["rank"]):
        render_guess(entry["word"], entry["rank"], entry["score"])


def main() -> None:
    """Run one pass of the WordHunt Streamlit page.

    Sets up the page, picks the hidden word from today's date plus the
    session's ``word_offset``, draws the guess form and hint button, handles
    whichever of them fired, and finally renders statistics, hints and
    guess history.
    """
    st.set_page_config(page_title="WordHunt", page_icon="🎯")
    st.title("🎯 WordHunt")
    st.caption("Guess the hidden daily word using semantic similarity.")

    words = get_words()
    word_set = set(words)
    # Result unused here; presumably this just warms the cached index.
    get_index()

    _ensure_session_defaults()
    state = st.session_state

    # The date ordinal picks the word; word_offset moves on after a restart.
    target_index = (date.today().toordinal() + state.word_offset) % len(words)
    target_word = words[target_index]

    solved = state.solved

    if solved:
        st.success(f"Game complete! The word is '{target_word}'.")
        if st.button("Restart Game"):
            _reset_round()

    with st.form("guess_form", clear_on_submit=True):
        user_guess = st.text_input("Enter a word", disabled=solved)
        submitted = st.form_submit_button("Submit", disabled=solved)

    hint_clicked = st.button(
        f"Get Hint ({state.hints_used}/3 used)",
        disabled=solved or state.hints_used >= 3,
    )

    if hint_clicked:
        _handle_hint_request(target_index, target_word, words)

    if submitted:
        _handle_guess(user_guess, word_set, target_index)

    _render_summary()


# Run the app only when this module is the entry point, not when imported.
if __name__ == "__main__":
    main()