Skip to content

Commit 8c44c8b

Browse files
Peter JohnsonPeter Johnson
authored and committed
Pkl import error handling
1 parent 06b8973 commit 8c44c8b

1 file changed

Lines changed: 18 additions & 16 deletions

File tree

evaluation_function/models/shannon_words_ngram.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010

1111
import sys, traceback
1212
def log(msg):
13-
sys.stdout.write(msg + "\n")
14-
sys.stdout.flush()
13+
sys.stderr.write(msg + "\n")
14+
sys.stderr.flush()
1515

1616
log(f"[DEBUG] Starting shannon_words_ngram.py")
1717

@@ -30,22 +30,24 @@ def log(msg):
3030
# If creating locally, to be copied when deployed:
3131
FILE = MODEL_DIR / "ngram_counts.pkl.bz2"
3232

33-
def get_counts(n=3):
33+
def get_counts(n=3, dev=False):
3434
print(f"Loading/building n-gram counts for n={n}...")
3535
if os.path.exists(FILE):
36-
with bz2.BZ2File(FILE, "rb") as f:
37-
cache = pickle.load(f)
38-
else: # from here the deployed version will not work because the corpora are not bundled (to save space)
36+
try:
37+
with bz2.BZ2File(FILE, "rb") as f:
38+
cache = pickle.load(f)
39+
except Exception as e:
40+
raise RuntimeError(f"Failed to load {FILE}: {e}")
41+
elif dev: # from here the deployed version will not work because the corpora are not bundled (to save space)
3942
cache = {}
40-
if n not in cache:
41-
print(f"Building counts for n={n} (this may take a while)...")
42-
cache[n] = build_counts(n, START, END) # similarly, only works if NLTK corpora are available
4343
try:
44-
with bz2.BZ2File(FILE, "wb") as f:
45-
pickle.dump(cache, f)
44+
cache[n] = build_counts(n, START, END) # only works if NLTK corpora are available
45+
with bz2.BZ2File(FILE, "wb") as f:
46+
pickle.dump(cache, f)
4647
except Exception as e:
47-
print(f"Warning: couldn't save n-gram cache to {FILE}: {e}")
48-
48+
raise RuntimeError(f"Failed to rebuild or save n-gram counts: {e}")
49+
else:
50+
raise FileNotFoundError(f"N-gram counts file not found at {FILE}, and dev mode is off so counts not generated.")
4951
counts = cache[n]
5052
if n == 1:
5153
counts.setdefault((), {}) # CHANGE: ensure unigram context exists
@@ -59,10 +61,10 @@ def sample_next(counts, ctx):
5961
words, freqs = zip(*options.items())
6062
return random.choices(words, freqs)[0]
6163

62-
def generate(start="", max_len=20, n=None):
64+
def generate(start="", max_len=20, n=None, dev=False):
6365
start_tokens = start.lower().split()
6466
n = max(2, len(start_tokens) + 1) if n is None else n # Note the requirement n>1, otherwise there's 'no context' and the model fails
65-
counts = get_counts(n)
67+
counts = get_counts(n,dev=dev)
6668
start_tokens = start.lower().split()
6769
need = n-1
6870
ctx = tuple((([START]*need) + start_tokens)[-need:]) if need else ()
@@ -89,7 +91,7 @@ def run(response, answer, params:Params) -> Result:
8991
response_used = isinstance(response, str)
9092
context = response if response_used else "the general" # Default context
9193
context_window = params.get("context_window", 3) or 3
92-
output.append(generate(context,word_count,context_window))
94+
output.append(generate(context,word_count,context_window,dev=params.get("dev", False)))
9395
preface = 'Context window: '+str(context_window)+', Word count: '+str(word_count)+'. Output: <br>'
9496
feedback_items = [("general", preface + ' '.join(output))]
9597
#feedback_items.append("| Answer not an integer; used default context window") if not response_used else None

0 commit comments

Comments
 (0)