1010
1111import sys , traceback
def log(msg):
    """Write *msg* plus a newline to stderr and flush immediately.

    Uses stderr (not stdout) so debug chatter never mixes into the
    program's real output; the explicit flush makes messages appear
    promptly even when the stream is block-buffered.
    """
    sys.stderr.write(msg + "\n")
    sys.stderr.flush()
1515
# Startup marker; plain string — the f-prefix was redundant (no placeholders).
log("[DEBUG] Starting shannon_words_ngram.py")
1717
@@ -30,22 +30,24 @@ def log(msg):
3030# If creating locally, to be copied when deployed:
3131FILE = MODEL_DIR / "ngram_counts.pkl.bz2"
3232
33- def get_counts (n = 3 ):
33+ def get_counts(n=3, dev=False):
3434 print (f"Loading/building n-gram counts for n={ n } ..." )
3535 if os .path .exists (FILE ):
36- with bz2 .BZ2File (FILE , "rb" ) as f :
37- cache = pickle .load (f )
38- else : # from here the deployed version will not work because the corpora are not bundled (to save space)
36+ try :
37+ with bz2 .BZ2File (FILE , "rb" ) as f :
38+ cache = pickle .load (f )
39+ except Exception as e :
40+ raise RuntimeError (f"Failed to load { FILE } : { e } " )
41+ elif dev : # from here the deployed version will not work because the corpora are not bundled (to save space)
3942 cache = {}
40- if n not in cache :
41- print (f"Building counts for n={ n } (this may take a while)..." )
42- cache [n ] = build_counts (n , START , END ) # similarly, only works if NLTK corpora are available
4343 try :
44- with bz2 .BZ2File (FILE , "wb" ) as f :
45- pickle .dump (cache , f )
44+ cache [n ] = build_counts (n , START , END ) # only works if NLTK corpora are available
45+ with bz2.BZ2File(FILE, "wb") as f:
46+ pickle.dump(cache, f)
4647 except Exception as e :
47- print (f"Warning: couldn't save n-gram cache to { FILE } : { e } " )
48-
48+ raise RuntimeError (f"Failed to rebuild or save n-gram counts: { e } " )
49+ else :
50+ raise FileNotFoundError (f"N-gram counts file not found at { FILE } , and dev mode is off so counts not generated." )
4951 counts = cache [n ]
5052 if n == 1 :
5153 counts .setdefault ((), {}) # CHANGE: ensure unigram context exists
@@ -59,10 +61,10 @@ def sample_next(counts, ctx):
5961 words , freqs = zip (* options .items ())
6062 return random .choices (words , freqs )[0 ]
6163
62- def generate (start = "" , max_len = 20 , n = None ):
64+ def generate (start = "" , max_len = 20 , n = None , dev = False ):
6365 start_tokens = start .lower ().split ()
6466 n = max (2 , len (start_tokens ) + 1 ) if n is None else n # Note the requirement n>1, otherwise there's 'no context' and the model fails
65- counts = get_counts (n )
67+ counts = get_counts (n , dev = dev )
6668 start_tokens = start .lower ().split ()
6769 need = n - 1
6870 ctx = tuple ((([START ]* need ) + start_tokens )[- need :]) if need else ()
@@ -89,7 +91,7 @@ def run(response, answer, params:Params) -> Result:
8991 response_used = isinstance (response , str )
9092 context = response if response_used else "the general" # Default context
9193 context_window = params .get ("context_window" , 3 ) or 3
92- output .append (generate (context ,word_count ,context_window ))
94+ output .append (generate (context ,word_count ,context_window , dev = params . get ( "dev" , False ) ))
9395 preface = 'Context window: ' + str (context_window )+ ', Word count: ' + str (word_count )+ '. Output: <br>'
9496 feedback_items = [("general" , preface + ' ' .join (output ))]
9597 #feedback_items.append("| Answer not an integer; used default context window") if not response_used else None
0 commit comments