@@ -93,6 +93,25 @@ RUN apt-get install -y git-lfs && \
     apt-get install -y xvfb && \
     /tmp/clean-layer.sh

+RUN uv pip install --system --force-reinstall "nltk==3.9.1"
+RUN mkdir -p /usr/share/nltk_data && \
+    # NLTK Downloader no longer continues smoothly after an error, so we explicitly list
+    # the corpuses that work
+    python -m nltk.downloader -d /usr/share/nltk_data abc alpino averaged_perceptron_tagger \
+    basque_grammars biocreative_ppi bllip_wsj_no_aux \
+    book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \
+    comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \
+    europarl_raw floresta gazetteers genesis gutenberg \
+    ieer inaugural indian jeita kimmo knbc large_grammars lin_thesaurus mac_morpho machado \
+    masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \
+    mte_teip5 names nps_chat omw opinion_lexicon paradigms \
+    pil pl196x porter_test ppattach problem_reports product_reviews_1 product_reviews_2 propbank \
+    pros_cons ptb punkt punkt_tab qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \
+    sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \
+    state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \
+    twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \
+    vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe
+
 # Download base easyocr models.
 # https://github.com/JaidedAI/EasyOCR#usage
 RUN mkdir -p /root/.EasyOCR/model && \
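The download step places the corpora under /usr/share/nltk_data, which is already on NLTK's default data search path on Linux, so downstream code should find them without setting NLTK_DATA. A minimal sanity check, assuming an image built from this Dockerfile (the resource names are taken from the download list above):

```python
# Sketch of a post-build check, assuming /usr/share/nltk_data is populated
# by the RUN step above and is on nltk.data.path (the Linux default).
import nltk
from nltk.corpus import brown, stopwords

# Resolve a couple of resources explicitly; raises LookupError if missing.
print(nltk.data.find("corpora/stopwords"))
print(nltk.data.find("tokenizers/punkt_tab"))

# Read from corpora included in the download list.
print(brown.words()[:10])
print(stopwords.words("english")[:10])

# word_tokenize relies on punkt/punkt_tab, both of which are downloaded.
print(nltk.word_tokenize("NLTK data is baked into the image."))
```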