Update tutorial.md #1163
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the Jekyll site on every push to the `source` branch, adds
# Python-generated JSON artifacts to the built `_site` directory, and
# publishes `_site` to the `master` branch.
name: Jekyll site CI

on:
  push:
    branches:
      - source

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: write  # needed for push to Pages branch
    # One run per workflow/ref pair; a newer push cancels the run in progress.
    concurrency:
      group: ${{ github.workflow }}-${{ github.ref }}
      cancel-in-progress: true
    steps:
      # === Checkout repository (with full history for git log) ===
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # === Generate Git history manifest ===
      # This interpreter is only used by the manifest script below; the
      # later "Setup Python" step replaces it with Python 3.8.
      - name: Setup Python for history manifest
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Generate paper-history.json
        run: |
          echo "Generating paper-history.json from Git history..."
          python "${{ github.workspace }}/etc/build_git_manifest.py"

      # === Build site inside the jekyll/jekyll:4 image (no Gemfile required) ===
      # The cache and output directories are created and made world-writable
      # up front so the container can write into the bind-mounted workspace.
      - name: Build the site (jekyll/jekyll:4, fix perms)
        run: |
          rm -f "${{ github.workspace }}/_publications/template" || true
          mkdir -p "${{ github.workspace }}/.jekyll-cache"
          mkdir -p "${{ github.workspace }}/_site"
          chmod -R a+rwX "${{ github.workspace }}/.jekyll-cache"
          chmod -R a+rwX "${{ github.workspace }}/_site"
          docker run --rm \
            -v "${{ github.workspace }}:/srv/jekyll" \
            -w /srv/jekyll \
            -e JEKYLL_ENV=production \
            -e JEKYLL_CACHE_DIR=/srv/jekyll/.jekyll-cache \
            jekyll/jekyll:4 \
            jekyll build --future --trace

      # === Python setup & package installs ===
      # From here on `python` and `pip` resolve to Python 3.8.
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.8'
          architecture: x64

      # pandas is installed here rather than before the Jekyll build: a
      # package installed under the earlier 3.x interpreter is not importable
      # from the 3.8 interpreter that runs every script below.
      # NOTE(review): which script imports pandas is not visible from this
      # workflow; drop the pandas line if none does.
      - name: Install required Python packages
        run: |
          python -m pip install --upgrade pip
          pip install transformers scikit-learn numpy nltk gensim scipy
          pip install torch==1.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
          pip install networkx python-louvain
          pip install pandas

      - name: Download NLTK data
        run: |
          python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('stopwords'); nltk.download('wordnet'); nltk.download('omw-1.4')"

      # === Author stats ===
      - name: Extract Author Stats
        run: |
          python "${{ github.workspace }}/etc/extract_author_stats.py" \
            --markdown_dir "${{ github.workspace }}/_publications" \
            --output_dir "${{ github.workspace }}/_site/" \
            --output_filename author_stats.json

      # === Embeddings & clustering ===
      # Each step reads the JSON file written by the previous one.
      - name: Compute tSNE Embeddings
        run: |
          python "${{ github.workspace }}/etc/compute_embeddings.py" \
            "${{ github.workspace }}/_site/paper-abstracts.json" \
            "${{ github.workspace }}/_site/tsne.json"

      # NOTE(review): the trailing `10` is presumably the number of
      # clusters; confirm against perform_clustering.py.
      - name: Perform Clustering on tSNE Embeddings
        run: |
          python "${{ github.workspace }}/etc/perform_clustering.py" \
            "${{ github.workspace }}/_site/tsne.json" \
            "${{ github.workspace }}/_site/tsne_clustered.json" \
            10

      - name: Summarize Tags by Cluster
        run: |
          python "${{ github.workspace }}/etc/summarize_clusters.py" \
            "${{ github.workspace }}/_site/tsne_clustered.json" \
            "${{ github.workspace }}/_site/cluster_summary.json"

      # === Topics & related content ===
      - name: Compute topics and related content
        run: |
          python "${{ github.workspace }}/etc/compute_topics.py" \
            "${{ github.workspace }}/_site/paper-abstracts.json" \
            "${{ github.workspace }}/_site/topics.json"
          python "${{ github.workspace }}/etc/compute_related.py" \
            "${{ github.workspace }}/_site/paper-abstracts.json" \
            "${{ github.workspace }}/_site/publications-metadata/"

      # === Deploy ===
      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v4
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./_site
          publish_branch: master  # user/organization site repo
          force_orphan: true  # avoid non-FF errors by recreating branch
          user_name: 'github-actions[bot]'  # optional but tidy
          user_email: 'github-actions[bot]@users.noreply.github.com'