# Run: Update tutorial.md #1163
#
# Workflow file for this run:

---
# Jekyll site CI.
#
# Triggered by pushes to the `source` branch. The single `build` job:
#   1. checks out the repository with full history,
#   2. runs etc/build_git_manifest.py (paper-history.json),
#   3. builds the site with the jekyll/jekyll:4 Docker image into ./_site,
#   4. runs the Python scripts in etc/ that write extra JSON into ./_site,
#   5. publishes ./_site to the `master` branch.
#
# Shell steps address files through the runner-provided, quoted
# $GITHUB_WORKSPACE variable instead of splicing the workspace expression
# into the script text.
name: Jekyll site CI

on:
  push:
    branches:
      - source

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: write  # needed for push to Pages branch
    # One active run per workflow + ref; a newer run cancels the older one.
    concurrency:
      group: ${{ github.workflow }}-${{ github.ref }}
      cancel-in-progress: true
    steps:
      # === Checkout repository (with full history for git log) ===
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # === Generate Git history manifest ===
      - name: Setup Python for history manifest
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Generate paper-history.json
        run: |
          echo "Generating paper-history.json from Git history..."
          python "$GITHUB_WORKSPACE/etc/build_git_manifest.py"

      # === Pandas install ===
      # NOTE(review): this runs while the '3.x' interpreter from the step
      # above is active. The later "Setup Python" step selects 3.8 and
      # installs its own package list, which does not include pandas, so
      # presumably pandas is not visible to the scripts that run after it.
      # TODO confirm which script needs pandas and install it there.
      - name: Install pandas
        run: pip install pandas

      # === Build site using Debian-based Jekyll image (no Gemfile required) ===
      - name: Build the site (jekyll/jekyll:4, fix perms)
        run: |
          rm -f "$GITHUB_WORKSPACE/_publications/template" || true
          mkdir -p "$GITHUB_WORKSPACE/.jekyll-cache"
          mkdir -p "$GITHUB_WORKSPACE/_site"
          chmod -R a+rwX "$GITHUB_WORKSPACE/.jekyll-cache"
          chmod -R a+rwX "$GITHUB_WORKSPACE/_site"
          docker run --rm \
            -v "$GITHUB_WORKSPACE:/srv/jekyll" \
            -w /srv/jekyll \
            -e JEKYLL_ENV=production \
            -e JEKYLL_CACHE_DIR=/srv/jekyll/.jekyll-cache \
            jekyll/jekyll:4 \
            jekyll build --future --trace

      # === Python setup & package installs ===
      # From here on `python` / `pip` refer to the 3.8 interpreter.
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.8'
          architecture: x64

      - name: Install required Python packages
        run: |
          python -m pip install --upgrade pip
          pip install transformers scikit-learn numpy nltk gensim scipy
          pip install torch==1.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
          pip install networkx python-louvain

      - name: Download NLTK data
        run: |
          python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('stopwords'); nltk.download('wordnet'); nltk.download('omw-1.4')"

      # === Author stats ===
      # Reads the Markdown sources in _publications and writes
      # _site/author_stats.json.
      - name: Extract Author Stats
        run: |
          python "$GITHUB_WORKSPACE/etc/extract_author_stats.py" \
            --markdown_dir "$GITHUB_WORKSPACE/_publications" \
            --output_dir "$GITHUB_WORKSPACE/_site/" \
            --output_filename author_stats.json

      # === Embeddings & clustering ===
      # Each step consumes the file written by the previous one:
      # paper-abstracts.json -> tsne.json -> tsne_clustered.json
      # -> cluster_summary.json
      - name: Compute tSNE Embeddings
        run: |
          python "$GITHUB_WORKSPACE/etc/compute_embeddings.py" \
            "$GITHUB_WORKSPACE/_site/paper-abstracts.json" \
            "$GITHUB_WORKSPACE/_site/tsne.json"

      # The trailing `10` is passed through to perform_clustering.py
      # (presumably the number of clusters; verify against the script).
      - name: Perform Clustering on tSNE Embeddings
        run: |
          python "$GITHUB_WORKSPACE/etc/perform_clustering.py" \
            "$GITHUB_WORKSPACE/_site/tsne.json" \
            "$GITHUB_WORKSPACE/_site/tsne_clustered.json" \
            10

      - name: Summarize Tags by Cluster
        run: |
          python "$GITHUB_WORKSPACE/etc/summarize_clusters.py" \
            "$GITHUB_WORKSPACE/_site/tsne_clustered.json" \
            "$GITHUB_WORKSPACE/_site/cluster_summary.json"

      # === Topics & related content ===
      # Both scripts take _site/paper-abstracts.json as their first argument.
      - name: Compute topics and related content
        run: |
          python "$GITHUB_WORKSPACE/etc/compute_topics.py" \
            "$GITHUB_WORKSPACE/_site/paper-abstracts.json" \
            "$GITHUB_WORKSPACE/_site/topics.json"
          python "$GITHUB_WORKSPACE/etc/compute_related.py" \
            "$GITHUB_WORKSPACE/_site/paper-abstracts.json" \
            "$GITHUB_WORKSPACE/_site/publications-metadata/"

      # === Deploy ===
      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v4
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./_site
          publish_branch: master  # user/organization site repo
          force_orphan: true  # avoid non-FF errors by recreating branch
          user_name: 'github-actions[bot]'  # optional but tidy
          user_email: 'github-actions[bot]@users.noreply.github.com'