opportunity-hack · gregv · May 19, 2026 · May 18, 2026 · May 18, 2026 · May 19, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -121,7 +121,9 @@ These are DIFFERENT VALUES. When bundling user data for the frontend, include bo
 Two scripts live in `scripts/` for diagnosing and backfilling team rosters on `/hack/<event_id>`:
 
 - `audit_hackathon_team_users.py --event-id <id>` (read-only) — walks `hackathons/{id}.teams[] -> teams/{id}.users[]` and reports per-team member counts, dangling refs (team points to deleted user doc), and "ghost" users (no name + no propel_id = imported but never logged in).
-- `import_hackathon_users_from_csv.py --csv <path> --event-id <id> --csv-type {registrants|projects|roster} [--apply]` — dry-run by default. `projects` parses Devpost projects CSVs (variable-length team-member triplets starting at col 22, 1-indexed); `roster` parses a generic `team,email[,first_name,last_name,name]` CSV for backfilling memberships; `registrants` just seeds user docs. Users are matched by `email_address` (case-insensitive). Imported users get `imported=True`, `import_source`, `import_event_id`, blank `user_id`/`propel_id`. Team membership writes are additive — never removes existing members. Re-runnable.
+- `import_hackathon_users_from_csv.py --csv <path> --event-id <id> --csv-type {registrants|projects|roster} [--apply]` — dry-run by default. `projects` parses Devpost projects CSVs; the team-member triplet offset is resolved by header lookup (`Team Member 1 First Name`), since old 23-col exports have no "Team Number" column while newer 24-col ones do. Each parsed "email" is validated with the email regex — rows where the triplet shifted off-axis are skipped with a warning rather than written as bogus user docs. `roster` parses a generic `team,email[,first_name,last_name,name]` CSV for backfilling memberships; `registrants` just seeds user docs. Users are matched by `email_address` (case-insensitive). Imported users get `imported=True`, `import_source`, `import_event_id`, blank `user_id`/`propel_id`. Team membership writes are additive — never removes existing members. Re-runnable.
+- `cleanup_bogus_imported_users.py [--event-id <id>] [--apply]` — finds and removes the user docs left behind by the older off-by-one `parse_projects` bug. Fingerprint: `imported=True` AND `propel_id=""` AND `email_address` present but not a valid email AND `import_source` starts with `projects-`. For each matched user it prunes the doc-ref from every team's `users[]` that references it, then deletes the user doc. Dry-run by default. After running, re-run `import_hackathon_users_from_csv.py --csv-type projects` against the affected events to import the real members.
+- `backfill_devpost_winners.py --event-id <id> --devpost-url <url> [--projects-csv <path>] [--apply]` — scrapes the Devpost project gallery for EVERY project tile, flagging winners (`aside.entry-badge img.winner`). For each project it matches to a Firestore team via a layered strategy: `teams.devpost_link` exact-URL → team name (case-insensitive) → email-overlap via Devpost projects CSV (auto-discovered from `/tmp/devpost_files/<event_id>/projects-*.csv`). Two backfills happen in one pass: (1) any matched team with an empty `devpost_link` gets the gallery URL written; (2) matched WINNERS additionally get `/software/<slug>` fetched for prize text + member names, with prize strings mapped to status — "1st place" → `FOUNDING_ENGINEERS`, "Completion" or "2nd place" → `COMPLETION_SUPPORT`, anything else marked Winner → `CATEGORY_WINNER` (rank-based; multi-prize teams get the best status, all prize text retained in `awards: []`). Conflicts (team already has a different `devpost_link`) are logged but never overwritten. Unmatched winners exit with code 2 so a human notices; unmatched non-winners are listed for visibility but don't fail the run (typical for teams that registered only on Devpost). Only sets `status`, `awards`, `winners_backfilled_at/source`, and `devpost_link`; never touches `users[]`. Re-runnable. Adds `beautifulsoup4` to requirements.
 
 ## Resend audience sync
 `scripts/sync_resend_audience.py --source {all|profiles|volunteers|mentors|judges|sponsors|helpers|leads} --audience "<name>" [--event-id <id>] [--selected-only] [--apply]` — pulls emails from Firestore (`users.email_address`, `volunteers.email` filtered by `volunteer_type`, `leads.email`) and upserts contacts into a Resend audience (creates if missing). Dry-run by default. Re-runnable: lists existing audience contacts first and only POSTs new emails. Needs `RESEND_API_KEY` with audiences scope — the existing `RESEND_WELCOME_EMAIL_KEY` is send-only and will 401. Uses the deprecated `resend.Audiences` SDK class (now an alias for Segments) — fine for now, but if it breaks switch to `resend.Segments`.

diff --git a/Dockerfile b/Dockerfile
@@ -24,4 +24,4 @@ ENV GUNICORN_CMD_ARGS="--bind=[::]:6060 --workers=2"
 # Copy project
 COPY . /app/
 # Run the application
-CMD ["venv/bin/gunicorn", "api.wsgi:app", "--log-file=-", "--log-level", "debug", "--preload", "--workers", "1"]
+CMD ["venv/bin/gunicorn", "api.wsgi:app", "--log-file=-", "--log-level", "debug", "--preload", "--workers", "1", "--timeout", "120"]
diff --git a/api/messages/messages_service.py b/api/messages/messages_service.py
@@ -335,18 +335,71 @@ def get_profile_metadata_old(propel_id):
     }
 
 
+# Fields returned to the /admin/profiles consumers (page + UserSearchDialog).
+# Keep in sync with frontend src/pages/admin/profile/index.js and
+# src/components/admin/UserSearchDialog.js. Heavy (history) and unused (mailing
+# address, propel_id, want_stickers) fields are deliberately omitted — those
+# views issue their own /profile/<id> fetch when a row is opened.
+_ADMIN_PROFILE_LEAN_FIELDS = (
+    "name",
+    "nickname",
+    "email_address",
+    "user_id",
+    "profile_image",
+    "last_login",
+    "github",
+    "linkedin_url",
+    "instagram_url",
+    "company",
+    "education",
+    "role",
+    "shirt_size",
+    "expertise",
+    "why",
+)
+
+
+def _lean_admin_profile(doc):
+    """Build the trimmed user payload served to the admin profile search.
+
+    Copies the non-None lean fields, flattens any DocumentReference inside the
+    badges/teams/hackathons lists to its id string, and keeps only `hours`
+    from each volunteering entry. The heavy `history` field is never read.
+    """
+    data = doc.to_dict() or {}
+    lean = {"id": doc.id}
+    lean.update(
+        (name, data[name])
+        for name in _ADMIN_PROFILE_LEAN_FIELDS
+        if data.get(name) is not None
+    )
+
+    for list_field in ("badges", "teams", "hackathons"):
+        items = data.get(list_field)
+        if not isinstance(items, list):
+            continue
+        lean[list_field] = [
+            item.id if isinstance(item, firestore.DocumentReference) else item
+            for item in items
+        ]
+
+    entries = data.get("volunteering")
+    if isinstance(entries, list):
+        hours_only = (e.get("hours", 0) for e in entries if isinstance(e, dict))
+        lean["volunteering"] = [{"hours": h} for h in hours_only]
+
+    return lean
+
+
+# Single-entry cache (the function takes no args) with a 5-minute TTL: absorbs
+# tab refreshes and back-to-back admin loads; new signups appear within minutes.
+@cached(cache=TTLCache(maxsize=1, ttl=300), key=lambda: "all")
 def get_all_profiles():
     db = get_db()
-    docs = db.collection('users').stream()  # steam() gets all records
-    if docs is None:
-        return {[]}
-    else:
-        results = []
-        for doc in docs:
-            results.append(doc_to_json(docid=doc.id, doc=doc))
-
+    docs = db.collection('users').stream()
+    results = [_lean_admin_profile(doc) for doc in docs]
     logger.info(f"get_all_profiles returned {len(results)} profiles")
-    return { "profiles": results }
+    return {"profiles": results}
 
 
 # Caching is not needed because the parent method already is caching

diff --git a/api/messages/messages_views.py b/api/messages/messages_views.py
@@ -204,7 +204,10 @@
     logger.info("PATCH /hackathon called")
     user_id = get_authenticated_user_id()
     if user_id:
-        return vars(save_hackathon(request.get_json(), user_id))
+        result = save_hackathon(request.get_json(), user_id)
+        if isinstance(result, tuple):
+            return result
+        return vars(result)
     return {"error": "Unauthorized"}, 401
 
 

diff --git a/common/utils/firestore_helpers.py b/common/utils/firestore_helpers.py
@@ -67,7 +67,7 @@ def doc_to_json(docid=None, doc=None, depth=0):
         return doc
 
     if d_json is None:
-        logger.warn(f"doc.to_dict() is NoneType | docid={docid} doc={doc}")
+        logger.warning(f"doc.to_dict() is NoneType | docid={docid} doc={doc}")
         return
 
     # If any values in d_json is a list, add only the document id to the list for DocumentReference or DocumentSnapshot

diff --git a/common/utils/openai_api.py b/common/utils/openai_api.py
@@ -143,7 +143,8 @@ def generate_and_save_image_to_cdn(directory, text):
         model="gpt-image-1",
         prompt=prompt,
         n=1,
-        size="1024x1024"       
+        size="1024x1024",
+        timeout=90.0
     ) 
 
     # Create a short filename from input text

diff --git a/common/utils/validators.py b/common/utils/validators.py
@@ -1,4 +1,5 @@
 import re
+from copy import deepcopy
 from urllib.parse import urlparse
 import logging
 from datetime import datetime
@@ -165,6 +166,140 @@ def validate_hackathon_data(data):
         validate_planning_subobject(planning)
 
 
+def validate_hackathon_data_partial(data):
+    """Validate hackathon data with partial-save semantics.
+
+    Required fields and date integrity always raise ValueError (hard fail).
+    Each optional field is validated individually; failures strip the field
+    from the returned data and record it in skipped_fields.
+
+    Args:
+        data: hackathon payload dict; it is deep-copied and never mutated.
+
+    Returns:
+        (cleaned_data, skipped_fields) where cleaned_data is a deep copy of
+        ``data`` with invalid optional fields removed, and skipped_fields is a
+        list of ``{"field": str, "reason": str}`` dicts.
+
+    Raises:
+        ValueError: if a required field is missing/empty or dates are invalid.
+    """
+    # Hard fail: required fields and date ordering
+    required_fields = ["title", "description", "location", "start_date", "end_date", "type", "image_url", "event_id"]
+    for field in required_fields:
+        if field not in data or not data[field]:
+            raise ValueError(f"Missing required field: {field}")
+    try:
+        start_date = datetime.fromisoformat(data["start_date"])
+        end_date = datetime.fromisoformat(data["end_date"])
+        if end_date <= start_date:
+            raise ValueError(f"End date must be after start date: start_date={start_date}, end_date={end_date}")
+    except TypeError as e:
+        # Non-string dates or a naive/aware mix: re-raise as the documented type.
+        raise ValueError(f"Invalid start_date/end_date: {e}") from e
+
+    cleaned = deepcopy(data)
+    skipped = []
+
+    def _skip(field, reason):
+        skipped.append({"field": field, "reason": reason})
+        logger.warning("Field '%s' failed validation and will not be saved: %s", field, reason)
+
+    def _is_int(value):
+        # bool subclasses int, but True/False are not valid counts or amounts.
+        return isinstance(value, int) and not isinstance(value, bool)
+
+    # Timezone
+    timezone = cleaned.get("timezone")
+    if timezone:
+        try:
+            ZoneInfo(timezone)
+        except (ZoneInfoNotFoundError, KeyError, ValueError, TypeError, OSError):
+            # ZoneInfo raises more than "not found" for malformed/non-string keys.
+            _skip("timezone", f"Invalid timezone: {timezone}")
+            cleaned.pop("timezone", None)
+
+    # Constraints — validate sub-fields individually
+    constraints = cleaned.get("constraints")
+    if constraints is not None:
+        if not isinstance(constraints, dict):
+            _skip("constraints", "constraints must be an object")
+            cleaned.pop("constraints", None)
+        else:
+            c = constraints  # part of the deep copy, so safe to prune in place
+
+            for k in ["max_people_per_team", "max_teams_per_problem", "min_people_per_team"]:
+                if k in c and not _is_int(c[k]):
+                    _skip(f"constraints.{k}", "must be an integer")
+                    c.pop(k)
+
+            if "hacker_required_questions" in c:
+                try:
+                    hrq = c["hacker_required_questions"]
+                    if not isinstance(hrq, dict):
+                        # .get() on a list/None would raise an uncaught AttributeError.
+                        raise ValueError("hacker_required_questions must be an object")
+                    questions = hrq.get("questions", [])
+                    if not isinstance(questions, list):
+                        raise ValueError("hacker_required_questions.questions must be a list")
+                    for i, q in enumerate(questions):
+                        if not isinstance(q, dict):
+                            raise ValueError(f"Question {i} must be an object")
+                        if not isinstance(q.get("question"), str) or not q.get("question"):
+                            raise ValueError(f"Question {i} must have a non-empty 'question' string")
+                        if not isinstance(q.get("required_answer"), bool):
+                            raise ValueError(f"Question {i} must have a boolean 'required_answer'")
+                        if not isinstance(q.get("error"), str) or not q.get("error"):
+                            raise ValueError(f"Question {i} must have a non-empty 'error' string")
+                except ValueError as e:
+                    _skip("constraints.hacker_required_questions", str(e))
+                    c.pop("hacker_required_questions")
+
+            arrival = c.get("judge_venue_arrival_time")
+            if arrival not in (None, ""):  # fullmatch: "$" would allow a trailing newline
+                if not isinstance(arrival, str) or not re.fullmatch(r"([01]\d|2[0-3]):[0-5]\d", arrival):
+                    _skip("constraints.judge_venue_arrival_time", "must be HH:MM (24-hour)")
+                    c.pop("judge_venue_arrival_time")
+
+            if "hacker_deposit" in c and c["hacker_deposit"] is not None:
+                try:
+                    hd = c["hacker_deposit"]
+                    if not isinstance(hd, dict):
+                        raise ValueError("hacker_deposit must be an object")
+                    if "enabled" in hd and not isinstance(hd["enabled"], bool):
+                        raise ValueError("hacker_deposit.enabled must be boolean")
+                    amount = hd.get("default_amount_cents")
+                    if amount is not None and not (_is_int(amount) and 0 <= amount <= 50000):
+                        raise ValueError("hacker_deposit.default_amount_cents must be a non-negative integer (cents) up to 50000")
+                except ValueError as e:
+                    _skip("constraints.hacker_deposit", str(e))
+                    c.pop("hacker_deposit")
+
+            if "meals" in c and c["meals"] is not None:
+                try:
+                    validate_meals(c["meals"])
+                except ValueError as e:
+                    _skip("constraints.meals", str(e))
+                    c.pop("meals")
+
+    # Top-level optional sections, each checked by its dedicated validator.
+    section_validators = (
+        ("event_photos", validate_event_photos),
+        ("social_posts", validate_social_posts),
+        ("planning", validate_planning_subobject),
+    )
+    for section, validator in section_validators:
+        if cleaned.get(section) is None:
+            continue
+        try:
+            validator(cleaned[section])
+        except ValueError as e:
+            _skip(section, str(e))
+            cleaned.pop(section)
+
+    return cleaned, skipped
+
+
 ALLOWED_DIETARY_TAGS = {
     "vegetarian",
     "vegan",

diff --git a/db/firestore.py b/db/firestore.py
@@ -325,8 +325,40 @@ def fetch_problem_statements(self):
         debug(logger, "Fetching all problem statements")
         db = self.get_db()
         try:
-            docs = db.collection('problem_statements').stream()
-            results = [convert_to_entity(doc, ProblemStatement) for doc in docs or []]
+            docs = list(db.collection('problem_statements').stream())
+
+            # Collect every unique event DocumentReference across all docs so we
+            # can batch-fetch them in a single RPC instead of one per ref (N+1).
+            raw_data = []
+            all_event_refs: dict = {}
+            for doc in docs:
+                d = doc.to_dict() or {}
+                d['id'] = doc.id
+                raw_data.append(d)
+                for ref in d.get('events', []):
+                    if isinstance(ref, firestore.DocumentReference):
+                        all_event_refs[ref.id] = ref
+
+            # Single batch read for all referenced hackathon docs.
+            hackathon_map: dict = {}
+            if all_event_refs:
+                for snap in db.get_all(list(all_event_refs.values())):
+                    if snap.exists:
+                        h_data = snap.to_dict() or {}
+                        h_data['id'] = snap.id
+                        hackathon_map[snap.id] = Hackathon.deserialize(h_data)
+
+            # Build ProblemStatement objects using the prefetched hackathons.
+            results = []
+            for d in raw_data:
+                if 'events' in d:
+                    d['events'] = [
+                        hackathon_map[ref.id]
+                        for ref in d['events']
+                        if isinstance(ref, firestore.DocumentReference) and ref.id in hackathon_map
+                    ]
+                results.append(ProblemStatement.deserialize(d))
+
             info(logger, "Successfully fetched problem statements", count=len(results))
             return results
         except Exception as e:
@@ -419,6 +451,8 @@ def update_problem_statement(self, problem_statement: ProblemStatement):
             update_data['skills'] = problem_statement.skills
         if hasattr(problem_statement, 'rank'):
             update_data['rank'] = problem_statement.rank
+        if hasattr(problem_statement, 'slack_channel'):
+            update_data['slack_channel'] = problem_statement.slack_channel
 
         # Use update() instead of set() to only modify specified fields
         update_res = collection.document(problem_statement.id).update(update_data)

diff --git a/model/problem_statement.py b/model/problem_statement.py
@@ -27,6 +27,7 @@ def __init__(self):
         self.events = [] # TODO: Breaking change. This used to be called "events"
         self.status = None
         self.skills = []  # This is a list of skills, not a string
+        self.slack_channel = None
 
 
     @classmethod
@@ -40,6 +41,7 @@ def deserialize(cls, d):
         p.github = d['github'] if 'github' in d else None
         p.status = d['status'] if 'status' in d else None
         p.skills = d['skills'] if 'skills' in d else []
+        p.slack_channel = d['slack_channel'] if 'slack_channel' in d else None
 
         if 'events' in d:
             p.events = d['events']
@@ -75,7 +77,6 @@ def serialize(self):
             for event in self.events:
                 if event is not None:
                     if isinstance(event, dict):
-                        print(f"Event is already a dict: {event}")
                         d['events'].append(event)
                     else:
                         d['events'].append(event.serialize())
@@ -101,7 +102,7 @@ def serialize(self):
             d['references'] = []
 
         # Add remaining fields that aren't special cases
-        for field in ['github', 'status', 'first_thought_of', 'skills', 'rank']:
+        for field in ['github', 'status', 'first_thought_of', 'skills', 'rank', 'slack_channel']:
             if hasattr(self, field):
                 d[field] = getattr(self, field)
 

diff --git a/requirements.txt b/requirements.txt
@@ -14,6 +14,7 @@ python-dotenv>=1.0.1
 six==1.16.0
 Werkzeug>=3.1.3
 requests>=2.32.3
+beautifulsoup4>=4.12.2
 firebase_admin==6.5.0
 ratelimit==2.2.1
 cachetools==5.2.0