Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 36 additions & 8 deletions backend/API.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
# Configure CORS
app.use_cors(
allow_methods="GET POST PUT PATCH DELETE OPTIONS",
allow_origins="https://maatchaa.vercel.app http://localhost:3000",
allow_origins="https://www.maatchaa.co https://maatchaa.co https://maatchaa.vercel.app http://localhost:3000",
allow_headers="*",
allow_credentials=True,
max_age=86400 # Cache preflight for 24 hours
Expand All @@ -63,6 +63,18 @@
# Global SupabaseClient instance
supabase_client: SupabaseClient | None = None


def _qint(request: Request, key: str, default: int) -> int:
    """Read the query parameter ``key`` as an int, or return ``default``.

    The default is returned when the parameter is absent, is an empty list,
    or cannot be converted with ``int()``. Nothing is raised for bad input,
    so a malformed query string does not surface as an unhandled 500.
    """
    try:
        value = request.query.get(key)
        # The query mapping may hand back a list of values; use the first one.
        if isinstance(value, list):
            if not value:
                return default
            value = value[0]
        if value is None:
            return default
        return int(value)
    except (ValueError, TypeError):
        return default

@app.on_start
async def on_start(application: Application):
"""Initialize global resources when the application starts"""
Expand Down Expand Up @@ -220,15 +232,15 @@ async def ingest_products(request: Request):
shop_url = data.get("shop_url")

if not shop_url:
return json({"error": "shop_url is required"}, status=200)
return json({"error": "shop_url is required"}, status=400)

# Get products from Shopify
products = shopify.get_products(shop_url)

products_with_images = [p for p in products if p.get("image")]

if not products_with_images:
return json({"error": "No products with images found"}, status=200)
return json({"error": "No products with images found"}, status=404)

# Create embeddings and upsert to vector DB
embeddings = vectordb.embed_products(products_with_images)
Expand All @@ -243,7 +255,7 @@ async def ingest_products(request: Request):
except Exception as e:
import traceback
traceback.print_exc()
return json({"error": str(e)}, status=200)
return json({"error": str(e)}, status=500)

# Search for similar products by image URL
@post("/search/image")
Expand Down Expand Up @@ -325,8 +337,8 @@ async def get_products_vector(request: Request):
"""
try:
# Get query parameters
limit = int(request.query.get("limit", "20"))
offset = int(request.query.get("offset", "0"))
limit = _qint(request, "limit", 20)
offset = _qint(request, "offset", 0)

# Query vector database to get products
results = vectordb.index.query(
Expand Down Expand Up @@ -542,8 +554,8 @@ async def get_shopify_products(request: Request):
assert supabase_client

store_url = request.query.get("store_url")
limit = int(request.query.get("limit", "50"))
offset = int(request.query.get("offset", "0"))
limit = _qint(request, "limit", 50)
offset = _qint(request, "offset", 0)

query = supabase_client.client.table("shopify_products").select("*")

Expand Down Expand Up @@ -871,6 +883,22 @@ async def shopify_callback(request: Request):
return json({"error": "Invalid or expired state parameter"}, status=403)

state_record = state_result.data[0]

# Enforce the state's expiry window (CSRF token is meant to be short-lived).
expires_at = state_record.get("expires_at")
if expires_at:
from datetime import datetime, timezone as _tz
try:
exp = datetime.fromisoformat(str(expires_at).replace("Z", "+00:00"))
if exp.tzinfo is None:
exp = exp.replace(tzinfo=_tz.utc)
if datetime.now(_tz.utc) > exp:
# Mark consumed so it can't be retried, then reject.
await supabase_client.client.table("shopify_oauth_states").update({"used": True}).eq("state", state).execute()
return json({"error": "State parameter has expired"}, status=403)
except (ValueError, TypeError):
pass # Unparseable timestamp β€” fall through (used-flag still protects us)

company_id = state_record["company_id"]

# Mark state as used
Expand Down
16 changes: 11 additions & 5 deletions backend/utils/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import smtplib
import re
import html
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from typing import Optional, Dict, List
Expand Down Expand Up @@ -149,7 +150,7 @@ def create_partnership_email_template(
Returns:
HTML email content
"""
product_names = ", ".join([p.get("title", p.get("name", "")) for p in products[:3]])
product_names = ", ".join([html.escape(str(p.get("title", p.get("name", "")))) for p in products[:3]])
if len(products) > 3:
product_names += f" and {len(products) - 3} more"

Expand All @@ -159,15 +160,20 @@ def create_partnership_email_template(

We offer competitive commission rates and would love to discuss how we can work together to create authentic content that resonates with your audience."""

message_body = custom_message if custom_message else default_message
# custom_message is user-controlled and interpolated into HTML below — escape
# it to prevent HTML/script injection, then restore intended line breaks.
if custom_message:
message_body = html.escape(custom_message).replace("\n", "<br>")
else:
message_body = default_message

html = f"""
<html>
<body style="font-family: Arial, sans-serif; line-height: 1.6; color: #333;">
<div style="max-width: 600px; margin: 0 auto; padding: 20px;">
<h2 style="color: #2d3748;">Partnership Opportunity with {shop_name}</h2>
<h2 style="color: #2d3748;">Partnership Opportunity with {html.escape(shop_name or "")}</h2>

<p>Hi {creator_name},</p>
<p>Hi {html.escape(creator_name or "")},</p>

<p>{message_body}</p>

Expand All @@ -187,7 +193,7 @@ def create_partnership_email_template(
'''}

<p>Best regards,<br>
The {shop_name} Team</p>
The {html.escape(shop_name or "")} Team</p>

<hr style="border: none; border-top: 1px solid #e2e8f0; margin: 20px 0;">

Expand Down
119 changes: 119 additions & 0 deletions docs/backend-review.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Backend review (`backend/`)

Review of the Python/BlackSheep backend (~9,400 LOC, 44 files; `API.py` is a
2,344-line monolith with 41 routes). The backend is **not currently deployed**,
so these are pre-deployment items, not live incidents. It cannot be run in the
review environment, so code fixes below were verified by `py_compile` +
inspection, not runtime.

Status legend: ✅ fixed in this PR · 📋 documented, needs follow-up.

---

## 🔴 High

### 1. No authentication on any endpoint except the OAuth callback — 📋
Only `/shopify/callback` verifies anything (Shopify HMAC). The other 40 routes —
`/partnerships/*`, `/reels/interactions`, `/dashboard/stats`,
`/shopify/disconnect`, `/products/resync`, `/shopify/products/sync`, etc. — are
fully open. Anyone with the Cloud Run URL can read/modify all partnership data,
send emails, mint discount codes, and disconnect stores.

This is the same posture problem as the (now-fixed) RLS issue, one layer up.
**Recommended fix (before deploy):** a shared-secret guard, since there is no
user system yet. Add an `INTERNAL_API_KEY` env var and a small decorator:

```python
from functools import wraps
import os, hmac
def require_api_key(handler):
    """Reject requests that lack a valid ``x-api-key`` header.

    Wraps an async route handler so it runs only when the request carries
    the shared secret stored in the ``INTERNAL_API_KEY`` environment
    variable. An unset variable, a missing header, or a mismatch all
    produce a 401 JSON response instead of calling the handler.
    """
    @wraps(handler)
    async def wrapper(request: Request, *a, **kw):
        expected = os.getenv("INTERNAL_API_KEY")
        provided = request.get_first_header(b"x-api-key")
        # Compare as bytes: decoding the header can raise on invalid UTF-8
        # and compare_digest rejects non-ASCII str, either of which would
        # turn a bad key into a 500 instead of a 401.
        if not expected or not provided or not hmac.compare_digest(
                provided, expected.encode()):
            return json({"error": "Unauthorized"}, status=401)
        return await handler(request, *a, **kw)
    return wrapper
```

Apply to every mutating/sensitive route; the Next.js API routes (which already
hold secrets server-side) attach the header when calling the backend. Not done
in this PR because it touches ~40 handlers and can't be runtime-tested here;
should land with the deploy work. Longer term, replace with real per-merchant
auth (Shopify session tokens) — see the auth-mocked note in
`docs/schema-redesign.md`.

### 2. Open email relay + HTML injection — partially ✅
`POST /partnerships/<id>/send-email`: caller controls both `to_email` and
`custom_message`, the route is unauthenticated, and `custom_message` was
interpolated **unescaped** into an HTML email.
- ✅ **Fixed:** `custom_message`, `creator_name`, `shop_name`, and product
titles are now HTML-escaped in `utils/email.py`
(`create_partnership_email_template`).
- 📋 **Remaining:** the open-relay vector (sending to an arbitrary `to_email`)
needs #1 — without auth, even restricting the recipient to the partnership's
stored email is bypassable (an attacker could PATCH the record first). Close
this with the auth layer.

### 3. Unauthenticated Shopify discount-code creation — 📋
`POST /partnerships/<id>/generate-affiliate` with `create_discount: true` calls
real Shopify `create_discount_code` using the merchant's stored token. Open
endpoint → attacker mints discounts against a connected store. Needs #1 + an
ownership check.

---

## 🟡 Medium

### 4. OAuth state expiry never enforced — ✅
The callback checked `used=False` but not `expires_at`, and
`cleanup_expired_oauth_states()` was never called — so the 10-minute CSRF window
was effectively infinite and the table grew unbounded.
- ✅ **Fixed:** `/shopify/callback` now rejects (and consumes) states past
`expires_at`.
- 📋 **Follow-up:** schedule `cleanup_expired_oauth_states()` (cron/worker) to
prune old rows.

### 5. Errors returned as HTTP 200 — ✅
Four returns in `/ingest` and `/products/resync` sent error payloads with
`status=200`, so clients couldn't detect failure. Fixed to 400/404/500.

### 6. Unguarded `int(request.query...)` — ✅
`?limit=abc` would throw → unhandled 500. Added a `_qint()` helper that falls
back to the default; applied to the 4 call sites.

### 7. CORS origin stale — ✅
`allow_origins` listed `maatchaa.vercel.app` but prod is now `www.maatchaa.co`,
so real browser calls would be blocked. Added `https://www.maatchaa.co` and
`https://maatchaa.co`.

---

## 🟢 Low / cleanup — 📋

- **`API.py` monolith** (2,344 lines). `partnerships_api.py` (570 lines) already
duplicates the 7 partnership routes — one set is dead/shadowed. Consolidate and
split `API.py` by domain.
- **Legacy routes on dead schema:** `/shopify/products/*/match` etc. operate on
`product_matches` / `shopify_products` (the 0-row v2 tables). Remove with
Phase 2 of the schema cleanup (`docs/schema-redesign.md`).
- Several handlers `print()` debug context and `raise e` without `from`. Cosmetic.

## ✅ Things that were already good
- Shopify OAuth: real HMAC verification (`hmac.compare_digest`), single-use
DB-backed state, anchored `*.myshopify.com` domain validation, request timeouts.
- Redis rate-limiting, a job queue (arq), Prometheus metrics, and a test suite.
- No `eval`/`exec`/`subprocess`/raw-SQL injection surfaces.
- GCP service-account JSON and `.env` are gitignored (not committed).

---

## What this PR changes (code)
`backend/utils/email.py` — HTML-escape user-controlled fields.
`backend/API.py` — enforce OAuth state expiry; `_qint()` guard for query ints;
add `www.maatchaa.co`/`maatchaa.co` to CORS; fix 4 error returns to proper
status codes.

All changes verified with `python3 -m py_compile`. The backend was not run
(undeployed; no runtime in the review env), so please smoke-test on the next
deploy.