Skip to content
This repository was archived by the owner on Mar 7, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .github/workflows/merge.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ on:
- master

jobs:
tests:
uses: ./.github/workflows/tests.yml
secrets:
openai_key: ${{ secrets.OPENAI_KEY }}
discord_webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}
# TODO: Re-enable once browser-forge is fixed for Camoufox, or else tests will never pass
# tests:
# uses: ./.github/workflows/tests.yml
# secrets:
# openai_key: ${{ secrets.OPENAI_KEY }}
# discord_webhook_url: ${{ secrets.DISCORD_WEBHOOK_URL }}

version:
needs: tests
uses: ./.github/workflows/version.yml
secrets:
git_token: ${{ secrets.GPAT_TOKEN }}
Expand Down
5 changes: 0 additions & 5 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,6 @@ on:
workflow_dispatch:

jobs:
checkout:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

tests:
uses: ./.github/workflows/tests.yml
secrets:
Expand Down
4 changes: 3 additions & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ jobs:
- name: Checkout
uses: actions/checkout@v4

- uses: actions/setup-node@v3

- name: Set env
run: echo "ENV=test" >> $GITHUB_ENV

Expand All @@ -20,7 +22,7 @@ jobs:
run: pdm install

- name: Install playwright
run: pdm run playwright install
run: pdm run playwright install --with-deps

- name: Run tests
run: PYTHONPATH=. pdm run pytest -v -ra api/backend/tests
Expand Down
7 changes: 5 additions & 2 deletions api/backend/ai/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from playwright.async_api import Page

# LOCAL
from api.backend.constants import RECORDINGS_ENABLED
from api.backend.ai.clients import ask_ollama, ask_open_ai, open_ai_key
from api.backend.job.models import CapturedElement
from api.backend.worker.logger import LOG
Expand All @@ -29,11 +30,13 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
LOG.info(f"Starting work for agent job: {agent_job}")
pages = set()

proxy = None

if agent_job["job_options"]["proxies"]:
proxy = random.choice(agent_job["job_options"]["proxies"])
LOG.info(f"Using proxy: {proxy}")

async with AsyncCamoufox(headless=True) as browser:
async with AsyncCamoufox(headless=not RECORDINGS_ENABLED, proxy=proxy) as browser:
page: Page = await browser.new_page()

await add_custom_items(
Expand Down Expand Up @@ -64,7 +67,7 @@ async def scrape_with_agent(agent_job: dict[str, Any]):
xpaths = parse_response(response)

captured_elements = await capture_elements(
page, xpaths, agent_job["job_options"]["return_html"]
page, xpaths, agent_job["job_options"].get("return_html", False)
)

final_url = page.url
Expand Down
1 change: 1 addition & 0 deletions api/backend/database/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def insert(query: str, values: tuple[Any, ...]):

except sqlite3.Error as e:
LOG.error(f"An error occurred: {e}")
raise e

finally:
cursor.close()
Expand Down
13 changes: 9 additions & 4 deletions api/backend/database/queries/job/job_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,15 @@ async def get_queued_job():
return res[0] if res else None


async def update_job(ids: list[str], field: str, value: Any):
query = f"UPDATE jobs SET {field} = ? WHERE id IN {format_list_for_query(ids)}"
res = update(query, tuple([value] + ids))
LOG.info(f"Updated job: {res}")
async def update_job(ids: list[str], updates: dict[str, Any]):
    """Apply ``updates`` (column name -> new value) to every job whose id is in ``ids``.

    No-op when ``updates`` or ``ids`` is empty — an empty SET clause or an
    empty ``IN ()`` list would be invalid SQL.

    NOTE(review): column names are interpolated directly into the statement;
    callers must only pass trusted, hard-coded field names, never user input.
    """
    if not updates or not ids:
        return

    # One "col = ?" placeholder per updated column, bound in dict order.
    set_clause = ", ".join(f"{field} = ?" for field in updates)
    query = f"UPDATE jobs SET {set_clause} WHERE id IN {format_list_for_query(ids)}"
    values = list(updates.values()) + ids
    res = update(query, tuple(values))
    LOG.debug(f"Updated job: {res}")


async def delete_jobs(jobs: list[str]):
Expand Down
31 changes: 30 additions & 1 deletion api/backend/job/job.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# STL
import logging
import datetime
from typing import Any

# LOCAL
Expand All @@ -12,7 +13,23 @@
LOG = logging.getLogger("Job")


def insert(item: dict[str, Any]) -> None:
async def insert(item: dict[str, Any]) -> None:
if check_for_job_completion(item["id"]):
await multi_field_update_job(
item["id"],
{
"agent_mode": item["agent_mode"],
"prompt": item["prompt"],
"job_options": item["job_options"],
"elements": item["elements"],
"status": "Queued",
"result": [],
"time_created": datetime.datetime.now().isoformat(),
"chat": None,
},
)
return

common_insert(
JOB_INSERT_QUERY,
(
Expand All @@ -33,6 +50,12 @@ def insert(item: dict[str, Any]) -> None:
LOG.debug(f"Inserted item: {item}")


def check_for_job_completion(id: str) -> dict[str, Any]:
    """Return the jobs row with the given ``id``, or ``{}`` when no row exists.

    Used to detect resubmission of a previously-run job so it can be reset
    and re-queued instead of being inserted a second time.
    """
    # Parameterized lookup; no f-string needed since there are no placeholders.
    query = "SELECT * FROM jobs WHERE id = ?"
    res = common_query(query, (id,))
    return res[0] if res else {}


async def get_queued_job():
query = (
"SELECT * FROM jobs WHERE status = 'Queued' ORDER BY time_created DESC LIMIT 1"
Expand All @@ -48,6 +71,12 @@ async def update_job(ids: list[str], field: str, value: Any):
LOG.debug(f"Updated job: {res}")


async def multi_field_update_job(id: str, fields: dict[str, Any]):
    """Update several columns of a single job row in one UPDATE statement.

    No-op when ``fields`` is empty — an empty SET clause is invalid SQL.

    NOTE(review): field names are interpolated into the query text; only pass
    trusted, hard-coded column names, never user input.
    """
    if not fields:
        return

    # One "col = ?" placeholder per column, bound in dict order; id binds last.
    set_clause = ", ".join(f"{field} = ?" for field in fields)
    query = f"UPDATE jobs SET {set_clause} WHERE id = ?"
    res = common_update(query, tuple(list(fields.values()) + [id]))
    LOG.debug(f"Updated job: {res}")


async def delete_jobs(jobs: list[str]):
if not jobs:
LOG.debug("No jobs to delete.")
Expand Down
14 changes: 8 additions & 6 deletions api/backend/job/job_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,20 +43,20 @@
@job_router.post("/update")
@handle_exceptions(logger=LOG)
async def update(update_jobs: UpdateJobs, _: User = Depends(get_current_user)):
"""Used to update jobs"""
await update_job(update_jobs.ids, update_jobs.field, update_jobs.value)

return JSONResponse(content={"message": "Jobs updated successfully."})
return {"message": "Jobs updated successfully"}


@job_router.post("/submit-scrape-job")
@handle_exceptions(logger=LOG)
async def submit_scrape_job(job: Job):
LOG.info(f"Recieved job: {job}")

job.id = uuid.uuid4().hex
if not job.id:
job.id = uuid.uuid4().hex

job_dict = job.model_dump()
insert(job_dict)
await insert(job_dict)

return JSONResponse(
content={"id": job.id, "message": "Job submitted successfully."}
Expand All @@ -70,7 +70,9 @@ async def retrieve_scrape_jobs(
):
LOG.info(f"Retrieving jobs for account: {user.email}")
ATTRIBUTES = "chat" if fetch_options.chat else "*"
job_query = f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ?"
job_query = (
f"SELECT {ATTRIBUTES} FROM jobs WHERE user = ? ORDER BY time_created ASC"
)
results = query(job_query, (user.email,))
return JSONResponse(content=jsonable_encoder(results[::-1]))

Expand Down
4 changes: 3 additions & 1 deletion api/backend/job/scraping/scraping.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,9 @@ async def scrape(

for page in pages:
elements.append(
await collect_scraped_elements(page, xpaths, job_options["return_html"])
await collect_scraped_elements(
page, xpaths, job_options.get("return_html", False)
)
)

return elements
2 changes: 1 addition & 1 deletion next-env.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
/// <reference types="next/image-types/global" />

// NOTE: This file should not be edited
// see https://nextjs.org/docs/basic-features/typescript for more information.
// see https://nextjs.org/docs/pages/building-your-application/configuring/typescript for more information.
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import { ExpandedTableInput } from "@/components/common/expanded-table-input";
import { UploadFile } from "@/components/common/upload-file";
import { useImportJobConfig } from "@/hooks/use-import-job-config";
import { RawJobOptions } from "@/types";
import {
Code as CodeIcon,
Expand Down Expand Up @@ -26,6 +28,7 @@ import {
useTheme,
} from "@mui/material";
import { Dispatch, SetStateAction, useEffect, useState } from "react";
import { toast } from "react-toastify";

export type AdvancedJobOptionsDialogProps = {
open: boolean;
Expand All @@ -43,6 +46,7 @@ export const AdvancedJobOptionsDialog = ({
multiPageScrapeEnabled = true,
}: AdvancedJobOptionsDialogProps) => {
const theme = useTheme();
const { handleUploadFile } = useImportJobConfig();
const [localJobOptions, setLocalJobOptions] =
useState<RawJobOptions>(jobOptions);

Expand All @@ -69,6 +73,18 @@ export const AdvancedJobOptionsDialog = ({
onClose();
};

const onUploadFile = async (file: File) => {
const errorOccured = await handleUploadFile(file);
if (errorOccured) {
handleClose();
toast.error("Failed to upload job config");
return;
} else {
handleClose();
toast.success("Job config uploaded successfully");
}
};

return (
<Dialog
open={open}
Expand Down Expand Up @@ -99,11 +115,18 @@ export const AdvancedJobOptionsDialog = ({
<Typography variant="h6" component="div">
Advanced Job Options
</Typography>
<Settings
sx={{
color: theme.palette.primary.contrastText,
}}
/>
<Box sx={{ display: "flex", alignItems: "center", gap: 1 }}>
<UploadFile
message="Upload Job Config"
fileTypes={["application/json"]}
onUploadFile={onUploadFile}
/>
<Settings
sx={{
color: theme.palette.primary.contrastText,
}}
/>
</Box>
</DialogTitle>

<DialogContent
Expand Down
1 change: 1 addition & 0 deletions src/components/common/upload-file/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from "./upload-file";
34 changes: 34 additions & 0 deletions src/components/common/upload-file/upload-file.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { Box, Button, Typography } from "@mui/material";

export type UploadFileProps = {
  /** Label rendered inside the upload button. */
  message: string;
  /** Accepted MIME types/extensions, joined into the input's `accept` attribute. */
  fileTypes?: string[];
  /** Invoked with the chosen file each time the user picks one. */
  onUploadFile: (file: File) => void;
};

/**
 * Button-styled file picker: a hidden `<input type="file">` nested inside a
 * label Button, forwarding the selected file to `onUploadFile`.
 */
export const UploadFile = ({
  message,
  fileTypes,
  onUploadFile,
}: UploadFileProps) => {
  const handleUploadFile = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0];
    if (file) {
      onUploadFile(file);
    }
    // Clear the input so re-selecting the same file still fires onChange.
    event.target.value = "";
  };

  return (
    <Box>
      <Button variant="contained" component="label">
        <Typography>{message}</Typography>
        <input
          type="file"
          hidden
          onChange={handleUploadFile}
          accept={fileTypes?.join(",")}
        />
      </Button>
    </Box>
  );
};
45 changes: 34 additions & 11 deletions src/components/jobs/favorites.tsx
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
import React from "react";
import StarIcon from "@mui/icons-material/Star";
import {
Tooltip,
Box,
Button,
Checkbox,
IconButton,
Table,
TableBody,
TableCell,
TableHead,
TableRow,
Box,
Checkbox,
Button,
Tooltip,
} from "@mui/material";
import router from "next/router";
import { Job } from "../../types";
import StarIcon from "@mui/icons-material/Star";

interface stateProps {
selectedJobs: Set<string>;
Expand All @@ -21,7 +21,12 @@ interface stateProps {

interface Props {
onSelectJob: (job: string) => void;
onNavigate: (elements: Object[], url: string, options: any) => void;
onNavigate: (
id: string,
elements: Object[],
url: string,
options: any
) => void;
onFavorite: (ids: string[], field: string, value: any) => void;
stateProps: stateProps;
}
Expand Down Expand Up @@ -87,11 +92,29 @@ export const Favorites = ({
</TableCell>
<TableCell sx={{ maxWidth: 100, overflow: "auto" }}>
<Button
onClick={() =>
onNavigate(row.elements, row.url, row.job_options)
}
onClick={() => {
if (row.agent_mode) {
router.push({
pathname: "/agent",
query: {
url: row.url,
prompt: row.prompt,
job_options: JSON.stringify(row.job_options),
id: row.id,
},
});
} else {
onNavigate(row.id, row.elements, row.url, row.job_options);
}
}}
size="small"
sx={{
minWidth: 0,
padding: "4px 8px",
fontSize: "0.625rem",
}}
>
Run
Rerun
</Button>
</TableCell>
</TableRow>
Expand Down
Loading
Loading