Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.26

require (
github.com/PuerkitoBio/goquery v1.12.0
github.com/UTDNebula/nebula-api/api v0.0.0-20260501050907-0dea4acc1dfe
github.com/UTDNebula/nebula-api/api v0.0.0-20260525053158-3209b0868dcf
github.com/chromedp/cdproto v0.0.0-20260321001828-e3e3800016bc
github.com/chromedp/chromedp v0.15.1
github.com/dongri/phonenumber v0.1.12
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ github.com/UTDNebula/nebula-api/api v0.0.0-20260327185527-807066607244 h1:vp2hsJ
github.com/UTDNebula/nebula-api/api v0.0.0-20260327185527-807066607244/go.mod h1:lp0oZHhVmqAqm0gf6Ald2jZXepZ0xFheTsW76T9wC7I=
github.com/UTDNebula/nebula-api/api v0.0.0-20260501050907-0dea4acc1dfe h1:/y+M3Up3U7PKvWV7yyZ7ouvNd8081Zwmd4p5NFD3kk4=
github.com/UTDNebula/nebula-api/api v0.0.0-20260501050907-0dea4acc1dfe/go.mod h1:i+PQZZ3qPtE4UxXkp3tQ46NWpzB8Of2/VNl0iZ/uv9I=
github.com/UTDNebula/nebula-api/api v0.0.0-20260525024309-4ea6ee54dd91 h1:KUwnKeedRHYncIcVYHMtXVmGUSp0LTxnbtO566GqC+c=
github.com/UTDNebula/nebula-api/api v0.0.0-20260525024309-4ea6ee54dd91/go.mod h1:i+PQZZ3qPtE4UxXkp3tQ46NWpzB8Of2/VNl0iZ/uv9I=
github.com/UTDNebula/nebula-api/api v0.0.0-20260525053158-3209b0868dcf h1:5IIliVrXFa8zyLrWkvK6Z5gtXVw8nrdsXWJkZqRONAU=
github.com/UTDNebula/nebula-api/api v0.0.0-20260525053158-3209b0868dcf/go.mod h1:i+PQZZ3qPtE4UxXkp3tQ46NWpzB8Of2/VNl0iZ/uv9I=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
Expand Down
1 change: 1 addition & 0 deletions parser/academicCalendarsParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
)

// What gets sent to Gemini, with the PDF content added
// WARNING: Changing this prompt invalidates all cached AI responses; only change it if necessary.
var academicCalendarPrompt = `Parse this PDF content and generate the following JSON schema.
{
Expand Down
14 changes: 8 additions & 6 deletions parser/budgetsParser.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io/fs"
"log"
"os"
"path/filepath"
"reflect"
"strings"
Expand All @@ -27,6 +29,7 @@ import (
)

// What gets sent to Gemini, with the PDF content added
// WARNING: Changing this prompt invalidates all cached AI responses; only change it if necessary.
var budgetPrompt = `Parse the content of these PDFs and generate the following JSON schema.

{
Expand Down Expand Up @@ -242,13 +245,15 @@ var budgetPrompt = `Parse the content of these PDFs and generate the following J
}
}

- Use the full UTD school names in this title text: School of Arts, Humanities, and Technology; School of Behavioral and Brain Sciences; School of Economic, Political and Policy Sciences; School of Engineering and Computer Science; School of Interdisciplinary Studies; School of Management; School of Natural Sciences and Mathematics.
- Use the full UTD school names in this title-case text: School of Arts, Humanities, and Technology; School of Behavioral and Brain Sciences; School of Economic, Political and Policy Sciences; School of Engineering and Computer Science; School of Interdisciplinary Studies; School of Management; School of Natural Sciences and Mathematics.
- In older years: School of Arts, Technology, and Emerging Communication; School of Arts & Humanities.
- Replace Brian with Brain in the School of Behavioral and Brain Sciences name if it is misspelled in the PDF.
- Always use the data listed for %s, not any previous years.
- Do not infer, estimate, or guess any values.
- If a value is missing or unclear, return null for that field.
- Only values surrounded by parentheses in the tables should be considered negative.
- In FY 2023 and earlier, Service Departments Funds, Designated Funds, Auxiliary Expenses, and Restricted Funds are not grouped by school and are too long to parse. Thus these tables should be omitted, only for these years.
- In FY 2019 and earlier, some of the PDFs have been scanned in and thus many pages may be missing in the text extraction. If much or all but the preamble of a PDF is missing, exclude it from the output.

Content of PDFs:

Expand Down Expand Up @@ -334,7 +339,8 @@ func ParseBudgets(inDir string, outDir string, budgetsDir string, useBackupBudge
}
return nil
})
if err != nil {
// Panic on any error, unless the directory does not exist and we're using backup budgets
if err != nil && !(errors.Is(err, os.ErrNotExist) && useBackupBudgets) {
panic(err)
}

Expand Down Expand Up @@ -444,10 +450,6 @@ func parseBudgetPdfs(paths []string) (schema.Budget, error) {

// Get response
result = response.Candidates[0].Content.Parts[0].Text
log.Print("Token counts:")
log.Printf("Prompt: %d", response.UsageMetadata.PromptTokenCount)
log.Printf("Thoughts: %d", response.UsageMetadata.ThoughtsTokenCount)
log.Printf("Total: %d", response.UsageMetadata.TotalTokenCount)

// Set cache for next time
err = utils.SetCache(hash, result, apiBucket)
Expand Down
6 changes: 3 additions & 3 deletions runners/monthly.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@
./api-tools -headless -verbose -upload -map

# scrape, parse, and upload budgets
#./api-tools -headless -verbose -scrape -budgets -useBackupBudgets
#./api-tools -headless -verbose -parse -budgets -useBackupBudgets
#./api-tools -headless -verbose -upload -budgets -useBackupBudgets
./api-tools -headless -verbose -scrape -budgets -useBackupBudgets
./api-tools -headless -verbose -parse -budgets -useBackupBudgets
./api-tools -headless -verbose -upload -budgets -useBackupBudgets
Loading