handler_agg.go
package main

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"time"

	"github.com/google/uuid"
	"github.com/lib/pq"
	"github.com/opsm0nkey/gator-cli/internal/database"
)

func handlerFetchFeed(ctx context.Context, s *state, cmd command) error {
	s.Logger.Debug("running handlerFetchFeed cmd:", "args", cmd.args)
	if len(cmd.args) != 1 {
		return fmt.Errorf("the fetch-feed command expects exactly one argument: time_between_reqs, a duration such as 1s, 1m, or 1h")
	}
	timeBetweenReqs, err := time.ParseDuration(cmd.args[0])
	if err != nil {
		return fmt.Errorf("error parsing time duration argument: %w", err)
	}
	fmt.Printf("Collecting feeds every %s\n", timeBetweenReqs)

	// start a ticker to scrape feeds; with the channel receive in the post
	// statement, the loop body runs immediately and then once per tick
	ticker := time.NewTicker(timeBetweenReqs)
	for ; ; <-ticker.C {
		if err := scrapeFeeds(ctx, s); err != nil {
			return fmt.Errorf("error scraping feed: %w", err)
		}
	}
}
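
// Example invocation (the binary and command names below are assumptions
// based on this repo's name; adjust to however the command is registered
// in main):
//
//	gator fetch-feed 1m
//
// scrapes the next feed immediately, then once per minute, until the
// process is interrupted or a scrape returns an error.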

// feed scraper - gets the next feed from the db, marks it fetched, downloads
// its content, and saves each item as a post
func scrapeFeeds(ctx context.Context, s *state) error {
	s.Logger.Debug("running scrapeFeeds helper")

	// get the next feed to fetch
	s.Logger.Debug("scrapeFeeds: calling GetNextFeedToFetch")
	nextFeed, err := s.Db.GetNextFeedToFetch(ctx)
	if err != nil {
		return fmt.Errorf("error fetching feed record: %w", err)
	}
	s.Logger.Debug("scrapeFeeds: fetched next feed:", "feed", nextFeed)

	// mark feed as fetched
	if err := s.Db.MarkFeedFetched(ctx, nextFeed.ID); err != nil {
		return fmt.Errorf("error marking feed as fetched: %w", err)
	}
	s.Logger.Debug("scrapeFeeds: marked feed as fetched", "feedID", nextFeed.ID, "feedName", nextFeed.Name)

	// fetch the feed content using the feed client
	s.Logger.Debug("scrapeFeeds: calling FetchFeed with url:", "url", nextFeed.Url)
	feedContent, err := s.RSSClient.FetchFeed(ctx, nextFeed.Url)
	if err != nil {
		return fmt.Errorf("error fetching feed content for url %s: %w", nextFeed.Url, err)
	}

	// save the feed items as posts
	s.Logger.Debug("scrapeFeeds: creating post records", "channel", feedContent.Channel.Link)
	for _, item := range feedContent.Channel.Item {
		publishedAt := sql.NullTime{}
		// parse the published date as RFC1123Z (RFC 1123 with a numeric
		// timezone, e.g. "Mon, 02 Jan 2006 15:04:05 -0700"); on success,
		// store it as a valid NullTime, otherwise leave it NULL
		if t, err := time.Parse(time.RFC1123Z, item.PubDate); err == nil {
			publishedAt = sql.NullTime{
				Time:  t,
				Valid: true,
			}
		}
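		// PubDate strings in other layouts (e.g. RFC1123 with a named zone
		// like "GMT") fail to parse above and are stored as NULL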
		post, err := s.Db.CreatePost(ctx, database.CreatePostParams{
			ID:          uuid.New(),
			Title:       item.Title,
			Url:         item.Link,
			Description: validateNullField(item.Description),
			PublishedAt: publishedAt,
			FeedID:      nextFeed.ID,
		})
		if err != nil {
			// check if the error is a unique constraint violation (i.e. the
			// post already exists); if so, skip it, otherwise return the error
			var pqErr *pq.Error
			if errors.As(err, &pqErr) && pqErr.Code == "23505" {
				s.Logger.Debug("scrapeFeeds: post already exists, skipping", "postTitle", item.Title)
				continue
			}
			return fmt.Errorf("error creating post record for feed %s: %w", nextFeed.Name, err)
		}
		s.Logger.Debug("scrapeFeeds: created post record", "postID", post.ID, "postTitle", post.Title)
	}
	s.Logger.Debug("scrapeFeeds: finished fetching feed content")
	return nil
}

// validateNullField converts a string to a sql.NullString, treating the
// empty string as NULL
func validateNullField(s string) sql.NullString {
	if len(s) == 0 {
		return sql.NullString{Valid: false}
	}
	return sql.NullString{String: s, Valid: true}
}