Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ All under `/v1/`:
| Endpoint | Purpose |
|----------|---------|
| `GET /health` | Health check (Postgres + Meilisearch status) |
| `GET /search?q=&platform=&sort=&limit=&offset=` | Meilisearch-powered search. Auto-triggers GitHub passthrough if <5 results. Reads optional `X-GitHub-Token` header to run passthrough on the user's 5000/hr quota instead of the backend's fallback quota. Response carries `passthroughAttempted: Boolean` so clients can distinguish "index was warm but returned nothing" from "GitHub also has nothing". |
| `GET /search?q=&platform=&sort=&limit=&offset=` | Meilisearch-powered search. Auto-triggers GitHub passthrough if <5 results. `sort` ∈ {`relevance` (default), `stars`, `recent` / `releases` (alias, by latest stable release date), `updated` (by repo `updated_at_gh`)}. `relevance` requires `q`; the others allow empty `q` for browse-mode listings. `sort=updated` is routed directly to Postgres FTS until the fetcher repo's `meili_sync.py` adds `updated_at_gh` to Meili's sortable-attributes. Reads optional `X-GitHub-Token` header to run passthrough on the user's 5000/hr quota instead of the backend's fallback quota. Response carries `passthroughAttempted: Boolean` so clients can distinguish "index was warm but returned nothing" from "GitHub also has nothing". |
| `GET /search/explore?q=&platform=&page=` | User-triggered deep GitHub search, paginated, ingests into index. Also reads `X-GitHub-Token`. Cold-path latency is 10–30s — clients must use a 30s timeout. |
| `GET /categories/{trending\|new-releases\|most-popular}/{android\|windows\|macos\|linux}` | Pre-ranked repo lists. Sort order is `search_score DESC NULLS LAST, rank ASC` — static `rank` is only the tie-breaker once behavioral signals exist. |
| `GET /topics/{privacy\|media\|productivity\|networking\|dev-tools}/{platform}` | Topic-bucketed repos. Same dynamic ordering as categories. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,13 @@ class MeilisearchClient(
}
}

// `recent` is the legacy name; `releases` is the new public name --
// both alias the same release-date sort. `updated` is the new
// GitHub-parity sort by repo-level updated_at_gh.
val sortList = when (sort) {
"stars" -> listOf("stars:desc")
"recent" -> listOf("latest_release_date:desc")
"recent", "releases" -> listOf("latest_release_date:desc")
"updated" -> listOf("updated_at_gh:desc")
else -> emptyList() // relevance = Meilisearch default ranking
}

Expand Down
8 changes: 8 additions & 0 deletions src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class RepoRepository {
openIssuesCount = this[Repos.openIssues],
licenseSpdxId = this[Repos.licenseSpdxId],
licenseName = this[Repos.licenseName],
license = nestedLicense(this[Repos.licenseSpdxId], this[Repos.licenseName]),
language = this[Repos.language],
topics = this[Repos.topics],
releasesUrl = "${this[Repos.htmlUrl]}/releases",
Expand All @@ -109,3 +110,10 @@ class RepoRepository {
}

}

// Assembles the nested RepoLicense out of the two flat license columns.
// When neither an SPDX id nor a license name is present the result is null,
// so a licenseless repo never carries an all-null license object
// (`{"spdxId": null, "name": null}`) in its response.
internal fun nestedLicense(spdxId: String?, name: String?): zed.rainxch.githubstore.model.RepoLicense? {
    // A single populated column is enough to justify emitting the object.
    val hasLicenseData = spdxId != null || name != null
    if (!hasLicenseData) return null
    return zed.rainxch.githubstore.model.RepoLicense(spdxId = spdxId, name = name)
}
29 changes: 24 additions & 5 deletions src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt
Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,42 @@ class SearchRepository {

// search_score tie-breaker on every path so behavioral signal still
// disambiguates within the primary sort's equivalence class.
// `recent` and `releases` are aliases for "by release date" -- the
// newer name aligns with the GET /v1/search?sort=releases option
// exposed to clients (matches user intent: stable releases first).
// `updated` mirrors GitHub's repo-level `updated_at` (any push,
// not necessarily a release).
val orderClause = when (sort) {
"stars" -> "ORDER BY stars DESC, search_score DESC NULLS LAST"
"recent" -> "ORDER BY latest_release_date DESC NULLS LAST, search_score DESC NULLS LAST"
"recent", "releases" -> "ORDER BY latest_release_date DESC NULLS LAST, search_score DESC NULLS LAST"
"updated" -> "ORDER BY updated_at_gh DESC NULLS LAST, search_score DESC NULLS LAST"
else -> "ORDER BY ts_rank(tsv_search, plainto_tsquery('english', ?)) DESC, search_score DESC NULLS LAST"
}
// Browse mode: empty query + non-relevance sort skips the ts_match
// filter entirely. Clients use this for "no search box, just sort
// the catalog" UX (Recently-Updated / Recent-Releases home tabs).
val browseMode = query.isBlank() && sort != "relevance"

val sql = buildString {
append(
"""
SELECT id, full_name, owner, name, owner_avatar_url, description, default_branch,
html_url, stars, forks, language, topics,
html_url, stars, forks, open_issues, license_spdx_id, license_name,
language, topics,
latest_release_date, latest_release_tag, download_count,
has_installers_android, has_installers_windows,
has_installers_macos, has_installers_linux,
trending_score, popularity_score, search_score,
updated_at_gh, created_at_gh
FROM repos
WHERE tsv_search @@ plainto_tsquery('english', ?)
""".trimIndent()
)
if (platformColumn != null) append(" AND $platformColumn = true")
// WHERE clause skipped in browse mode -- caller wants the whole
// catalog sorted by `sort`, not a text-matched subset.
if (!browseMode) append(" WHERE tsv_search @@ plainto_tsquery('english', ?)")
if (platformColumn != null) {
append(if (browseMode) " WHERE " else " AND ").append("$platformColumn = true")
}
append(" ").append(orderClause).append(" LIMIT ? OFFSET ?")
}

Expand All @@ -58,7 +73,7 @@ class SearchRepository {

conn.prepareStatement(sql).use { stmt ->
var i = 1
stmt.setString(i++, query)
if (!browseMode) stmt.setString(i++, query)
if (sort == "relevance") stmt.setString(i++, query) // ts_rank in ORDER BY
stmt.setInt(i++, limit)
stmt.setInt(i, offset)
Expand Down Expand Up @@ -91,6 +106,10 @@ class SearchRepository {
htmlUrl = rs.getString("html_url"),
stargazersCount = rs.getInt("stars"),
forksCount = rs.getInt("forks"),
openIssuesCount = rs.getInt("open_issues"),
licenseSpdxId = rs.getString("license_spdx_id"),
licenseName = rs.getString("license_name"),
license = nestedLicense(rs.getString("license_spdx_id"), rs.getString("license_name")),
language = rs.getString("language"),
topics = topics,
releasesUrl = "${rs.getString("html_url")}/releases",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,7 @@ class GitHubSearchClient(
openIssuesCount = repo.openIssuesCount,
licenseSpdxId = repo.license?.spdxId,
licenseName = repo.license?.name,
license = repo.license?.let { zed.rainxch.githubstore.model.RepoLicense(spdxId = it.spdxId, name = it.name) },
language = repo.language,
topics = repo.topics,
releasesUrl = "${repo.htmlUrl}/releases",
Expand Down
15 changes: 15 additions & 0 deletions src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,17 @@ data class RepoOwner(
val avatarUrl: String?,
)

// Nested form of the GitHub-detected license. Carries the same data as the
// flat `licenseSpdxId` / `licenseName` fields on RepoResponse; this shape
// matches the upstream GitHub object so a client doing direct-GitHub
// fallback can use one DTO. Prefer this nested form in new client code; the
// flat fields are kept for back-compat with shipped client builds.
// Both properties are nullable and default to null.
@Serializable
data class RepoLicense(
// Nested counterpart of the flat `licenseSpdxId` field.
val spdxId: String? = null,
// Nested counterpart of the flat `licenseName` field.
val name: String? = null,
)

@Serializable
data class RepoResponse(
val id: Long,
Expand All @@ -29,6 +40,10 @@ data class RepoResponse(
// version ("MIT License").
val licenseSpdxId: String? = null,
val licenseName: String? = null,
// Nested form of the same data, matching upstream GitHub's shape.
// Clients should prefer this; the flat fields above will be removed
// after the next client release migrates.
val license: RepoLicense? = null,
val language: String?,
val topics: List<String>,
val releasesUrl: String?,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ internal fun GitHubRepo.toMetadataOnlyResponse(): RepoResponse = RepoResponse(
openIssuesCount = openIssuesCount,
licenseSpdxId = license?.spdxId,
licenseName = license?.name,
license = license?.let { zed.rainxch.githubstore.model.RepoLicense(spdxId = it.spdxId, name = it.name) },
language = language,
topics = topics,
releasesUrl = "$htmlUrl/releases",
Expand Down
63 changes: 49 additions & 14 deletions src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ import zed.rainxch.githubstore.model.RepoResponse
import zed.rainxch.githubstore.model.SearchResponse

private val VALID_PLATFORMS = setOf("android", "windows", "macos", "linux")
private val VALID_SORTS = setOf("relevance", "stars", "recent")
// `recent` kept for back-compat; `releases` is the public-facing alias.
// `updated` mirrors GitHub's repo-level updated_at sort.
private val VALID_SORTS = setOf("relevance", "stars", "recent", "releases", "updated")
private const val ON_DEMAND_THRESHOLD = 5

fun Route.searchRoutes(
Expand All @@ -25,26 +27,30 @@ fun Route.searchRoutes(
metrics: SearchMetricsRegistry,
) {
get("/search") {
val query = call.request.queryParameters["q"]
if (query.isNullOrBlank()) {
// Empty `q` is allowed when `sort` is anything other than relevance --
// browse mode for "Recently Updated" / "Recent Releases" home tabs.
// sort=relevance still requires a query because text-rank needs one.
val rawQuery = call.request.queryParameters["q"]
val sort = call.request.queryParameters["sort"] ?: "relevance"
if (sort !in VALID_SORTS) {
return@get call.respond(
HttpStatusCode.BadRequest, mapOf("error" to "Missing query parameter 'q'")
HttpStatusCode.BadRequest,
mapOf("error" to "Invalid sort. Must be one of: $VALID_SORTS")
)
}

val platform = call.request.queryParameters["platform"]
if (platform != null && platform !in VALID_PLATFORMS) {
if ((rawQuery.isNullOrBlank()) && sort == "relevance") {
return@get call.respond(
HttpStatusCode.BadRequest,
mapOf("error" to "Invalid platform. Must be one of: $VALID_PLATFORMS")
mapOf("error" to "Missing query parameter 'q' (required when sort=relevance)")
)
}
val query = rawQuery.orEmpty()

val sort = call.request.queryParameters["sort"] ?: "relevance"
if (sort !in VALID_SORTS) {
val platform = call.request.queryParameters["platform"]
if (platform != null && platform !in VALID_PLATFORMS) {
return@get call.respond(
HttpStatusCode.BadRequest,
mapOf("error" to "Invalid sort. Must be one of: $VALID_SORTS")
mapOf("error" to "Invalid platform. Must be one of: $VALID_PLATFORMS")
)
}

Expand All @@ -53,6 +59,31 @@ fun Route.searchRoutes(

val userToken = call.request.headers["X-GitHub-Token"]?.takeIf { it.isNotBlank() }

// sort=updated needs `updated_at_gh` in Meili's sortable-attributes
// config -- not yet pushed by the fetcher repo's meili_sync.py.
// Route it directly to Postgres FTS where the column already exists.
// Once the fetcher learns the field, this branch can be dropped and Meili
// will serve the sort with full search semantics.
if (sort == "updated") {
val startTime = System.currentTimeMillis()
val items = searchRepository.search(
query = query,
platform = platform,
sort = sort,
limit = limit,
offset = offset,
)
val elapsed = (System.currentTimeMillis() - startTime).toInt()
metrics.recordPostgresFallback(items.size, elapsed)
call.response.header(HttpHeaders.CacheControl, "public, max-age=15, s-maxage=30")
return@get call.respond(SearchResponse(
items = items,
totalHits = items.size,
processingTimeMs = elapsed,
source = "postgres",
))
}

// Try Meilisearch first, fall back to Postgres FTS
try {
val result = meilisearch.search(
Expand All @@ -68,8 +99,10 @@ fun Route.searchRoutes(
var source = "meilisearch"
var passthroughAttempted = false

// On-demand: if few results, also search GitHub and ingest
if (items.size < ON_DEMAND_THRESHOLD && offset == 0) {
// On-demand passthrough only makes sense for actual text queries.
// Browse mode (empty q with a non-relevance sort) is a catalog
// listing -- no GitHub call is appropriate.
if (query.isNotBlank() && items.size < ON_DEMAND_THRESHOLD && offset == 0) {
passthroughAttempted = true
val githubResults = githubSearch.searchAndIngest(query, platform, limit = 10, userToken = userToken)
if (githubResults.isNotEmpty()) {
Expand All @@ -88,7 +121,8 @@ fun Route.searchRoutes(

// Log near-misses too — queries with 1-4 results are tractable training
// candidates; the worker prioritizes zero-result rows via result_count.
if (items.size < ON_DEMAND_THRESHOLD) {
// Browse mode has no query to log.
if (query.isNotBlank() && items.size < ON_DEMAND_THRESHOLD) {
searchMissRepository.logMiss(query, resultCount = items.size)
}

Expand Down Expand Up @@ -179,6 +213,7 @@ private fun zed.rainxch.githubstore.db.MeiliRepoHit.toRepoResponse() = RepoRespo
openIssuesCount = open_issues,
licenseSpdxId = license_spdx_id,
licenseName = license_name,
license = zed.rainxch.githubstore.db.nestedLicense(license_spdx_id, license_name),
language = language,
topics = topics,
releasesUrl = "$html_url/releases",
Expand Down
Loading