-
Notifications
You must be signed in to change notification settings - Fork 14
Add llms.txt and llms-full.txt #436
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
fntn
wants to merge
8
commits into
main
Choose a base branch
from
docs-llms-dot-txt
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+200
−0
Open
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
223e295
Add llms.txt and llms-full.txt
fntn 49f9acc
AI code review feedback
fntn 08a33c2
Adding path to robots.txt and meta tags
fntn 5860834
Potential fix for pull request finding
fntn 3d8ef63
Remove invalid Sitemap directive pointing to llms.txt from robots.txt
Copilot 5317dde
Better MDX parsing
fntn 2fde2f7
Merge branch 'docs-llms-dot-txt' of github.com:cloudsmith-io/cloudsmi…
fntn be67907
AI code review feedback
fntn File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| import { NextResponse } from 'next/server'; | ||
| import { buildFullDocs } from '@/lib/llms'; | ||
|
|
||
// Next.js route segment config: opt this handler into static rendering.
export const dynamic = 'force-static';

/**
 * Responds with the full documentation dump produced by `buildFullDocs()`,
 * served as UTF-8 plain text.
 */
export async function GET() {
  const headers = { 'Content-Type': 'text/plain; charset=utf-8' };
  const body = await buildFullDocs();
  return new NextResponse(body, { headers });
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| import { NextResponse } from 'next/server'; | ||
| import { buildIndex } from '@/lib/llms'; | ||
|
|
||
// Next.js route segment config: opt this handler into static rendering.
export const dynamic = 'force-static';

/**
 * Responds with the navigation index produced by `buildIndex()`,
 * served as UTF-8 plain text.
 */
export async function GET() {
  const headers = { 'Content-Type': 'text/plain; charset=utf-8' };
  const body = await buildIndex();
  return new NextResponse(body, { headers });
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,170 @@ | ||
| import fs from 'fs'; | ||
| import path from 'path'; | ||
|
|
||
| import menuData from '@/content/menu.json'; | ||
|
|
||
| import { loadMdxInfo } from './markdown/util'; | ||
|
|
||
/** Origin that every generated documentation link in this module is prefixed with. */
const BASE_URL = 'https://docs.cloudsmith.com';

/** One node of the navigation tree loaded from `menu.json`. */
interface MenuItem {
  /** Text used as the link label. */
  title: string;
  /**
   * Link target. Values starting with `http` are treated as external by this
   * module; entries without a path act as group labels.
   */
  path?: string;
  /** Nested entries, if any. */
  children?: MenuItem[];
}

/** A top-level entry of `menu.json`: a menu item that may also carry an icon. */
interface MenuSection extends MenuItem {
  /** Icon identifier; not read anywhere in this module. */
  icon?: string;
}
|
|
||
/**
 * Removes a single trailing `/` from `p` (if there is one) so that generated
 * URLs are consistent. Only one slash is dropped; `'/'` becomes `''`.
 */
function cleanPath(p: string): string {
  if (!p.endsWith('/')) {
    return p;
  }
  return p.substring(0, p.length - 1);
}
|
|
||
/**
 * Walks a menu subtree depth-first and records every internal page path in `paths`.
 *
 * A path is recorded with its trailing slash and first character removed
 * (the first character is expected to be the leading `/`). Entries whose path
 * starts with `http`, or that have no path, contribute nothing themselves but
 * their children are still visited.
 *
 * @param items - Menu entries to scan.
 * @param paths - Set that is mutated in place with the collected slugs.
 */
function collectMenuPaths(items: MenuItem[], paths: Set<string>): void {
  items.forEach((entry) => {
    const { path: href, children } = entry;

    if (href && !href.startsWith('http')) {
      const slug = cleanPath(href).substring(1);
      if (slug.length > 0) {
        paths.add(slug);
      }
    }

    if (children) {
      collectMenuPaths(children, paths);
    }
  });
}
|
|
||
/**
 * Recursively renders menu items as a nested Markdown bullet list of links.
 *
 * Items without a path, or whose path starts with `http`, emit no bullet of
 * their own; their children are rendered at the current depth instead.
 *
 * @param items - Menu entries to render.
 * @param depth - Nesting level of the emitted bullets (0 = top level).
 * @returns One string per output line, without trailing newlines.
 */
function renderItems(items: MenuItem[], depth: number): string[] {
  // Two spaces per level: CommonMark nests a bullet under its parent only when
  // it is indented to the parent's content column, which is 2 for "- ".
  // A single space would be parsed as a sibling item and flatten the tree.
  const indent = '  '.repeat(depth);
  const lines: string[] = [];

  for (const item of items) {
    if (!item.path || item.path.startsWith('http')) {
      // Group label (or external link): skip the entry, keep its children at this depth.
      if (item.children) lines.push(...renderItems(item.children, depth));
      continue;
    }

    const url = `${BASE_URL}${cleanPath(item.path)}`;
    lines.push(`${indent}- [${item.title}](${url})`);
    if (item.children) lines.push(...renderItems(item.children, depth + 1));
  }

  return lines;
}
|
|
||
/**
 * Builds the llms.txt index: a fixed header followed by a compact navigation
 * tree of every menu section and its pages.
 *
 * Sections are skipped unless they have a path starting with `/` and a
 * `children` array (this filters out non-content entries such as mobileNavbar).
 *
 * @returns The whole document as a single newline-joined string.
 */
export async function buildIndex(): Promise<string> {
  const menu = menuData as Record<string, MenuSection>;

  const header: string[] = [
    '# Cloudsmith Documentation',
    '',
    `> The universal artifact registry for secure software distribution and dependency management. Full content dump: ${BASE_URL}/llms-full.txt`,
    '',
    `[Cloudsmith Documentation](${BASE_URL}): The universal artifact registry for secure software distribution and dependency management.`,
    '',
  ];

  const body = Object.values(menu).flatMap((section) => {
    const { path: sectionPath, children } = section;
    if (!sectionPath?.startsWith('/') || !children) {
      return [];
    }
    // Section link, its pages one level down, then a blank separator line.
    return [
      `- [${section.title}](${BASE_URL}${cleanPath(sectionPath)})`,
      ...renderItems(children, 1),
      '',
    ];
  });

  return [...header, ...body].join('\n');
}
|
|
||
/**
 * Strips MDX-specific syntax (imports, JSX components) to leave plain markdown.
 *
 * Fenced code blocks and single-backtick inline code spans are kept verbatim:
 * they are swapped for placeholders before any stripping happens and restored
 * at the end, so code samples containing `import ...` lines or XML/JSX-looking
 * tags are not damaged and their blank lines are not collapsed.
 *
 * Prose carried in component props is preserved where possible:
 * - `<Note headline|heading="…">` becomes a bold label,
 * - self-closing `<Card … />` becomes `**[title](href)**: description`,
 * - `<BlockImage alt="…">` becomes `[Image: …]`.
 *
 * @param content - Raw MDX source of one page.
 * @returns Markdown with MDX constructs removed, runs of 3+ newlines collapsed
 *   to a blank line, and surrounding whitespace trimmed.
 */
function stripMdx(content: string): string {
  // Fenced block (``` or ~~~, closed by the same fence on its own line) OR inline code span.
  const codeSpan = /^[ \t]*(`{3,}|~{3,})[^\n]*\n[\s\S]*?^[ \t]*\1[ \t]*$|`[^`\n]+`/gm;
  const stash: string[] = [];

  // NUL-delimited placeholders cannot be matched by any of the patterns below.
  let result = content.replace(codeSpan, (code: string) => {
    stash.push(code);
    return `\u0000${stash.length - 1}\u0000`;
  });

  // Remove brace-style imports first so ones spread over several lines go as a unit...
  result = result.replace(
    /^import\s+(?:type\s+)?(?:[\w$]+\s*,\s*)?\{[^}]*\}\s*from\s*['"][^'"\n]+['"];?[ \t]*\n?/gm,
    '',
  );
  // ...then any remaining single-line import statements.
  result = result.replace(/^import\s+[^\n]+\n?/gm, '');

  // Extract prose from props before tags are stripped.

  // Note: headline/heading prop becomes a bold label before the note content.
  result = result.replace(/<Note[^>]*\bheadline="([^"]+)"[^>]*>/g, '\n**$1**\n');
  result = result.replace(/<Note[^>]*\bheading="([^"]+)"[^>]*>/g, '\n**$1**\n');

  // Card: self-closing, so props are the only content.
  result = result.replace(/<Card([^>]*\/>)/g, (_match: string, props: string) => {
    const title = props.match(/\btitle="([^"]+)"/)?.[1];
    const description = props.match(/\bdescription="([^"]+)"/)?.[1];
    const href = props.match(/\bhref="([^"]+)"/)?.[1];
    if (!title && !description) return '';
    const titlePart = title && href ? `**[${title}](${href})**` : title ? `**${title}**` : '';
    return '\n' + [titlePart, description].filter(Boolean).join(': ') + '\n';
  });

  // BlockImage: render alt text as a plain image description.
  result = result.replace(/<BlockImage[^>]*\balt="([^"]+)"[^>]*\/?>/g, '\n[Image: $1]\n');

  // Remove remaining self-closing JSX components (no extractable prose).
  result = result.replace(/<[A-Z][a-zA-Z]*[^>]*\/>/g, '');

  // Strip JSX block tags but keep their inner content.
  result = result.replace(/<[A-Z][a-zA-Z]*[^>]*>/g, '');
  result = result.replace(/<\/[A-Z][a-zA-Z]*>/g, '');

  // Remove common HTML-in-MDX elements.
  result = result.replace(/<br\s*\/?>/gi, '');

  // Tidy whitespace while code is still stashed, so code blocks keep their own blank lines.
  result = result.replace(/\n{3,}/g, '\n\n').trim();

  // Restore code verbatim; a callback avoids `$`-pattern expansion in the restored text.
  return result.replace(/\u0000(\d+)\u0000/g, (whole: string, index: string) => stash[Number(index)] ?? whole);
}
|
|
||
/**
 * Builds the llms-full.txt dump: the stripped MDX content of every page that
 * is present in the menu, grouped by section.
 *
 * Only the sections listed in `KNOWN_SECTIONS` that also have a path starting
 * with `/` are emitted. Within a section, a page is included when its path
 * (section prefix + slug, without trailing slash) appears somewhere in the
 * menu tree. Pages of the `documentation` section carry no prefix.
 *
 * @returns The whole document as a single newline-joined string.
 * @throws Whatever `loadMdxInfo` or the file read rejects with (e.g. a missing file).
 */
export async function buildFullDocs(): Promise<string> {
  const KNOWN_SECTIONS = ['documentation', 'guides', 'api'] as const;
  type SectionKey = (typeof KNOWN_SECTIONS)[number];

  const menu = menuData as Record<string, MenuSection>;
  const sections = Object.entries(menu)
    .filter(([key, s]) => s.path?.startsWith('/') && (KNOWN_SECTIONS as readonly string[]).includes(key))
    .map(([key, s]) => ({ key: key as SectionKey, label: s.title }));

  // Every internal path reachable from the menu, stored without leading/trailing slash.
  const menuPaths = new Set<string>();
  for (const section of Object.values(menu)) {
    if (section.path?.startsWith('/')) menuPaths.add(cleanPath(section.path).slice(1));
    if (section.children) collectMenuPaths(section.children, menuPaths);
  }

  const parts: string[] = [
    '# Cloudsmith Documentation',
    '',
    '> The universal artifact registry for secure software distribution and dependency management.',
    '',
    'Cloudsmith docs cover repositories, package formats, integrations, authentication, CI/CD workflows, and more.',
    '',
  ];

  const contentRoot = path.join(process.cwd(), 'src/content');

  for (const section of sections) {
    const prefix = section.key !== 'documentation' ? `${section.key}/` : '';

    // Compute each page path once; it is used both for the menu filter and the URL marker.
    const pages = (await loadMdxInfo(section.key))
      .map((info) => ({ file: info.file, pagePath: cleanPath(`${prefix}${info.slug}`) }))
      .filter((page) => menuPaths.has(page.pagePath));

    parts.push('---', '', `# ${section.label}`, '');

    for (const page of pages) {
      // Awaited (non-blocking) read, one file at a time: preserves page order and
      // keeps the number of simultaneously open files bounded.
      const raw = await fs.promises.readFile(path.join(contentRoot, page.file), 'utf-8');
      const cleaned = stripMdx(raw);

      // Pages that are empty after stripping are omitted entirely.
      if (!cleaned) continue;

      parts.push(`<!-- ${BASE_URL}/${page.pagePath} -->`, '', cleaned, '');
    }
  }

  return parts.join('\n');
}
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.