Skip to content

Commit 9eff0ad

Browse files
christophdbclaude
andcommitted
Add auto-generated llms.txt and llms-full.txt for LLM consumption
A build-time script reads the nav structure from mkdocs.yml and generates llms.txt (compact overview with links) and llms-full.txt (full markdown content) into docs/ so MkDocs deploys them to the site root. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b83ff41 commit 9eff0ad

3 files changed

Lines changed: 219 additions & 0 deletions

File tree

.github/workflows/deploy.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ jobs:
5353
- name: Install dependencies
5454
run: pip install -r requirements.txt
5555

56+
- name: Generate llms.txt and llms-full.txt
57+
run: python3 scripts/generate_llms_txt.py
58+
5659
- name: Build with mkdocs (strict mode)
5760
run: mkdocs build --strict --verbose
5861

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@
22
/site/
33
/.idea
44
.vscode
5+
6+
# Generated at build time by scripts/generate_llms_txt.py
7+
docs/llms.txt
8+
docs/llms-full.txt

scripts/generate_llms_txt.py

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Generate llms.txt and llms-full.txt for developer.seatable.com.
4+
5+
Reads the nav structure from mkdocs.yml and produces:
6+
- docs/llms.txt (compact overview with links)
7+
- docs/llms-full.txt (full markdown content of all pages)
8+
9+
Usage:
10+
python3 scripts/generate_llms_txt.py
11+
"""
12+
13+
import os
14+
import re
15+
import sys
16+
17+
import yaml
18+
19+
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
)
# Markdown sources consumed by MkDocs; the generated files are written here too.
DOCS_DIR = os.path.join(REPO_ROOT, "docs")
# MkDocs configuration file that holds the nav structure.
MKDOCS_YML = os.path.join(REPO_ROOT, "mkdocs.yml")
# Public site root used to build absolute page URLs.
BASE_URL = "https://developer.seatable.com"

# Basenames that are never emitted (fragments, includes, non-content).
SKIP_FILES = {"includes.md"}
26+
27+
28+
def load_mkdocs_config():
    """Parse mkdocs.yml and return the resulting configuration.

    mkdocs.yml uses ``!!python/name:`` and ``!!python/object:`` tags for
    plugins/extensions, which the safe loader rejects. Those values are not
    needed here, so every node tagged ``tag:yaml.org,2002:python/*`` is
    loaded as ``None``.

    Returns:
        The parsed configuration, or an empty dict when the file is empty.
    """

    class _MkDocsLoader(yaml.SafeLoader):
        """SafeLoader variant that tolerates python/* tags."""

    # Register on a private subclass: add_multi_constructor modifies the
    # class it is called on, and yaml.SafeLoader itself is shared by every
    # other user of PyYAML in the process.
    _MkDocsLoader.add_multi_constructor(
        "tag:yaml.org,2002:python/",
        lambda loader, suffix, node: None,
    )
    with open(MKDOCS_YML, "r", encoding="utf-8") as f:
        # An empty document parses to None; callers expect a mapping.
        return yaml.load(f, Loader=_MkDocsLoader) or {}
38+
39+
40+
def extract_nav_pages(nav, section_path=""):
    """Recursively extract (section, title, md_path) tuples from the nav.

    Args:
        nav: The ``nav`` list from mkdocs.yml. Entries are either bare path
            strings or single-key dicts mapping a title to a path or to a
            nested list. ``None`` (a ``nav:`` key without a value) is
            treated as an empty nav.
        section_path: Title of the enclosing section, or "" at the top level.

    Returns:
        A list of ``(section, title, md_path)`` tuples in nav order. For
        nested sections only the title of the immediately enclosing section
        is recorded. Bare paths get an empty title. Entries of any other
        type are ignored.
    """
    pages = []
    for item in nav or []:
        if isinstance(item, str):
            # Bare path like "ruby/index.md"
            pages.append((section_path, "", item))
        elif isinstance(item, dict):
            for title, value in item.items():
                if isinstance(value, str):
                    # "Ruby: ruby/index.md" — a titled page. At the top
                    # level there is no parent, so the title doubles as
                    # the section name.
                    section = section_path or title
                    pages.append((section, title, value))
                elif isinstance(value, list):
                    # Nested section: its title becomes the section of
                    # everything below it.
                    pages.extend(extract_nav_pages(value, section_path=title))
    return pages
58+
59+
60+
def md_path_to_url(md_path, base_url=None):
    """Convert a docs-relative markdown path to a site URL.

    Examples (relative to the base URL):
        index.md                   -> /
        python/index.md            -> /python/
        python/objects/metadata.md -> /python/objects/metadata/

    Args:
        md_path: Path of the page relative to the docs directory, using
            forward slashes as in mkdocs.yml.
        base_url: Site root without trailing slash. Defaults to BASE_URL.

    Returns:
        The absolute URL of the rendered page. Paths that do not end in
        ".md" are appended to the base URL unchanged.
    """
    base = BASE_URL if base_url is None else base_url
    if not md_path.endswith(".md"):
        return f"{base}/{md_path}"

    # Strip only the trailing extension; ".md" elsewhere in the path
    # (e.g. a directory called "a.md.notes") must stay untouched.
    stem_path = md_path[: -len(".md")]

    # Only a file whose name is exactly "index" maps to its directory;
    # "reindex.md" is a regular page.
    parent, _, name = stem_path.rpartition("/")
    if name == "index":
        stem_path = parent

    return f"{base}/{stem_path}/" if stem_path else f"{base}/"
69+
70+
71+
def read_md_file(md_path, docs_dir=None):
    """Read a markdown file from the docs directory and return its content.

    Args:
        md_path: Path of the file relative to the docs directory.
        docs_dir: Directory to resolve ``md_path`` against. Defaults to
            DOCS_DIR.

    Returns:
        The file content as a string, or ``None`` if the file does not
        exist (a warning is printed to stderr in that case).
    """
    root = DOCS_DIR if docs_dir is None else docs_dir
    full_path = os.path.join(root, md_path)
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale of the machine running the build.
        with open(full_path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        print(f"Warning: {full_path} not found, skipping", file=sys.stderr)
        return None
79+
80+
81+
def clean_for_llm(content):
    """Remove MkDocs-specific syntax that adds noise for LLMs.

    Applied in order:
      1. include-markdown directives are removed (they may span lines).
      2. Admonition marker lines are removed: ``!!! type "Title"`` and the
         collapsible ``??? type`` / ``???+ type`` forms. Only the marker
         line (including its title) is dropped; the indented body below it
         is left untouched.
      3. HTML comments are removed.
      4. ``<style>`` blocks, with or without attributes, are removed.
      5. Runs of three or more newlines are collapsed to a single blank
         line, and leading/trailing whitespace is stripped.

    Args:
        content: Raw markdown text of a page.

    Returns:
        The cleaned markdown text.
    """
    # Remove include-markdown directives
    content = re.sub(
        r"\{%\s*include-markdown\s+.*?%\}", "", content, flags=re.DOTALL
    )
    # Remove admonition marker lines (regular and collapsible)
    content = re.sub(
        r"^(?:!!!|\?\?\?\+?) \w+.*$", "", content, flags=re.MULTILINE
    )
    # Remove HTML comments
    content = re.sub(r"<!--.*?-->", "", content, flags=re.DOTALL)
    # Remove style blocks; \b keeps tags like <stylesheet> out of the match
    content = re.sub(
        r"<style\b[^>]*>.*?</style>", "", content, flags=re.DOTALL
    )
    # Collapse 3+ consecutive newlines into one blank line
    content = re.sub(r"\n{3,}", "\n\n", content)
    return content.strip()
97+
98+
99+
# ---------------------------------------------------------------------------
100+
# llms.txt (compact index)
101+
# ---------------------------------------------------------------------------
102+
def generate_llms_txt(config, nav_pages):
    """Build the compact llms.txt index.

    The output consists of an H1 with the site name, an optional blockquote
    with the site description, one "###" subsection per nav section listing
    its pages as markdown links, a pointer to llms-full.txt and a fixed
    list of related sites.

    Args:
        config: Parsed mkdocs.yml mapping; ``site_name`` and
            ``site_description`` are read from it.
        nav_pages: ``(section, title, md_path)`` tuples in nav order.

    Returns:
        The llms.txt content as a single string ending in a newline.
    """
    site_name = config.get("site_name", "SeaTable Developer Manual")
    # The key may be missing or explicitly null. Whitespace is collapsed so
    # a multi-line description stays inside the single blockquote line.
    site_desc = " ".join((config.get("site_description") or "").split())

    lines = [f"# {site_name}", ""]
    if site_desc:
        # Skip the blockquote entirely rather than emitting a bare "> ".
        lines += [f"> {site_desc}", ""]

    # Group pages by section, preserving first-seen order. Pages without a
    # section are collected under "Introduction".
    sections = {}
    for section, title, md_path in nav_pages:
        if os.path.basename(md_path) in SKIP_FILES:
            continue
        sections.setdefault(section or "Introduction", []).append((title, md_path))

    lines.append("## Sections")
    lines.append("")

    for section, pages in sections.items():
        lines.append(f"### {section}")
        lines.append("")
        for title, md_path in pages:
            url = md_path_to_url(md_path)
            # Untitled (bare-path) pages are labelled with their section.
            label = title or section
            lines.append(f"- [{label}]({url})")
        lines.append("")

    lines += [
        "## Complete Content",
        "",
        f"- [llms-full.txt]({BASE_URL}/llms-full.txt):"
        " Complete developer manual with all pages, code examples, and API references",
        "",
        "## Optional",
        "",
        "- [SeaTable Website](https://seatable.com): Product website with features, pricing, and use cases",
        "- [REST API Reference](https://api.seatable.com): Interactive REST API documentation with all endpoints",
        "- [Admin Manual](https://admin.seatable.com): Self-hosting installation, configuration, and administration",
        "- [Community Forum](https://forum.seatable.com): Community support, discussions, and feature requests",
    ]
    return "\n".join(lines) + "\n"
146+
147+
148+
# ---------------------------------------------------------------------------
149+
# llms-full.txt (complete content)
150+
# ---------------------------------------------------------------------------
151+
def generate_llms_full_txt(config, nav_pages):
    """Build llms-full.txt: the cleaned markdown of every nav page.

    The output starts with an H1 (site name), the site description when
    one is configured, and the base URL. Each page then follows in nav
    order as a "Source: <url>" line, the cleaned page content and a "---"
    separator. A "## <section>" heading is emitted whenever a page's
    section is non-empty and differs from the last emitted one.

    Args:
        config: Parsed mkdocs.yml mapping; ``site_name`` and
            ``site_description`` are read from it.
        nav_pages: ``(section, title, md_path)`` tuples in nav order.

    Returns:
        The llms-full.txt content as a single string ending in a newline.
    """
    site_name = config.get("site_name", "SeaTable Developer Manual")
    # The key may be missing or explicitly null in mkdocs.yml.
    site_desc = (config.get("site_description") or "").strip()

    lines = [f"# {site_name}", ""]
    if site_desc:
        # Avoid an empty paragraph when no description is configured.
        lines += [f"{site_desc}", ""]
    lines += [f"Base URL: {BASE_URL}", ""]

    current_section = None
    for section, title, md_path in nav_pages:
        if os.path.basename(md_path) in SKIP_FILES:
            continue

        content = read_md_file(md_path)
        if content is None:
            # Missing file: read_md_file already printed a warning.
            continue

        content = clean_for_llm(content)

        # Section header, emitted once per run of pages in that section
        if section and section != current_section:
            lines += [f"## {section}", ""]
            current_section = section

        # Page content
        url = md_path_to_url(md_path)
        lines.append(f"Source: {url}")
        lines.append("")
        lines.append(content)
        lines += ["", "---", ""]

    return "\n".join(lines) + "\n"
188+
189+
190+
# ---------------------------------------------------------------------------
191+
# main
192+
# ---------------------------------------------------------------------------
193+
def main():
    """Generate docs/llms.txt and docs/llms-full.txt from mkdocs.yml.

    Both files are written as UTF-8 regardless of the platform locale, and
    the sizes printed afterwards are the actual byte sizes on disk.
    """
    config = load_mkdocs_config()
    # "nav:" without a value parses to None; treat it as an empty nav.
    nav = config.get("nav") or []
    nav_pages = extract_nav_pages(nav)

    # Report only the pages that are actually emitted.
    page_count = sum(
        1
        for _, _, md_path in nav_pages
        if os.path.basename(md_path) not in SKIP_FILES
    )

    llms_txt = generate_llms_txt(config, nav_pages).encode("utf-8")
    llms_txt_path = os.path.join(DOCS_DIR, "llms.txt")
    # Binary mode: no locale-dependent encoding, no newline translation.
    with open(llms_txt_path, "wb") as f:
        f.write(llms_txt)
    print(f"llms.txt — {len(llms_txt):,} bytes, {page_count} pages")

    llms_full = generate_llms_full_txt(config, nav_pages).encode("utf-8")
    llms_full_path = os.path.join(DOCS_DIR, "llms-full.txt")
    with open(llms_full_path, "wb") as f:
        f.write(llms_full)
    print(f"llms-full.txt — {len(llms_full):,} bytes")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)