|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Generate llms.txt and llms-full.txt for developer.seatable.com. |
| 4 | +
|
| 5 | +Reads the nav structure from mkdocs.yml and produces: |
| 6 | + - docs/llms.txt (compact overview with links) |
| 7 | + - docs/llms-full.txt (full markdown content of all pages) |
| 8 | +
|
| 9 | +Usage: |
| 10 | + python3 scripts/generate_llms_txt.py |
| 11 | +""" |
| 12 | + |
| 13 | +import os |
| 14 | +import re |
| 15 | +import sys |
| 16 | + |
| 17 | +import yaml |
| 18 | + |
# All paths are derived from this script's own location: the repository root
# is the parent of the directory that contains this file.
_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
REPO_ROOT = os.path.dirname(_SCRIPT_DIR)

# Inputs/outputs inside the repository.
DOCS_DIR = os.path.join(REPO_ROOT, "docs")
MKDOCS_YML = os.path.join(REPO_ROOT, "mkdocs.yml")

# Public site root used to build absolute page links (no trailing slash).
BASE_URL = "https://developer.seatable.com"

# Basenames of nav entries to leave out of both generated files
# (fragments, includes, non-content).
SKIP_FILES = {"includes.md"}
| 26 | + |
| 27 | + |
def load_mkdocs_config():
    """Parse ``mkdocs.yml`` and return its contents as a dict.

    mkdocs.yml uses ``!!python/name:`` and ``!!python/object:`` tags for
    plugins/extensions. Those tags are not understood by the safe loader, so
    every node carrying a ``tag:yaml.org,2002:python/`` tag is loaded as
    ``None`` instead of raising.

    Returns:
        The parsed configuration, or an empty dict when the file is empty.
    """

    # Register the constructor on a private subclass. Calling
    # add_multi_constructor() on yaml.SafeLoader itself would modify the
    # shared class and silently change yaml.safe_load() for the whole process.
    class _MkDocsLoader(yaml.SafeLoader):
        """SafeLoader that ignores python-specific YAML tags."""

    _MkDocsLoader.add_multi_constructor(
        "tag:yaml.org,2002:python/",
        lambda loader, suffix, node: None,
    )

    # Explicit encoding: do not depend on the platform's locale default.
    with open(MKDOCS_YML, "r", encoding="utf-8") as f:
        # Still a safe load: the loader derives from SafeLoader.
        # An empty document parses to None; normalize so callers can .get().
        return yaml.load(f, Loader=_MkDocsLoader) or {}
| 38 | + |
| 39 | + |
def extract_nav_pages(nav, section_path=""):
    """Recursively extract (section, title, md_path) tuples from the nav.

    Args:
        nav: The ``nav`` list from mkdocs.yml. Items are either bare path
            strings or single-key dicts mapping a title to a path (page) or
            to a list (nested section). ``None`` is treated as empty.
        section_path: Label of the enclosing section; empty at the top level.

    Returns:
        A list of ``(section, title, md_path)`` tuples in nav order. ``title``
        is ``""`` for bare-path entries. For nested sections the label is the
        full path joined with " > " (e.g. "Python > Objects"), so that equally
        named subsections under different parents stay distinct.
    """
    pages = []
    for item in nav or []:
        if isinstance(item, str):
            # Bare path like "ruby/index.md"
            pages.append((section_path, "", item))
        elif isinstance(item, dict):
            for title, value in item.items():
                if isinstance(value, str):
                    # "Ruby: ruby/index.md" — a top-level single page acts as
                    # its own section when there is no parent section.
                    pages.append((section_path or title, title, value))
                elif isinstance(value, list):
                    # Nested section: keep the parent in the label instead of
                    # replacing it, otherwise e.g. "JavaScript > Objects" and
                    # "Python > Objects" would both collapse into "Objects".
                    nested = f"{section_path} > {title}" if section_path else title
                    pages.extend(extract_nav_pages(value, section_path=nested))
    return pages
| 58 | + |
| 59 | + |
def md_path_to_url(md_path, base_url=None):
    """Convert a docs-relative markdown path to a site URL.

    Examples:
        index.md                    -> <base>/
        python/index.md             -> <base>/python/
        python/objects/metadata.md  -> <base>/python/objects/metadata/

    Args:
        md_path: Path of the page relative to the docs directory. An absolute
            URL (external nav link) is returned unchanged.
        base_url: Site root to prefix; defaults to the module's ``BASE_URL``.

    Returns:
        The absolute URL of the rendered page.
    """
    # External links in the nav are already complete URLs.
    if "://" in md_path:
        return md_path

    if base_url is None:
        base_url = BASE_URL

    url_path = md_path
    # Only a trailing ".md" is the file extension; ".md" elsewhere in the
    # path (e.g. a "notes.mdx/" directory) must be left alone.
    if url_path.endswith(".md"):
        url_path = url_path[: -len(".md")] + "/"
    # Only a path component that is exactly "index" maps to its directory;
    # a page such as "reindex.md" keeps its own URL.
    if url_path == "index/" or url_path.endswith("/index/"):
        url_path = url_path[: -len("index/")]
    return f"{base_url.rstrip('/')}/{url_path}"
| 69 | + |
| 70 | + |
def read_md_file(md_path, docs_dir=None):
    """Read a markdown file from the docs directory and return its content.

    Args:
        md_path: Path of the file relative to the docs directory.
        docs_dir: Directory to resolve ``md_path`` against; defaults to the
            module's ``DOCS_DIR``.

    Returns:
        The file's text, or ``None`` (after printing a warning to stderr)
        when the file does not exist.
    """
    root = DOCS_DIR if docs_dir is None else docs_dir
    full_path = os.path.join(root, md_path)
    try:
        # Decode as UTF-8 explicitly; the platform default encoding would
        # corrupt or reject non-ASCII content on some systems.
        with open(full_path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        print(f"Warning: {full_path} not found, skipping", file=sys.stderr)
        return None
| 79 | + |
| 80 | + |
# Patterns used by clean_for_llm(), compiled once at import time.
_INCLUDE_DIRECTIVE_RE = re.compile(
    r"\{%\s*include-markdown\s+.*?%\}", re.DOTALL
)
# Marker line of an admonition ("!!! tip ...") or of a collapsible
# details block ("??? note ..." / "???+ note ...").
_ADMONITION_MARKER_RE = re.compile(r"^(?:!!!|\?\?\?\+?) \w+.*$", re.MULTILINE)
_HTML_COMMENT_RE = re.compile(r"<!--.*?-->", re.DOTALL)
# Tolerates attributes (<style type="text/css">) and any letter case.
_STYLE_BLOCK_RE = re.compile(
    r"<style\b[^>]*>.*?</style\s*>", re.DOTALL | re.IGNORECASE
)
_EXCESS_BLANK_LINES_RE = re.compile(r"\n{3,}")


def clean_for_llm(content):
    """Remove MkDocs-specific syntax that adds noise for LLMs.

    Applied in order:
      1. ``{% include-markdown ... %}`` directives are removed.
      2. Admonition / details marker lines (``!!! tip "Title"``,
         ``??? note``, ``???+ note``) are removed. The block body is kept
         as it is, including its indentation.
      3. HTML comments are removed.
      4. ``<style>`` blocks are removed.
      5. Runs of three or more newlines are collapsed to two.

    NOTE(review): the patterns are applied to the whole text, so matching
    markup inside fenced code examples is stripped as well.

    Args:
        content: Raw markdown text of one page.

    Returns:
        The cleaned text with leading and trailing whitespace stripped.
    """
    content = _INCLUDE_DIRECTIVE_RE.sub("", content)
    content = _ADMONITION_MARKER_RE.sub("", content)
    content = _HTML_COMMENT_RE.sub("", content)
    content = _STYLE_BLOCK_RE.sub("", content)
    content = _EXCESS_BLANK_LINES_RE.sub("\n\n", content)
    return content.strip()
| 97 | + |
| 98 | + |
| 99 | +# --------------------------------------------------------------------------- |
| 100 | +# llms.txt (compact index) |
| 101 | +# --------------------------------------------------------------------------- |
def generate_llms_txt(config, nav_pages):
    """Build the text of ``llms.txt``: a compact, link-only site overview.

    Layout: an H1 with the site name, the site description as a blockquote
    (omitted when there is none), one "###" group per nav section with a
    bullet link per page, then fixed "Complete Content" and "Optional" link
    lists.

    Args:
        config: Parsed mkdocs.yml; ``site_name`` and ``site_description``
            are read from it.
        nav_pages: ``(section, title, md_path)`` tuples as produced by
            ``extract_nav_pages``.

    Returns:
        The file content as a single string ending with a newline.
    """
    # `or` also covers keys that are present but null/empty in the YAML,
    # which would otherwise crash on None.strip() or render an empty title.
    site_name = config.get("site_name") or "SeaTable Developer Manual"
    # Collapse to one line so a multi-line description cannot break out of
    # the blockquote.
    site_desc = " ".join((config.get("site_description") or "").split())

    lines = [f"# {site_name}", ""]
    if site_desc:
        lines += [f"> {site_desc}", ""]

    # Group pages by section, keeping first-seen (nav) order.
    sections = {}
    for section, title, md_path in nav_pages:
        if os.path.basename(md_path) in SKIP_FILES:
            continue
        # Pages outside any section are listed under "Introduction".
        sections.setdefault(section or "Introduction", []).append((title, md_path))

    lines += ["## Sections", ""]

    for section, pages in sections.items():
        lines += [f"### {section}", ""]
        for title, md_path in pages:
            # Untitled (bare-path) entries are labelled with their section.
            label = title or section
            lines.append(f"- [{label}]({md_path_to_url(md_path)})")
        lines.append("")

    lines += [
        "## Complete Content",
        "",
        f"- [llms-full.txt]({BASE_URL}/llms-full.txt):"
        " Complete developer manual with all pages, code examples, and API references",
        "",
        "## Optional",
        "",
        "- [SeaTable Website](https://seatable.com): Product website with features, pricing, and use cases",
        "- [REST API Reference](https://api.seatable.com): Interactive REST API documentation with all endpoints",
        "- [Admin Manual](https://admin.seatable.com): Self-hosting installation, configuration, and administration",
        "- [Community Forum](https://forum.seatable.com): Community support, discussions, and feature requests",
    ]
    return "\n".join(lines) + "\n"
| 146 | + |
| 147 | + |
| 148 | +# --------------------------------------------------------------------------- |
| 149 | +# llms-full.txt (complete content) |
| 150 | +# --------------------------------------------------------------------------- |
def generate_llms_full_txt(config, nav_pages):
    """Build the text of ``llms-full.txt``: every page's content in nav order.

    Layout: an H1 with the site name, the site description (omitted when
    there is none), the base URL, then for each page a "Source: <url>" line,
    the cleaned markdown and a "---" separator. A "## <section>" header is
    emitted whenever a page starts a section different from the previous one.

    Pages listed in ``SKIP_FILES`` and pages whose file cannot be read are
    left out.

    Args:
        config: Parsed mkdocs.yml; ``site_name`` and ``site_description``
            are read from it.
        nav_pages: ``(section, title, md_path)`` tuples as produced by
            ``extract_nav_pages``.

    Returns:
        The file content as a single string ending with a newline.
    """
    # `or` also covers keys that are present but null/empty in the YAML,
    # which would otherwise crash on None.strip() or render an empty title.
    site_name = config.get("site_name") or "SeaTable Developer Manual"
    site_desc = (config.get("site_description") or "").strip()

    lines = [f"# {site_name}", ""]
    if site_desc:
        lines += [site_desc, ""]
    lines += [f"Base URL: {BASE_URL}", ""]

    current_section = None
    for section, title, md_path in nav_pages:
        if os.path.basename(md_path) in SKIP_FILES:
            continue

        content = read_md_file(md_path)
        if content is None:
            # Missing file: read_md_file already printed a warning.
            continue

        content = clean_for_llm(content)

        # Section header, only once per run of pages in the same section.
        # It is emitted after the checks above, so a section whose pages
        # are all skipped produces no header.
        if section and section != current_section:
            lines += [f"## {section}", ""]
            current_section = section

        # Page content, preceded by its public URL.
        lines += [f"Source: {md_path_to_url(md_path)}", "", content]
        lines += ["", "---", ""]

    return "\n".join(lines) + "\n"
| 188 | + |
| 189 | + |
| 190 | +# --------------------------------------------------------------------------- |
| 191 | +# main |
| 192 | +# --------------------------------------------------------------------------- |
def main():
    """Generate ``llms.txt`` and ``llms-full.txt`` inside the docs directory.

    Loads mkdocs.yml, flattens its nav, writes both files and prints a
    one-line size summary for each.
    """
    # Tolerate an empty config file and a missing or null "nav" key.
    config = load_mkdocs_config() or {}
    nav_pages = extract_nav_pages(config.get("nav") or [])

    # Count only the pages that end up in the output.
    page_count = sum(
        1 for _, _, md_path in nav_pages
        if os.path.basename(md_path) not in SKIP_FILES
    )

    llms_txt = generate_llms_txt(config, nav_pages)
    llms_txt_path = os.path.join(DOCS_DIR, "llms.txt")
    # Fixed encoding and newline so the output is identical on every platform.
    with open(llms_txt_path, "w", encoding="utf-8", newline="\n") as f:
        f.write(llms_txt)
    # Report real bytes (UTF-8), not the number of characters.
    llms_txt_size = len(llms_txt.encode("utf-8"))
    print(f"llms.txt — {llms_txt_size:,} bytes, {page_count} pages")

    llms_full = generate_llms_full_txt(config, nav_pages)
    llms_full_path = os.path.join(DOCS_DIR, "llms-full.txt")
    with open(llms_full_path, "w", encoding="utf-8", newline="\n") as f:
        f.write(llms_full)
    llms_full_size = len(llms_full.encode("utf-8"))
    print(f"llms-full.txt — {llms_full_size:,} bytes")
| 209 | + |
| 210 | + |
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments