summary | refs | log | tree | commit | diff
path: root/scripts/organize_archive.py
diff options
context:
space:
mode:
author: John Bargman, 2026-04-15 08:23:09 +0000
committer: John Bargman, 2026-04-15 08:23:09 +0000
commit: db6b79edbfca3ab7049af2492acd567b099559f5 (patch)
tree: f54df4a8af70b057032e5af882bd6d1e6be87bf2 /scripts/organize_archive.py
parent: 4f877207787edd592687f338772d95c9ec2c7038 (diff)
download: nixtaml-website-main.tar
nixtaml-website-main.tar.gz
nixtaml-website-main.tar.bz2
nixtaml-website-main.tar.lz
nixtaml-website-main.tar.xz
nixtaml-website-main.tar.zst
nixtaml-website-main.zip
agentic ai; is so; fucking cool; omg (branch: main)
Diffstat (limited to 'scripts/organize_archive.py')
-rw-r--r-- scripts/organize_archive.py 90
1 files changed, 90 insertions, 0 deletions
diff --git a/scripts/organize_archive.py b/scripts/organize_archive.py
new file mode 100644
index 0000000..c7dfe48
--- /dev/null
+++ b/scripts/organize_archive.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+
+import json
+import os
+import shutil
+
def _page_path(url, site_url):
    """Return the '/'-separated archive path for *url*, or None to skip it.

    The site root maps to ``'home'``. URLs containing a fragment, URLs that
    are not under *site_url*, and URLs with ``.``/``..`` segments are skipped.
    """
    if '#' in url:
        return None
    if url == site_url:
        return 'home'
    # Require the '/' after the host so that look-alike hosts such as
    # "<site_url>so.example" are not mistaken for pages of this site.
    prefix = site_url + '/'
    if not url.startswith(prefix):
        return None
    segments = [seg for seg in url[len(prefix):].split('/') if seg]
    # Dot segments would let a URL write outside the organized directory.
    if any(seg in ('.', '..') for seg in segments):
        return None
    return '/'.join(segments) or 'home'


def _clean_text(text):
    """Strip every line of *text*, drop blank ones, and join with blank lines."""
    stripped = (line.strip() for line in (text or '').split('\n'))
    return '\n\n'.join(line for line in stripped if line)


def _write_index(index_path, pages):
    """Write index.md listing *pages* ({path: (title, link)}) by top-level section."""
    sections = {}
    for path in sorted(pages):
        sections.setdefault(path.split('/')[0], []).append(pages[path])

    with open(index_path, 'w', encoding='utf-8') as f:
        f.write('# Nixtamal Documentation Archive\n\n')
        f.write('This is an organized archive of the Nixtamal documentation.\n\n')
        f.write('## Contents\n\n')
        for top in sorted(sections):
            f.write(f'### {top.capitalize()}\n\n')
            for title, link in sections[top]:
                f.write(f'- [{title}]({link})\n')
            f.write('\n')


def main(archive_dir='docs/archive', site_url='https://nixtamal.toast.al'):
    """Turn the scraped site archive into a Markdown tree plus an index.

    Reads ``<archive_dir>/structure.json``, a mapping of page URL to a dict
    with ``title`` and ``text`` entries. For every page under *site_url* it
    writes ``<archive_dir>/organized/<path>/<last segment>.md``; the site root
    is stored as ``home/home.md``. ``<archive_dir>/assets`` is copied into the
    organized directory when it exists, and ``index.md`` links every page,
    grouped by the first path segment.

    Args:
        archive_dir: Directory holding ``structure.json`` (and optionally
            ``assets``); output goes to its ``organized`` subdirectory.
        site_url: Base URL of the archived site. A trailing slash is ignored.

    Raises:
        FileNotFoundError: If ``structure.json`` does not exist.
        json.JSONDecodeError: If ``structure.json`` is not valid JSON.
    """
    site_url = site_url.rstrip('/')

    # Read explicitly as UTF-8 so the result does not depend on the locale.
    with open(os.path.join(archive_dir, 'structure.json'), 'r', encoding='utf-8') as f:
        data = json.load(f)

    organized_dir = os.path.join(archive_dir, 'organized')
    os.makedirs(organized_dir, exist_ok=True)

    assets_src = os.path.join(archive_dir, 'assets')
    if os.path.exists(assets_src):
        shutil.copytree(assets_src, os.path.join(organized_dir, 'assets'),
                        dirs_exist_ok=True)

    # Keyed by path: URLs that differ only by a trailing slash land on the
    # same file, so they must produce a single index entry, not two.
    pages = {}

    for url, page_data in data.items():
        path = _page_path(url, site_url)
        if path is None:
            continue

        name = path.rsplit('/', 1)[-1]
        # Fall back to the page name when the title is missing or blank.
        title = (page_data.get('title') or '').strip() or name
        text = _clean_text(page_data.get('text'))

        page_dir = os.path.join(organized_dir, *path.split('/'))
        os.makedirs(page_dir, exist_ok=True)
        with open(os.path.join(page_dir, name + '.md'), 'w', encoding='utf-8') as f:
            f.write(f'# {title}\n\n{text}\n')

        # Markdown links always use '/', whatever the OS path separator is.
        pages[path] = (title, f'{path}/{name}.md')

    _write_index(os.path.join(organized_dir, 'index.md'), pages)
+
# Script entry point: build the archive only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()