1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
|
#!/usr/bin/env python3
import json
import os
import shutil
def _page_path_from_url(url, base_url):
    """Map a page URL to its relative, slash-separated path in the archive.

    Args:
        url: URL key taken from ``structure.json``.
        base_url: Site root without a trailing slash.

    Returns:
        ``'home'`` for the site root, the URL path (no leading or trailing
        slashes) for any other page under *base_url*, or ``None`` when the
        URL must be skipped: it contains an anchor, it is not under
        *base_url* (including look-alike hosts that merely share the
        prefix), or its path contains a ``..`` segment.
    """
    if '#' in url:
        return None
    if url == base_url:
        return 'home'
    prefix = base_url + '/'
    # Require the slash boundary so 'https://site.al.evil.example/x' is not
    # mistaken for a page of 'https://site.al'.
    if not url.startswith(prefix):
        return None
    # strip('/') rather than rstrip('/'): a leading slash would make
    # os.path.join() drop the output directory and write to an absolute path.
    path = url[len(prefix):].strip('/')
    if not path:
        return 'home'
    # Refuse parent-directory segments so pages stay inside the output tree.
    if '..' in path.split('/'):
        return None
    return path


def _clean_text(text):
    """Strip each line of *text* and join the non-empty ones with blank lines."""
    stripped_lines = (line.strip() for line in text.split('\n'))
    return '\n\n'.join(line for line in stripped_lines if line)


def _write_index(index_path, pages):
    """Write ``index.md`` listing *pages* grouped by top-level path segment.

    Args:
        index_path: Destination file for the index.
        pages: Iterable of ``(path, title, rel_link)`` tuples.
    """
    sections = {}
    for page in pages:
        top = page[0].split('/')[0]
        sections.setdefault(top, []).append(page)

    with open(index_path, 'w', encoding='utf-8') as f:
        f.write('# Nixtamal Documentation Archive\n\n')
        f.write('This is an organized archive of the Nixtamal documentation.\n\n')
        f.write('## Contents\n\n')
        for top in sorted(sections):
            f.write(f'### {top.capitalize()}\n\n')
            for _path, title, rel_link in sorted(sections[top]):
                f.write(f'- [{title}]({rel_link})\n')
            f.write('\n')


def main(archive_dir='docs/archive', base_url='https://nixtamal.toast.al'):
    """Convert the scraped ``structure.json`` into a tree of Markdown pages.

    Reads ``<archive_dir>/structure.json`` (a mapping of URL to an object
    with ``title`` and ``text``), copies ``<archive_dir>/assets`` if it
    exists, writes one ``<page>/<page>.md`` file per site page under
    ``<archive_dir>/organized`` and finally an ``index.md`` linking to all
    of them.

    Args:
        archive_dir: Directory holding ``structure.json`` and, optionally,
            ``assets``. The output goes to its ``organized`` subdirectory.
        base_url: Root URL of the site; only pages under it are exported.

    Raises:
        FileNotFoundError: If ``structure.json`` does not exist.
        json.JSONDecodeError: If ``structure.json`` is not valid JSON.
    """
    base_url = base_url.rstrip('/')

    # Decode explicitly as UTF-8: the platform default encoding would
    # mis-read non-ASCII titles on some systems.
    with open(os.path.join(archive_dir, 'structure.json'), 'r', encoding='utf-8') as f:
        data = json.load(f)

    organized_dir = os.path.join(archive_dir, 'organized')
    os.makedirs(organized_dir, exist_ok=True)

    assets_src = os.path.join(archive_dir, 'assets')
    if os.path.exists(assets_src):
        shutil.copytree(assets_src, os.path.join(organized_dir, 'assets'), dirs_exist_ok=True)

    pages = []
    for url, page_data in data.items():
        path = _page_path_from_url(url, base_url)
        if path is None:
            continue

        segments = path.split('/')
        page_dir = os.path.join(organized_dir, *segments)
        os.makedirs(page_dir, exist_ok=True)

        # Fall back to the path / empty body when a scraped page lacks a
        # field, instead of aborting the whole export with a KeyError.
        title = (page_data.get('title') or path).strip()
        text = _clean_text(page_data.get('text') or '')

        filename = segments[-1] + '.md'
        with open(os.path.join(page_dir, filename), 'w', encoding='utf-8') as f:
            f.write(f'# {title}\n\n{text}\n')

        # Build the index link from the URL path so it always uses forward
        # slashes, whatever the OS path separator is.
        pages.append((path, title, f'{path}/{filename}'))

    _write_index(os.path.join(organized_dir, 'index.md'), pages)
# Script entry point: build the archive only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|