summaryrefslogtreecommitdiff
path: root/scripts/create_inventory.py
blob: 781b30eb8138f74b0a250b15a3eacf6067a72e42 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/usr/bin/env python3

import os
import re
from collections import defaultdict

def extract_title(content):
    """Return the text of the first level-one Markdown heading in *content*.

    A heading is any line that begins with ``'# '``. The heading marker and
    surrounding whitespace are removed, and every occurrence of the site
    suffix ``' | Nixtamal'`` is dropped from the result. When no such line
    exists, ``'Untitled'`` is returned.
    """
    heading = next(
        (row for row in content.split('\n') if row.startswith('# ')),
        None,
    )
    if heading is None:
        return 'Untitled'
    # str.replace leaves the text untouched when the suffix is absent, so no
    # membership check is needed first.
    return heading[2:].strip().replace(' | Nixtamal', '')

def count_words(content):
    """Count the words in *content*, ignoring the site footer.

    Everything from the first occurrence of "Site made with Nix" onwards is
    discarded before counting. A word is a maximal run of regex word
    characters, so punctuation such as an apostrophe splits a token in two.
    """
    # partition() hands back the whole text as the first item when the
    # footer marker is missing.
    body, _marker, _footer = content.partition('Site made with Nix')
    return sum(1 for _match in re.finditer(r'\w+', body))

def infer_content_type(path):
    """Classify an archived page by the top-level directory it lives in.

    The ``docs/archive/organized/`` prefix is removed from *path* and the
    remainder is split on ``/``. The first component selects the label.
    Paths with fewer than two components, and unrecognised sections, are
    classified as ``'Other'``.
    """
    segments = path.replace('docs/archive/organized/', '').split('/')
    if len(segments) < 2:
        return 'Other'

    top_level = segments[0]
    if top_level == 'cookbook':
        # A file directly inside cookbook/ is the index; anything nested
        # deeper is an individual recipe.
        return 'Cookbook Index' if len(segments) == 2 else 'Cookbook Recipe'

    labels = {
        'home': 'Home Page',
        'install': 'Installation Guide',
        'manpage': 'Manpage',
        'changelog': 'Changelog',
        'community': 'Community',
        'faqs': 'FAQs',
        'funding': 'Funding',
        'roadmap': 'Roadmap',
        'real-world-showcase': 'Real-world Showcase',
    }
    return labels.get(top_level, 'Other')

def _table_cell(value):
    """Return *value* as text that is safe inside a Markdown table cell."""
    # An unescaped "|" would be read as a column separator and shift every
    # following cell of the row.
    return str(value).replace('|', '\\|')


def _collect_pages(base_path):
    """Describe every ``.md`` file found below *base_path*.

    Returns a list of dicts with the keys ``title``, ``word_count``, ``type``
    and ``path`` (the file path with the archive prefix removed). Files that
    cannot be opened or decoded as UTF-8 are reported on stdout and skipped.
    """
    prefix = base_path + '/'
    pages = []
    for root, _dirs, files in os.walk(base_path):
        for name in files:
            if not name.endswith('.md'):
                continue
            md_file = os.path.join(root, name)
            try:
                with open(md_file, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError) as e:
                print(f"Error reading {md_file}: {e}")
                continue
            pages.append({
                'title': extract_title(content),
                'word_count': count_words(content),
                'type': infer_content_type(md_file),
                'path': md_file.replace(prefix, ''),
            })
    return pages


def _collect_assets(base_path):
    """Describe every file found below ``<base_path>/assets``.

    Returns a list of dicts with the keys ``path`` (archive prefix removed),
    ``type`` (lower-cased text after the last dot, or ``'unknown'`` when the
    name has no dot) and ``size`` in bytes. Files whose size cannot be read
    are reported on stdout and skipped.
    """
    assets_path = os.path.join(base_path, 'assets')
    assets = []
    if not os.path.exists(assets_path):
        return assets

    prefix = base_path + '/'
    for root, _dirs, files in os.walk(assets_path):
        for name in files:
            path = os.path.join(root, name)
            try:
                size = os.path.getsize(path)
            except OSError as e:
                print(f"Error getting info for {path}: {e}")
                continue
            ext = name.split('.')[-1].lower() if '.' in name else 'unknown'
            assets.append({
                'path': path.replace(prefix, ''),
                'type': ext,
                'size': size,
            })
    return assets


def _write_inventory(out_path, pages, assets):
    """Write the page table, asset table and summary totals to *out_path*."""
    total_words = sum(page['word_count'] for page in pages)
    total_asset_size = sum(asset['size'] for asset in assets)

    lines = [
        '# Content Inventory\n\n',
        '## Pages\n\n',
        '| Title | Word Count | Type | Path |\n',
        '|-------|------------|------|------|\n',
    ]
    lines.extend(
        f"| {_table_cell(page['title'])} | {page['word_count']} | "
        f"{page['type']} | {_table_cell(page['path'])} |\n"
        for page in pages
    )
    lines.extend([
        '\n## Assets\n\n',
        '| Path | Type | Size (bytes) |\n',
        '|------|------|--------------|\n',
    ])
    lines.extend(
        f"| {_table_cell(asset['path'])} | {_table_cell(asset['type'])} | "
        f"{asset['size']} |\n"
        for asset in assets
    )
    lines.extend([
        '\n## Summary Statistics\n\n',
        f'- Total Pages: {len(pages)}\n',
        f'- Total Words: {total_words}\n',
        f'- Total Assets: {len(assets)}\n',
        f'- Total Asset Size: {total_asset_size} bytes\n',
    ])

    with open(out_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)


def _write_migration_mapping(out_path, pages):
    """Write the old-page to new-section mapping document to *out_path*."""
    section_mappings = {
        'Home Page': ('Introduction/Hero', 'must-have'),
        'Installation Guide': ('Installation', 'must-have'),
        'Cookbook Index': ('Cookbook', 'should-have'),
        'Cookbook Recipe': ('Cookbook (subsection)', 'should-have'),
        'Manpage': ('Documentation/Manual', 'must-have'),
        'Changelog': ('Changelog', 'nice-to-have'),
        'Community': ('Community', 'should-have'),
        'FAQs': ('FAQs', 'should-have'),
        'Funding': ('Funding/Support', 'nice-to-have'),
        'Roadmap': ('Roadmap', 'nice-to-have'),
        'Real-world Showcase': ('Showcase', 'nice-to-have'),
        'Other': ('Miscellaneous', 'nice-to-have'),
    }
    # Any type missing from the table is treated exactly like 'Other', so the
    # fallback can never disagree with the table's own entry.
    fallback = section_mappings['Other']

    lines = [
        '# Migration Mapping\n\n',
        'This document outlines how the archived Nixtamal website content will be mapped to the new single-page website structure.\n\n',
        '## Section Mappings\n\n',
        'Each old page is mapped to a section in the new single-page layout.\n\n',
        '| Old Page | New Section | Priority |\n',
        '|----------|-------------|----------|\n',
    ]
    for page in pages:
        section, priority = section_mappings.get(page['type'], fallback)
        lines.append(f"| {_table_cell(page['title'])} | {section} | {priority} |\n")

    lines.extend([
        '\n## Content Grouping Suggestions\n\n',
        '- **Introduction/Hero**: Combine home page content with key features and showcase.\n',
        '- **Installation**: Direct installation guide.\n',
        '- **Cookbook**: Group all cookbook recipes under expandable sections or tabs.\n',
        '- **Documentation/Manual**: Include manpages with proper formatting.\n',
        '- **Community**: Community links and information.\n',
        '- **FAQs**: Frequently asked questions.\n',
        '- **Changelog, Roadmap, Funding, Showcase**: Place in footer or separate sections with navigation.\n',
        '\n## Priority Rankings\n\n',
        '- **Must-have**: Introduction/Hero, Installation, Documentation/Manual\n',
        '- **Should-have**: Cookbook, Community, FAQs\n',
        '- **Nice-to-have**: Changelog, Roadmap, Funding, Real-world Showcase\n',
        '\n## Recommendations for New Website Structure\n\n',
        'The new single-page website should have a sticky navigation header with sections: Home, Install, Cookbook, Docs, Community.\n',
        'Use smooth scrolling or anchors for navigation within the page.\n',
        'For cookbook, use accordion or tabbed interface for recipes to keep it organized.\n',
        'Assets like CSS and logo should be integrated into the single-page design.\n',
    ])

    with open(out_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)


def main():
    """Build the content inventory and migration mapping for the archive.

    Scans ``docs/archive/organized`` for Markdown pages and for files under
    its ``assets`` directory, then writes ``docs/inventory.md`` and
    ``docs/migration-mapping.md``. Paths are relative to the current working
    directory. Unreadable files are reported on stdout and left out of the
    reports.
    """
    base_path = 'docs/archive/organized'

    # Both reports list entries in path order; sort once up front.
    pages = sorted(_collect_pages(base_path), key=lambda x: x['path'])
    assets = sorted(_collect_assets(base_path), key=lambda x: x['path'])

    _write_inventory('docs/inventory.md', pages, assets)
    _write_migration_mapping('docs/migration-mapping.md', pages)

# Generate the reports only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == '__main__':
    main()