#!/usr/bin/env python3
"""Build a content inventory and a migration mapping for the archived site.

The script walks ``docs/archive/organized`` (relative to the current working
directory), collects every Markdown page and every file below ``assets/``,
and writes two reports:

* ``docs/inventory.md`` - table of pages, table of assets, summary totals.
* ``docs/migration-mapping.md`` - how each old page maps onto a section of
  the new single-page layout, plus static recommendations.
"""
import os
import re
from collections import defaultdict

# Directory that is scanned; always written with forward slashes.
_BASE_PATH = 'docs/archive/organized'

_WORD_RE = re.compile(r'\w+')

# Opening/closing marker of a fenced code block: up to three spaces of
# indentation followed by a run of at least three backticks or tildes.
_FENCE_RE = re.compile(r'^ {0,3}(`{3,}|~{3,})')

# First path component below the base directory -> content type.
# 'cookbook' is absent on purpose: it depends on the path depth.
_SECTION_TYPES = {
    'home': 'Home Page',
    'install': 'Installation Guide',
    'manpage': 'Manpage',
    'changelog': 'Changelog',
    'community': 'Community',
    'faqs': 'FAQs',
    'funding': 'Funding',
    'roadmap': 'Roadmap',
    'real-world-showcase': 'Real-world Showcase',
}

# Content type -> (section in the new single-page layout, priority).
_SECTION_MAPPINGS = {
    'Home Page': ('Introduction/Hero', 'must-have'),
    'Installation Guide': ('Installation', 'must-have'),
    'Cookbook Index': ('Cookbook', 'should-have'),
    'Cookbook Recipe': ('Cookbook (subsection)', 'should-have'),
    'Manpage': ('Documentation/Manual', 'must-have'),
    'Changelog': ('Changelog', 'nice-to-have'),
    'Community': ('Community', 'should-have'),
    'FAQs': ('FAQs', 'should-have'),
    'Funding': ('Funding/Support', 'nice-to-have'),
    'Roadmap': ('Roadmap', 'nice-to-have'),
    'Real-world Showcase': ('Showcase', 'nice-to-have'),
    'Other': ('Miscellaneous', 'nice-to-have'),
}


def _relative_to_base(path: str) -> str:
    """Return *path* with forward slashes and the base-directory prefix removed."""
    # os.walk/os.path.join produce the native separator; without this the
    # '/'-based prefix below never matches on Windows.
    if os.sep != '/':
        path = path.replace(os.sep, '/')
    return path.replace(_BASE_PATH + '/', '')


def _md_cell(value) -> str:
    """Return *value* as text that is safe inside one Markdown table cell."""
    # An unescaped pipe would be read as a column separator.
    return str(value).replace('|', '\\|')


def extract_title(content: str) -> str:
    """Return the text of the first level-one Markdown heading in *content*.

    A trailing `` | Nixtamal`` site suffix is removed from the title.  Lines
    inside fenced code blocks are ignored, so a ``# comment`` in a shell or
    Nix snippet is not mistaken for the page title.

    Returns ``'Untitled'`` when no heading is found.
    """
    open_fence = None  # marker run of the fence we are currently inside
    for line in content.split('\n'):
        fence = _FENCE_RE.match(line)
        if open_fence is not None:
            # A fence closes on a run of the same character, at least as
            # long as the opener, with nothing else on the line.
            if (
                fence
                and fence.group(1)[0] == open_fence[0]
                and len(fence.group(1)) >= len(open_fence)
                and not line[fence.end():].strip()
            ):
                open_fence = None
            continue
        if fence:
            marker = fence.group(1)
            # A backtick run followed by more backticks on the same line is
            # inline code, not a fence opener.
            if not (marker[0] == '`' and '`' in line[fence.end():]):
                open_fence = marker
                continue
        if line.startswith('# '):
            return line[2:].strip().replace(' | Nixtamal', '')
    return 'Untitled'


def count_words(content: str) -> int:
    """Count ``\\w+`` runs in *content*, ignoring the site footer.

    Everything from the first occurrence of ``'Site made with Nix'`` onwards
    is treated as footer and excluded from the count.
    """
    body = content.partition('Site made with Nix')[0]
    return len(_WORD_RE.findall(body))


def infer_content_type(path: str) -> str:
    """Classify a page by the first directory below the archive base.

    *path* may be given with the ``docs/archive/organized/`` prefix and with
    either the native or the forward-slash separator.  Files that sit directly
    in the base directory, or in an unknown section, are ``'Other'``.
    """
    parts = _relative_to_base(path).split('/')
    if len(parts) < 2:
        return 'Other'
    section = parts[0]
    if section == 'cookbook':
        # cookbook/<file> is the index; anything deeper is a recipe.
        return 'Cookbook Index' if len(parts) == 2 else 'Cookbook Recipe'
    return _SECTION_TYPES.get(section, 'Other')


def _collect_pages(base_path: str):
    """Read every ``.md`` file below *base_path*; return (pages, total_words)."""
    md_files = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(base_path)
        for name in names
        if name.endswith('.md')
    ]

    pages = []
    total_words = 0
    for md_file in md_files:
        try:
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Error reading {md_file}: {e}")
            continue
        word_count = count_words(content)
        pages.append({
            'title': extract_title(content),
            'word_count': word_count,
            'type': infer_content_type(md_file),
            'path': _relative_to_base(md_file),
        })
        total_words += word_count
    return pages, total_words


def _collect_assets(assets_path: str):
    """Stat every file below *assets_path*; return (assets, total_size)."""
    assets = []
    total_size = 0
    if not os.path.exists(assets_path):
        return assets, total_size
    for root, _dirs, names in os.walk(assets_path):
        for name in names:
            path = os.path.join(root, name)
            try:
                size = os.path.getsize(path)
            except OSError as e:
                print(f"Error getting info for {path}: {e}")
                continue
            ext = name.split('.')[-1].lower() if '.' in name else 'unknown'
            assets.append({
                'path': _relative_to_base(path),
                'type': ext,
                'size': size,
            })
            total_size += size
    return assets, total_size


def _write_inventory(pages, total_words, assets, total_asset_size) -> None:
    """Write ``docs/inventory.md``: page table, asset table and totals."""
    with open('docs/inventory.md', 'w', encoding='utf-8') as f:
        f.write('# Content Inventory\n\n')
        f.write('## Pages\n\n')
        f.write('| Title | Word Count | Type | Path |\n')
        f.write('|-------|------------|------|------|\n')
        for page in sorted(pages, key=lambda x: x['path']):
            f.write(
                f"| {_md_cell(page['title'])} | {page['word_count']} "
                f"| {page['type']} | {_md_cell(page['path'])} |\n"
            )

        f.write('\n## Assets\n\n')
        f.write('| Path | Type | Size (bytes) |\n')
        f.write('|------|------|--------------|\n')
        for asset in sorted(assets, key=lambda x: x['path']):
            f.write(
                f"| {_md_cell(asset['path'])} | {_md_cell(asset['type'])} "
                f"| {asset['size']} |\n"
            )

        f.write('\n## Summary Statistics\n\n')
        f.write(f'- Total Pages: {len(pages)}\n')
        f.write(f'- Total Words: {total_words}\n')
        f.write(f'- Total Assets: {len(assets)}\n')
        f.write(f'- Total Asset Size: {total_asset_size} bytes\n')


def _write_migration_mapping(pages) -> None:
    """Write ``docs/migration-mapping.md``: per-page mapping plus static advice."""
    with open('docs/migration-mapping.md', 'w', encoding='utf-8') as f:
        f.write('# Migration Mapping\n\n')
        f.write('This document outlines how the archived Nixtamal website content will be mapped to the new single-page website structure.\n\n')
        f.write('## Section Mappings\n\n')
        f.write('Each old page is mapped to a section in the new single-page layout.\n\n')
        f.write('| Old Page | New Section | Priority |\n')
        f.write('|----------|-------------|----------|\n')
        for page in sorted(pages, key=lambda x: x['path']):
            section, priority = _SECTION_MAPPINGS.get(
                page['type'], ('Other', 'nice-to-have')
            )
            f.write(f"| {_md_cell(page['title'])} | {section} | {priority} |\n")

        f.writelines([
            '\n## Content Grouping Suggestions\n\n',
            '- **Introduction/Hero**: Combine home page content with key features and showcase.\n',
            '- **Installation**: Direct installation guide.\n',
            '- **Cookbook**: Group all cookbook recipes under expandable sections or tabs.\n',
            '- **Documentation/Manual**: Include manpages with proper formatting.\n',
            '- **Community**: Community links and information.\n',
            '- **FAQs**: Frequently asked questions.\n',
            '- **Changelog, Roadmap, Funding, Showcase**: Place in footer or separate sections with navigation.\n',
            '\n## Priority Rankings\n\n',
            '- **Must-have**: Introduction/Hero, Installation, Documentation/Manual\n',
            '- **Should-have**: Cookbook, Community, FAQs\n',
            '- **Nice-to-have**: Changelog, Roadmap, Funding, Real-world Showcase\n',
            '\n## Recommendations for New Website Structure\n\n',
            'The new single-page website should have a sticky navigation header with sections: Home, Install, Cookbook, Docs, Community.\n',
            'Use smooth scrolling or anchors for navigation within the page.\n',
            'For cookbook, use accordion or tabbed interface for recipes to keep it organized.\n',
            'Assets like CSS and logo should be integrated into the single-page design.\n',
        ])


def main():
    """Scan the archive and regenerate both Markdown reports.

    Paths are relative to the current working directory.  Files that cannot
    be read or stat'ed are reported on stdout and skipped.
    """
    base_path = _BASE_PATH
    pages, total_words = _collect_pages(base_path)
    assets, total_asset_size = _collect_assets(os.path.join(base_path, 'assets'))
    _write_inventory(pages, total_words, assets, total_asset_size)
    _write_migration_mapping(pages)


if __name__ == '__main__':
    main()