#!/usr/bin/env python3

import os
import re
from collections import defaultdict

def extract_title(content):
    """Return the text of the first level-one Markdown heading in ``content``.

    A heading is any line that begins with ``'# '``. The heading text is
    stripped of surrounding whitespace and every ``' | Nixtamal'`` site
    suffix is removed. ``'Untitled'`` is returned when no such line exists.
    """
    heading = next(
        (row for row in content.split('\n') if row.startswith('# ')),
        None,
    )
    if heading is None:
        return 'Untitled'
    # Drop the "# " marker first, then the site-name suffix if there is one.
    return heading[2:].strip().replace(' | Nixtamal', '')

def count_words(content):
    """Count the ``\\w+`` runs in ``content``, ignoring the site footer.

    Everything from the first occurrence of ``'Site made with Nix'`` onwards
    is treated as footer text and is left out of the count.
    """
    # partition() yields the whole string as its first element when the
    # footer marker is absent, so no separate "not found" branch is needed.
    body, _marker, _footer = content.partition('Site made with Nix')
    return sum(1 for _match in re.finditer(r'\w+', body))

def infer_content_type(path):
    """Classify an archived page by the top-level directory it lives in.

    The ``'docs/archive/organized/'`` prefix is removed from ``path`` and the
    remainder is split on ``'/'``. The first segment selects the content
    type. Paths with fewer than two segments, and sections that are not
    recognised, are reported as ``'Other'``.
    """
    labels_by_section = {
        'home': 'Home Page',
        'install': 'Installation Guide',
        'manpage': 'Manpage',
        'changelog': 'Changelog',
        'community': 'Community',
        'faqs': 'FAQs',
        'funding': 'Funding',
        'roadmap': 'Roadmap',
        'real-world-showcase': 'Real-world Showcase',
    }
    segments = path.replace('docs/archive/organized/', '').split('/')
    if len(segments) < 2:
        return 'Other'

    top_level = segments[0]
    if top_level == 'cookbook':
        # A file directly inside cookbook/ is the index; anything nested
        # deeper is an individual recipe.
        return 'Cookbook Index' if len(segments) == 2 else 'Cookbook Recipe'
    return labels_by_section.get(top_level, 'Other')

def _md_cell(value):
    """Return ``value`` as text that is safe inside a Markdown table cell."""
    # A bare pipe would be parsed as a column separator and shift the row.
    return str(value).replace('|', '\\|')


def _relative_posix(path, base_path):
    """Return ``path`` relative to ``base_path``, written with forward slashes."""
    return os.path.relpath(path, base_path).replace(os.sep, '/')


def _collect_pages(base_path):
    """Return one record per ``.md`` file under ``base_path``, sorted by path.

    Each record is a dict with the keys ``title``, ``word_count``, ``type``
    and ``path`` (relative to ``base_path``). Files that cannot be read or
    decoded as UTF-8 are reported on stdout and skipped.
    """
    pages = []
    for root, _dirs, files in os.walk(base_path):
        for name in files:
            if not name.endswith('.md'):
                continue
            md_file = os.path.join(root, name)
            # Only the read itself is guarded, so that a bug in the parsing
            # helpers is not mistaken for an unreadable file.
            try:
                with open(md_file, 'r', encoding='utf-8') as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError) as e:
                print(f"Error reading {md_file}: {e}")
                continue
            pages.append({
                'title': extract_title(content),
                'word_count': count_words(content),
                # infer_content_type() matches on a '/'-separated prefix, so
                # hand it a path normalised away from the platform separator.
                'type': infer_content_type(md_file.replace(os.sep, '/')),
                'path': _relative_posix(md_file, base_path),
            })
    return sorted(pages, key=lambda page: page['path'])


def _collect_assets(assets_path, base_path):
    """Return one record per file under ``assets_path``, sorted by path.

    Each record is a dict with the keys ``path`` (relative to ``base_path``),
    ``type`` (the lower-cased text after the last dot of the file name, or
    ``'unknown'`` when the name has no dot) and ``size`` in bytes. Files
    whose size cannot be read are reported on stdout and skipped.
    """
    assets = []
    # os.walk() yields nothing for a missing directory, so no existence
    # check is needed before walking.
    for root, _dirs, files in os.walk(assets_path):
        for name in files:
            path = os.path.join(root, name)
            try:
                size = os.path.getsize(path)
            except OSError as e:
                print(f"Error getting info for {path}: {e}")
                continue
            assets.append({
                'path': _relative_posix(path, base_path),
                'type': name.rsplit('.', 1)[-1].lower() if '.' in name else 'unknown',
                'size': size,
            })
    return sorted(assets, key=lambda asset: asset['path'])


def _write_inventory(pages, assets, out_path):
    """Write the page table, asset table and summary totals to ``out_path``."""
    total_words = sum(page['word_count'] for page in pages)
    total_asset_size = sum(asset['size'] for asset in assets)

    with open(out_path, 'w', encoding='utf-8') as f:
        f.write('# Content Inventory\n\n')

        f.write('## Pages\n\n')
        f.write('| Title | Word Count | Type | Path |\n')
        f.write('|-------|------------|------|------|\n')
        for page in pages:
            f.write(
                f"| {_md_cell(page['title'])} | {page['word_count']} "
                f"| {_md_cell(page['type'])} | {_md_cell(page['path'])} |\n"
            )

        f.write('\n## Assets\n\n')
        f.write('| Path | Type | Size (bytes) |\n')
        f.write('|------|------|--------------|\n')
        for asset in assets:
            f.write(
                f"| {_md_cell(asset['path'])} | {_md_cell(asset['type'])} "
                f"| {asset['size']} |\n"
            )

        f.write('\n## Summary Statistics\n\n')
        f.write(f'- Total Pages: {len(pages)}\n')
        f.write(f'- Total Words: {total_words}\n')
        f.write(f'- Total Assets: {len(assets)}\n')
        f.write(f'- Total Asset Size: {total_asset_size} bytes\n')


def _write_migration_mapping(pages, out_path):
    """Write the old-page to new-section mapping document to ``out_path``."""
    # Content type -> (section of the new single-page site, priority).
    section_mappings = {
        'Home Page': ('Introduction/Hero', 'must-have'),
        'Installation Guide': ('Installation', 'must-have'),
        'Cookbook Index': ('Cookbook', 'should-have'),
        'Cookbook Recipe': ('Cookbook (subsection)', 'should-have'),
        'Manpage': ('Documentation/Manual', 'must-have'),
        'Changelog': ('Changelog', 'nice-to-have'),
        'Community': ('Community', 'should-have'),
        'FAQs': ('FAQs', 'should-have'),
        'Funding': ('Funding/Support', 'nice-to-have'),
        'Roadmap': ('Roadmap', 'nice-to-have'),
        'Real-world Showcase': ('Showcase', 'nice-to-have'),
        'Other': ('Miscellaneous', 'nice-to-have'),
    }

    with open(out_path, 'w', encoding='utf-8') as f:
        f.write('# Migration Mapping\n\n')
        f.write('This document outlines how the archived Nixtamal website content will be mapped to the new single-page website structure.\n\n')

        f.write('## Section Mappings\n\n')
        f.write('Each old page is mapped to a section in the new single-page layout.\n\n')
        f.write('| Old Page | New Section | Priority |\n')
        f.write('|----------|-------------|----------|\n')
        for page in pages:
            section, priority = section_mappings.get(page['type'], ('Other', 'nice-to-have'))
            f.write(f"| {_md_cell(page['title'])} | {section} | {priority} |\n")

        f.write('\n## Content Grouping Suggestions\n\n')
        f.write('- **Introduction/Hero**: Combine home page content with key features and showcase.\n')
        f.write('- **Installation**: Direct installation guide.\n')
        f.write('- **Cookbook**: Group all cookbook recipes under expandable sections or tabs.\n')
        f.write('- **Documentation/Manual**: Include manpages with proper formatting.\n')
        f.write('- **Community**: Community links and information.\n')
        f.write('- **FAQs**: Frequently asked questions.\n')
        f.write('- **Changelog, Roadmap, Funding, Showcase**: Place in footer or separate sections with navigation.\n')

        f.write('\n## Priority Rankings\n\n')
        f.write('- **Must-have**: Introduction/Hero, Installation, Documentation/Manual\n')
        f.write('- **Should-have**: Cookbook, Community, FAQs\n')
        f.write('- **Nice-to-have**: Changelog, Roadmap, Funding, Real-world Showcase\n')

        f.write('\n## Recommendations for New Website Structure\n\n')
        f.write('The new single-page website should have a sticky navigation header with sections: Home, Install, Cookbook, Docs, Community.\n')
        f.write('Use smooth scrolling or anchors for navigation within the page.\n')
        f.write('For cookbook, use accordion or tabbed interface for recipes to keep it organized.\n')
        f.write('Assets like CSS and logo should be integrated into the single-page design.\n')


def main():
    """Scan the archived site and write the inventory and migration reports.

    Reads every ``.md`` file under ``docs/archive/organized`` and every file
    under its ``assets`` subdirectory, then writes ``docs/inventory.md`` and
    ``docs/migration-mapping.md``. Paths are relative to the current working
    directory. Files that cannot be read are reported on stdout and skipped.

    Raises:
        OSError: if either report file cannot be opened for writing.
    """
    base_path = 'docs/archive/organized'

    pages = _collect_pages(base_path)
    assets = _collect_assets(os.path.join(base_path, 'assets'), base_path)

    _write_inventory(pages, assets, 'docs/inventory.md')
    _write_migration_mapping(pages, 'docs/migration-mapping.md')

# Run the report generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()