summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/create_inventory.py178
-rw-r--r--scripts/organize_archive.py90
-rw-r--r--scripts/spider.py192
-rwxr-xr-xscripts/test.sh111
4 files changed, 571 insertions, 0 deletions
diff --git a/scripts/create_inventory.py b/scripts/create_inventory.py
new file mode 100644
index 0000000..781b30e
--- /dev/null
+++ b/scripts/create_inventory.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+
+import os
+import re
+from collections import defaultdict
+
def extract_title(content):
    """Return the first H1 heading of a markdown document, or 'Untitled'.

    The crawler captured page titles with the site-name suffix
    " | Nixtamal"; that suffix is stripped when present.
    """
    for line in content.split('\n'):
        if not line.startswith('# '):
            continue
        heading = line[2:].strip()
        # replace() is a no-op when the suffix is absent, so no
        # membership test is needed.
        return heading.replace(' | Nixtamal', '')
    return 'Untitled'
+
def count_words(content):
    """Count word tokens in *content*, excluding the boilerplate footer."""
    # Everything from "Site made with Nix" onward is template chrome,
    # not page content; partition() leaves the text untouched when the
    # marker is absent.
    body, _marker, _footer = content.partition('Site made with Nix')
    return len(re.findall(r'\w+', body))
+
def infer_content_type(path):
    """Map an archived markdown file path to a human-readable content type.

    Paths are expected to live under docs/archive/organized/<section>/...;
    anything without a recognizable section resolves to 'Other'.
    """
    # Top-level section directory -> content type label.
    section_labels = {
        'home': 'Home Page',
        'install': 'Installation Guide',
        'manpage': 'Manpage',
        'changelog': 'Changelog',
        'community': 'Community',
        'faqs': 'FAQs',
        'funding': 'Funding',
        'roadmap': 'Roadmap',
        'real-world-showcase': 'Real-world Showcase',
    }
    parts = path.replace('docs/archive/organized/', '').split('/')
    # A bare filename (no section directory) carries no type information.
    if len(parts) < 2:
        return 'Other'
    section = parts[0]
    if section == 'cookbook':
        # The cookbook index sits directly under cookbook/; anything
        # nested deeper is an individual recipe page.
        return 'Cookbook Index' if len(parts) == 2 else 'Cookbook Recipe'
    return section_labels.get(section, 'Other')
+
def main():
    """Build docs/inventory.md and docs/migration-mapping.md from the archive.

    Walks docs/archive/organized for markdown pages and assets, then writes
    two reports: a content-inventory table (pages, assets, summary stats)
    and a page-to-section migration mapping for the planned single-page
    website.
    """
    base_path = 'docs/archive/organized'

    # Find all md files
    md_files = []
    for root, dirs, files in os.walk(base_path):
        for file in files:
            if file.endswith('.md'):
                md_files.append(os.path.join(root, file))

    pages = []
    total_words = 0

    for md_file in md_files:
        try:
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()
            title = extract_title(content)
            word_count = count_words(content)
            content_type = infer_content_type(md_file)
            pages.append({
                'title': title,
                'word_count': word_count,
                'type': content_type,
                # Store the path relative to the organized archive root.
                'path': md_file.replace('docs/archive/organized/', '')
            })
            total_words += word_count
        except Exception as e:
            # Best-effort: a single unreadable file must not abort the run.
            print(f"Error reading {md_file}: {e}")

    # Assets
    assets_path = os.path.join(base_path, 'assets')
    assets = []
    total_asset_size = 0
    if os.path.exists(assets_path):
        for root, dirs, files in os.walk(assets_path):
            for file in files:
                path = os.path.join(root, file)
                try:
                    size = os.path.getsize(path)
                    # The file extension doubles as the asset "type" column.
                    ext = file.split('.')[-1].lower() if '.' in file else 'unknown'
                    assets.append({
                        'path': path.replace('docs/archive/organized/', ''),
                        'type': ext,
                        'size': size
                    })
                    total_asset_size += size
                except Exception as e:
                    print(f"Error getting info for {path}: {e}")

    # Write inventory.md
    with open('docs/inventory.md', 'w', encoding='utf-8') as f:
        f.write('# Content Inventory\n\n')

        f.write('## Pages\n\n')
        f.write('| Title | Word Count | Type | Path |\n')
        f.write('|-------|------------|------|------|\n')
        # Sort by path so the table order is stable across runs.
        for page in sorted(pages, key=lambda x: x['path']):
            f.write(f"| {page['title']} | {page['word_count']} | {page['type']} | {page['path']} |\n")

        f.write('\n## Assets\n\n')
        f.write('| Path | Type | Size (bytes) |\n')
        f.write('|------|------|--------------|\n')
        for asset in sorted(assets, key=lambda x: x['path']):
            f.write(f"| {asset['path']} | {asset['type']} | {asset['size']} |\n")

        f.write('\n## Summary Statistics\n\n')
        f.write(f'- Total Pages: {len(pages)}\n')
        f.write(f'- Total Words: {total_words}\n')
        f.write(f'- Total Assets: {len(assets)}\n')
        f.write(f'- Total Asset Size: {total_asset_size} bytes\n')

    # Migration mapping
    with open('docs/migration-mapping.md', 'w', encoding='utf-8') as f:
        f.write('# Migration Mapping\n\n')
        f.write('This document outlines how the archived Nixtamal website content will be mapped to the new single-page website structure.\n\n')

        f.write('## Section Mappings\n\n')
        f.write('Each old page is mapped to a section in the new single-page layout.\n\n')
        f.write('| Old Page | New Section | Priority |\n')
        f.write('|----------|-------------|----------|\n')

        # Content type (from infer_content_type) -> (new section, priority).
        section_mappings = {
            'Home Page': ('Introduction/Hero', 'must-have'),
            'Installation Guide': ('Installation', 'must-have'),
            'Cookbook Index': ('Cookbook', 'should-have'),
            'Cookbook Recipe': ('Cookbook (subsection)', 'should-have'),
            'Manpage': ('Documentation/Manual', 'must-have'),
            'Changelog': ('Changelog', 'nice-to-have'),
            'Community': ('Community', 'should-have'),
            'FAQs': ('FAQs', 'should-have'),
            'Funding': ('Funding/Support', 'nice-to-have'),
            'Roadmap': ('Roadmap', 'nice-to-have'),
            'Real-world Showcase': ('Showcase', 'nice-to-have'),
            'Other': ('Miscellaneous', 'nice-to-have')
        }

        for page in sorted(pages, key=lambda x: x['path']):
            section, priority = section_mappings.get(page['type'], ('Other', 'nice-to-have'))
            f.write(f"| {page['title']} | {section} | {priority} |\n")

        f.write('\n## Content Grouping Suggestions\n\n')
        f.write('- **Introduction/Hero**: Combine home page content with key features and showcase.\n')
        f.write('- **Installation**: Direct installation guide.\n')
        f.write('- **Cookbook**: Group all cookbook recipes under expandable sections or tabs.\n')
        f.write('- **Documentation/Manual**: Include manpages with proper formatting.\n')
        f.write('- **Community**: Community links and information.\n')
        f.write('- **FAQs**: Frequently asked questions.\n')
        f.write('- **Changelog, Roadmap, Funding, Showcase**: Place in footer or separate sections with navigation.\n')

        f.write('\n## Priority Rankings\n\n')
        f.write('- **Must-have**: Introduction/Hero, Installation, Documentation/Manual\n')
        f.write('- **Should-have**: Cookbook, Community, FAQs\n')
        f.write('- **Nice-to-have**: Changelog, Roadmap, Funding, Real-world Showcase\n')

        f.write('\n## Recommendations for New Website Structure\n\n')
        f.write('The new single-page website should have a sticky navigation header with sections: Home, Install, Cookbook, Docs, Community.\n')
        f.write('Use smooth scrolling or anchors for navigation within the page.\n')
        f.write('For cookbook, use accordion or tabbed interface for recipes to keep it organized.\n')
        f.write('Assets like CSS and logo should be integrated into the single-page design.\n')

if __name__ == '__main__':
    main()
diff --git a/scripts/organize_archive.py b/scripts/organize_archive.py
new file mode 100644
index 0000000..c7dfe48
--- /dev/null
+++ b/scripts/organize_archive.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+
+import json
+import os
+import shutil
+
def main():
    """Reorganize the raw crawl output into docs/archive/organized/.

    Reads docs/archive/structure.json (produced by the spider), writes each
    page as a markdown file mirroring its URL path, copies the downloaded
    assets across, and generates an index.md grouped by top-level section.
    """
    # Load structure.json
    with open('docs/archive/structure.json', 'r') as f:
        data = json.load(f)

    # Create organized directory
    organized_dir = 'docs/archive/organized'
    os.makedirs(organized_dir, exist_ok=True)

    # Copy assets
    assets_src = 'docs/archive/assets'
    assets_dst = os.path.join(organized_dir, 'assets')
    if os.path.exists(assets_src):
        shutil.copytree(assets_src, assets_dst, dirs_exist_ok=True)

    pages = []

    for url, page_data in data.items():
        # Skip anchors and non-site URLs
        if '#' in url or not url.startswith('https://nixtamal.toast.al'):
            continue

        # Extract path: the site root (with or without trailing slash)
        # becomes the synthetic section name "home".
        if url == 'https://nixtamal.toast.al' or url == 'https://nixtamal.toast.al/':
            path = 'home'
        else:
            path = url[len('https://nixtamal.toast.al/'):].rstrip('/')
            if not path:
                path = 'home'

        # Create directory structure mirroring the URL path
        full_path = os.path.join(organized_dir, path)
        os.makedirs(full_path, exist_ok=True)

        # Get title and text
        # NOTE(review): assumes every structure.json entry has 'title' and
        # 'text' keys (true for spider.py output) -- a missing key raises.
        title = page_data['title'].strip()
        text = page_data['text']

        # Clean text: drop blank lines and collapse the rest into
        # blank-line-separated paragraphs.
        lines = text.split('\n')
        cleaned_lines = []
        for line in lines:
            stripped = line.strip()
            if stripped:
                cleaned_lines.append(stripped)
        text = '\n\n'.join(cleaned_lines)

        # Filename: last URL path segment, as markdown
        if '/' in path:
            filename = path.split('/')[-1] + '.md'
        else:
            filename = path + '.md'

        # Write markdown file
        md_path = os.path.join(full_path, filename)
        with open(md_path, 'w', encoding='utf-8') as f:
            f.write(f'# {title}\n\n{text}\n')

        # Collect for index (rel_path is relative to organized_dir)
        pages.append((path, title, md_path.replace(organized_dir + '/', '')))

    # Create index.md
    index_path = os.path.join(organized_dir, 'index.md')
    with open(index_path, 'w', encoding='utf-8') as f:
        f.write('# Nixtamal Documentation Archive\n\n')
        f.write('This is an organized archive of the Nixtamal documentation.\n\n')
        f.write('## Contents\n\n')

        # Group by top-level section
        sections = {}
        for path, title, rel_path in pages:
            top = path.split('/')[0] if '/' in path else path
            if top not in sections:
                sections[top] = []
            sections[top].append((path, title, rel_path))

        # One heading per section, entries sorted by path for stable output.
        for top in sorted(sections.keys()):
            f.write(f'### {top.capitalize()}\n\n')
            for path, title, rel_path in sorted(sections[top]):
                f.write(f'- [{title}]({rel_path})\n')
            f.write('\n')

if __name__ == '__main__':
    main()
diff --git a/scripts/spider.py b/scripts/spider.py
new file mode 100644
index 0000000..c73a1ff
--- /dev/null
+++ b/scripts/spider.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import urllib.parse
+import urllib.robotparser
+import html.parser
+import json
+import requests
+import mimetypes
+from collections import deque
+
class LinkExtractor(html.parser.HTMLParser):
    """Collect links, asset references, page text and metadata from HTML.

    After feed()-ing a document, the following attributes are populated:
    - links: href values of <a> tags (raw, possibly relative)
    - images: src values of <img> tags
    - scripts: src values of <script> tags
    - styles: href values of stylesheet/icon <link> tags
    - text_parts: non-empty, stripped text fragments outside <title>
    - title: concatenated <title> text
    - meta: name/property -> content for <meta> tags
    """

    def __init__(self):
        super().__init__()
        self.links = []
        self.images = []
        self.scripts = []
        self.styles = []
        self.text_parts = []
        self.title = ''
        self.meta = {}
        self.in_title = False  # True while between <title> and </title>

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if tag == 'a' and attrs.get('href'):
            self.links.append(attrs['href'])
        elif tag == 'img' and attrs.get('src'):
            self.images.append(attrs['src'])
        elif tag == 'script' and attrs.get('src'):
            self.scripts.append(attrs['src'])
        elif tag == 'link':
            href = attrs.get('href')
            # rel is a space-separated token list per the HTML spec
            # (e.g. rel="shortcut icon"), so match individual tokens
            # rather than comparing the raw attribute string.
            rel_tokens = set((attrs.get('rel') or '').split())
            if href and rel_tokens & {'stylesheet', 'icon'}:
                self.styles.append(href)
        elif tag == 'title':
            self.in_title = True
        elif tag == 'meta':
            name = attrs.get('name') or attrs.get('property')
            content = attrs.get('content')
            if name and content:
                self.meta[name] = content

    def handle_endtag(self, tag):
        if tag == 'title':
            self.in_title = False

    def handle_data(self, data):
        if self.in_title:
            self.title += data
        else:
            # Skip whitespace-only fragments so text_parts is not littered
            # with empty strings (the previous code appended them, which
            # produced runs of spaces in the joined page text).
            stripped = data.strip()
            if stripped:
                self.text_parts.append(stripped)
+
def download_asset(url, base_path, timeout=10):
    """Download *url* into *base_path* and return the local file path.

    Returns None on any failure, including non-200 responses (which fall
    through without writing anything).  The filename is taken from the URL
    path; when it is missing or has no extension, one is guessed from the
    response Content-Type (falling back to '.bin').

    NOTE(review): assets from different pages that share a basename will
    silently overwrite each other in base_path -- confirm acceptable.
    """
    try:
        resp = requests.get(url, timeout=timeout)
        if resp.status_code == 200:
            content_type = resp.headers.get('content-type', '')
            # Guess a sensible extension from the MIME type when needed.
            ext = mimetypes.guess_extension(content_type) or '.bin'
            filename = os.path.basename(urllib.parse.urlparse(url).path)
            if not filename:
                filename = 'asset' + ext
            elif not os.path.splitext(filename)[1]:
                filename += ext
            filepath = os.path.join(base_path, filename)
            os.makedirs(os.path.dirname(filepath), exist_ok=True)
            with open(filepath, 'wb') as f:
                f.write(resp.content)
            return filepath
    except Exception as e:
        # Best-effort: log and fall through to None so the crawl continues.
        print(f"Error downloading {url}: {e}")
    return None
+
def main():
    """Crawl nixtamal.toast.al and archive its pages, assets and structure.

    Pages are saved as HTML under docs/archive/, same-site assets under
    docs/archive/assets/, and per-page metadata (title, meta tags, text,
    links, assets) is written to docs/archive/structure.json.
    """
    base_url = 'https://nixtamal.toast.al'

    # Honour robots.txt.  The network read stays inside try, but the
    # sys.exit() is moved outside: the previous bare `except:` also caught
    # the SystemExit raised by sys.exit(1), so a robots.txt disallow was
    # silently ignored and the crawl proceeded anyway.
    rp = urllib.robotparser.RobotFileParser()
    rp.set_url(base_url + '/robots.txt')
    allowed = True
    try:
        rp.read()
        allowed = rp.can_fetch('*', base_url + '/')
    except Exception:
        print("Could not read robots.txt, proceeding assuming allowed")
    if not allowed:
        print("Crawling not allowed by robots.txt")
        sys.exit(1)

    # Create directories
    os.makedirs('docs/archive', exist_ok=True)
    os.makedirs('docs/archive/assets', exist_ok=True)

    visited = set()        # URLs fetched (or attempted)
    enqueued = {base_url}  # every URL ever queued -- O(1) membership test
                           # (scanning the deque per link was O(n))
    queue = deque([base_url])
    pages_data = {}

    while queue:
        url = queue.popleft()
        if url in visited:
            continue
        visited.add(url)
        print(f"Crawling: {url}")

        try:
            resp = requests.get(url, timeout=10)
            if resp.status_code != 200:
                print(f"Skipping {url} with status {resp.status_code}")
                continue

            content = resp.text
            parser = LinkExtractor()
            parser.feed(content)

            # Resolve relative links; follow only same-site URLs.
            abs_links = []
            for link in parser.links:
                abs_link = urllib.parse.urljoin(url, link)
                if abs_link.startswith(base_url):
                    abs_links.append(abs_link)
                    if abs_link not in enqueued:
                        enqueued.add(abs_link)
                        queue.append(abs_link)

            # Download referenced same-site assets of each kind.
            assets = []
            for kind, refs in (('image', parser.images),
                               ('script', parser.scripts),
                               ('style', parser.styles)):
                for ref in refs:
                    asset_url = urllib.parse.urljoin(url, ref)
                    if asset_url.startswith(base_url):
                        path = download_asset(asset_url, 'docs/archive/assets')
                        if path:
                            assets.append({'type': kind, 'url': asset_url,
                                           'local_path': path})

            # Save the raw HTML, flattening the URL path into a filename.
            path = urllib.parse.urlparse(url).path
            if not path or path == '/':
                filename = 'index.html'
            else:
                filename = path.strip('/').replace('/', '_') + '.html'
            filepath = os.path.join('docs/archive', filename)
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(content)

            # Collect per-page metadata for structure.json.
            pages_data[url] = {
                'title': parser.title,
                'meta': parser.meta,
                'text': ' '.join(parser.text_parts),
                'links': abs_links,
                'assets': assets,
                'local_file': filepath
            }

        except Exception as e:
            # Best-effort: one failing page must not kill the whole crawl.
            print(f"Error crawling {url}: {e}")

    # Save structure
    with open('docs/archive/structure.json', 'w', encoding='utf-8') as f:
        json.dump(pages_data, f, indent=2, ensure_ascii=False)

    print("Crawling complete. Data saved to docs/archive/")

if __name__ == '__main__':
    main()
diff --git a/scripts/test.sh b/scripts/test.sh
new file mode 100755
index 0000000..a66915e
--- /dev/null
+++ b/scripts/test.sh
@@ -0,0 +1,111 @@
#!/run/current-system/sw/bin/bash

# Integration test for the Nixtamal single-page website.
# Verifies that every locally referenced file exists and that index.html
# has a sane top-level HTML structure and the required section anchors.
# Any ERROR sets FAILED so the script exits non-zero for CI
# (previously only missing files affected the exit status, and the
# "Section IDs present" success line was printed unconditionally).

FAILED=0

echo "=== Nixtamal Website Integration Test ==="

# Base directory: repository root, one level above this script.
BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
INDEX_FILE="$BASE_DIR/index.html"

if [ ! -f "$INDEX_FILE" ]; then
    echo "ERROR: index.html not found at $INDEX_FILE"
    exit 1
fi

echo "Checking file references in index.html..."

# NOTE: grep -P (PCRE) is GNU-specific; fine on NixOS, not portable to BSD.
# Extract src attributes from script/img tags, and href values that are
# not pure fragment anchors.
SCRIPT_FILES=$(grep -oP 'src="\K[^"]+' "$INDEX_FILE")
LINK_FILES=$(grep -oP 'href="\K[^"#][^"]*' "$INDEX_FILE")

# Combine and deduplicate.
ALL_FILES=$(echo -e "$SCRIPT_FILES\n$LINK_FILES" | sort | uniq)

MISSING_FILES=()
for file in $ALL_FILES; do
    # External URLs are out of scope for a local existence check.
    if [[ $file == http* ]]; then
        continue
    fi
    if [ ! -f "$BASE_DIR/$file" ]; then
        MISSING_FILES+=("$file")
    fi
done

if [ ${#MISSING_FILES[@]} -gt 0 ]; then
    echo "ERROR: Missing referenced files:"
    for file in "${MISSING_FILES[@]}"; do
        echo "  - $file"
    done
    FAILED=1
else
    echo "✓ All referenced files exist"
fi

echo "Validating basic HTML structure..."

if ! grep -q "<!DOCTYPE html>" "$INDEX_FILE"; then
    echo "WARNING: Missing DOCTYPE declaration"
fi

# Required structural tags; each missing one is a hard error.
for tag in "<html" "</html>" "<head>" "<body>"; do
    if ! grep -q "$tag" "$INDEX_FILE"; then
        echo "ERROR: Missing $tag tag"
        FAILED=1
    fi
done

# Rough tag-balance heuristic (void elements make exact balance impossible,
# so this is only a warning).
OPEN_TAGS=$(grep -o '<[^/][^>]*>' "$INDEX_FILE" | wc -l)
CLOSE_TAGS=$(grep -o '</[^>]*>' "$INDEX_FILE" | wc -l)

if [ "$OPEN_TAGS" -ne "$CLOSE_TAGS" ]; then
    echo "WARNING: Potential unbalanced tags (open: $OPEN_TAGS, close: $CLOSE_TAGS)"
else
    echo "✓ Basic tag balance looks good"
fi

# Required navigation sections.
REQUIRED_SECTIONS=("home" "install" "cookbook" "docs" "community")
SECTIONS_OK=1
for section in "${REQUIRED_SECTIONS[@]}"; do
    if ! grep -q "id=\"$section\"" "$INDEX_FILE"; then
        echo "ERROR: Missing section with id=\"$section\""
        SECTIONS_OK=0
        FAILED=1
    fi
done

# Only claim success when every required section was actually found.
if [ "$SECTIONS_OK" -eq 1 ]; then
    echo "✓ Section IDs present"
fi

# Optional enhancement scripts -- warn only.
if ! grep -q "webgl-bg.js" "$INDEX_FILE"; then
    echo "WARNING: webgl-bg.js not referenced - WebGL background may not load"
fi

if ! grep -q "parallax.js" "$INDEX_FILE"; then
    echo "WARNING: parallax.js not referenced - Parallax effects may not work"
fi

echo "=== Test Complete ==="

if [ "$FAILED" -ne 0 ]; then
    echo "FAIL: Issues found"
    exit 1
else
    echo "PASS: All checks passed"
    exit 0
fi