summaryrefslogtreecommitdiff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r--scripts/create_inventory.py178
-rw-r--r--scripts/organize_archive.py90
-rw-r--r--scripts/spider.py192
-rwxr-xr-xscripts/test.sh111
4 files changed, 571 insertions, 0 deletions
diff --git a/scripts/create_inventory.py b/scripts/create_inventory.py
new file mode 100644
index 0000000..781b30e
--- /dev/null
+++ b/scripts/create_inventory.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+
+import os
+import re
+from collections import defaultdict
+
def extract_title(content):
    """Return the first H1 heading of a markdown document, or 'Untitled'.

    The crawler captured page titles with the site-name suffix
    " | Nixtamal"; that suffix is stripped when present.
    """
    for line in content.split('\n'):
        if not line.startswith('# '):
            continue
        heading = line[2:].strip()
        # replace() is a no-op when the suffix is absent, so no
        # membership test is needed.
        return heading.replace(' | Nixtamal', '')
    return 'Untitled'
+
def count_words(content):
    """Count word tokens in *content*, excluding the boilerplate footer."""
    # Everything from "Site made with Nix" onward is template chrome,
    # not page content; partition() leaves the text untouched when the
    # marker is absent.
    body, _marker, _footer = content.partition('Site made with Nix')
    return len(re.findall(r'\w+', body))
+
def infer_content_type(path):
    """Map an archived markdown file path to a human-readable content type.

    Paths are expected to live under docs/archive/organized/<section>/...;
    anything without a recognizable section resolves to 'Other'.
    """
    # Top-level section directory -> content type label.
    section_labels = {
        'home': 'Home Page',
        'install': 'Installation Guide',
        'manpage': 'Manpage',
        'changelog': 'Changelog',
        'community': 'Community',
        'faqs': 'FAQs',
        'funding': 'Funding',
        'roadmap': 'Roadmap',
        'real-world-showcase': 'Real-world Showcase',
    }
    parts = path.replace('docs/archive/organized/', '').split('/')
    # A bare filename (no section directory) carries no type information.
    if len(parts) < 2:
        return 'Other'
    section = parts[0]
    if section == 'cookbook':
        # The cookbook index sits directly under cookbook/; anything
        # nested deeper is an individual recipe page.
        return 'Cookbook Index' if len(parts) == 2 else 'Cookbook Recipe'
    return section_labels.get(section, 'Other')
+
def main():
    """Build docs/inventory.md and docs/migration-mapping.md from the archive.

    Walks docs/archive/organized for markdown pages and assets, then writes
    two reports: a content-inventory table (pages, assets, summary stats)
    and a page-to-section migration mapping for the planned single-page
    website.
    """
    base_path = 'docs/archive/organized'

    # Find all md files
    md_files = []
    for root, dirs, files in os.walk(base_path):
        for file in files:
            if file.endswith('.md'):
                md_files.append(os.path.join(root, file))

    pages = []
    total_words = 0

    for md_file in md_files:
        try:
            with open(md_file, 'r', encoding='utf-8') as f:
                content = f.read()
            title = extract_title(content)
            word_count = count_words(content)
            content_type = infer_content_type(md_file)
            pages.append({
                'title': title,
                'word_count': word_count,
                'type': content_type,
                # Store the path relative to the organized archive root.
                'path': md_file.replace('docs/archive/organized/', '')
            })
            total_words += word_count
        except Exception as e:
            # Best-effort: a single unreadable file must not abort the run.
            print(f"Error reading {md_file}: {e}")

    # Assets
    assets_path = os.path.join(base_path, 'assets')
    assets = []
    total_asset_size = 0
    if os.path.exists(assets_path):
        for root, dirs, files in os.walk(assets_path):
            for file in files:
                path = os.path.join(root, file)
                try:
                    size = os.path.getsize(path)
                    # The file extension doubles as the asset "type" column.
                    ext = file.split('.')[-1].lower() if '.' in file else 'unknown'
                    assets.append({
                        'path': path.replace('docs/archive/organized/', ''),
                        'type': ext,
                        'size': size
                    })
                    total_asset_size += size
                except Exception as e:
                    print(f"Error getting info for {path}: {e}")

    # Write inventory.md
    with open('docs/inventory.md', 'w', encoding='utf-8') as f:
        f.write('# Content Inventory\n\n')

        f.write('## Pages\n\n')
        f.write('| Title | Word Count | Type | Path |\n')
        f.write('|-------|------------|------|------|\n')
        # Sort by path so the table order is stable across runs.
        for page in sorted(pages, key=lambda x: x['path']):
            f.write(f"| {page['title']} | {page['word_count']} | {page['type']} | {page['path']} |\n")

        f.write('\n## Assets\n\n')
        f.write('| Path | Type | Size (bytes) |\n')
        f.write('|------|------|--------------|\n')
        for asset in sorted(assets, key=lambda x: x['path']):
            f.write(f"| {asset['path']} | {asset['type']} | {asset['size']} |\n")

        f.write('\n## Summary Statistics\n\n')
        f.write(f'- Total Pages: {len(pages)}\n')
        f.write(f'- Total Words: {total_words}\n')
        f.write(f'- Total Assets: {len(assets)}\n')
        f.write(f'- Total Asset Size: {total_asset_size} bytes\n')

    # Migration mapping
    with open('docs/migration-mapping.md', 'w', encoding='utf-8') as f:
        f.write('# Migration Mapping\n\n')
        f.write('This document outlines how the archived Nixtamal website content will be mapped to the new single-page website structure.\n\n')

        f.write('## Section Mappings\n\n')
        f.write('Each old page is mapped to a section in the new single-page layout.\n\n')
        f.write('| Old Page | New Section | Priority |\n')
        f.write('|----------|-------------|----------|\n')

        # Content type (from infer_content_type) -> (new section, priority).
        section_mappings = {
            'Home Page': ('Introduction/Hero', 'must-have'),
            'Installation Guide': ('Installation', 'must-have'),
            'Cookbook Index': ('Cookbook', 'should-have'),
            'Cookbook Recipe': ('Cookbook (subsection)', 'should-have'),
            'Manpage': ('Documentation/Manual', 'must-have'),
            'Changelog': ('Changelog', 'nice-to-have'),
            'Community': ('Community', 'should-have'),
            'FAQs': ('FAQs', 'should-have'),
            'Funding': ('Funding/Support', 'nice-to-have'),
            'Roadmap': ('Roadmap', 'nice-to-have'),
            'Real-world Showcase': ('Showcase', 'nice-to-have'),
            'Other': ('Miscellaneous', 'nice-to-have')
        }

        for page in sorted(pages, key=lambda x: x['path']):
            section, priority = section_mappings.get(page['type'], ('Other', 'nice-to-have'))
            f.write(f"| {page['title']} | {section} | {priority} |\n")

        f.write('\n## Content Grouping Suggestions\n\n')
        f.write('- **Introduction/Hero**: Combine home page content with key features and showcase.\n')
        f.write('- **Installation**: Direct installation guide.\n')
        f.write('- **Cookbook**: Group all cookbook recipes under expandable sections or tabs.\n')
        f.write('- **Documentation/Manual**: Include manpages with proper formatting.\n')
        f.write('- **Community**: Community links and information.\n')
        f.write('- **FAQs**: Frequently asked questions.\n')
        f.write('- **Changelog, Roadmap, Funding, Showcase**: Place in footer or separate sections with navigation.\n')

        f.write('\n## Priority Rankings\n\n')
        f.write('- **Must-have**: Introduction/Hero, Installation, Documentation/Manual\n')
        f.write('- **Should-have**: Cookbook, Community, FAQs\n')
        f.write('- **Nice-to-have**: Changelog, Roadmap, Funding, Real-world Showcase\n')

        f.write('\n## Recommendations for New Website Structure\n\n')
        f.write('The new single-page website should have a sticky navigation header with sections: Home, Install, Cookbook, Docs, Community.\n')
        f.write('Use smooth scrolling or anchors for navigation within the page.\n')
        f.write('For cookbook, use accordion or tabbed interface for recipes to keep it organized.\n')
        f.write('Assets like CSS and logo should be integrated into the single-page design.\n')

if __name__ == '__main__':
    main()
diff --git a/scripts/organize_archive.py b/scripts/organize_archive.py
new file mode 100644
index 0000000..c7dfe48
--- /dev/null
+++ b/scripts/organize_archive.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+
+import json
+import os
+import shutil
+
def main():
    """Reorganize the raw crawl output into docs/archive/organized/.

    Reads docs/archive/structure.json (produced by the spider), writes each
    page as a markdown file mirroring its URL path, copies the downloaded
    assets across, and generates an index.md grouped by top-level section.
    """
    # Load structure.json
    with open('docs/archive/structure.json', 'r') as f:
        data = json.load(f)

    # Create organized directory
    organized_dir = 'docs/archive/organized'
    os.makedirs(organized_dir, exist_ok=True)

    # Copy assets
    assets_src = 'docs/archive/assets'
    assets_dst = os.path.join(organized_dir, 'assets')
    if os.path.exists(assets_src):
        shutil.copytree(assets_src, assets_dst, dirs_exist_ok=True)

    pages = []

    for url, page_data in data.items():
        # Skip anchors and non-site URLs
        if '#' in url or not url.startswith('https://nixtamal.toast.al'):
            continue

        # Extract path: the site root (with or without trailing slash)
        # becomes the synthetic section name "home".
        if url == 'https://nixtamal.toast.al' or url == 'https://nixtamal.toast.al/':
            path = 'home'
        else:
            path = url[len('https://nixtamal.toast.al/'):].rstrip('/')
            if not path:
                path = 'home'

        # Create directory structure mirroring the URL path
        full_path = os.path.join(organized_dir, path)
        os.makedirs(full_path, exist_ok=True)

        # Get title and text
        # NOTE(review): assumes every structure.json entry has 'title' and
        # 'text' keys (true for spider.py output) -- a missing key raises.
        title = page_data['title'].strip()
        text = page_data['text']

        # Clean text: drop blank lines and collapse the rest into
        # blank-line-separated paragraphs.
        lines = text.split('\n')
        cleaned_lines = []
        for line in lines:
            stripped = line.strip()
            if stripped:
                cleaned_lines.append(stripped)
        text = '\n\n'.join(cleaned_lines)

        # Filename: last URL path segment, as markdown
        if '/' in path:
            filename = path.split('/')[-1] + '.md'
        else:
            filename = path + '.md'

        # Write markdown file
        md_path = os.path.join(full_path, filename)
        with open(md_path, 'w', encoding='utf-8') as f:
            f.write(f'# {title}\n\n{text}\n')

        # Collect for index (rel_path is relative to organized_dir)
        pages.append((path, title, md_path.replace(organized_dir + '/', '')))

    # Create index.md
    index_path = os.path.join(organized_dir, 'index.md')
    with open(index_path, 'w', encoding='utf-8') as f:
        f.write('# Nixtamal Documentation Archive\n\n')
        f.write('This is an organized archive of the Nixtamal documentation.\n\n')
        f.write('## Contents\n\n')

        # Group by top-level section
        sections = {}
        for path, title, rel_path in pages:
            top = path.split('/')[0] if '/' in path else path
            if top not in sections:
                sections[top] = []
            sections[top].append((path, title, rel_path))

        # One heading per section, entries sorted by path for stable output.
        for top in sorted(sections.keys()):
            f.write(f'### {top.capitalize()}\n\n')
            for path, title, rel_path in sorted(sections[top]):
                f.write(f'- [{title}]({rel_path})\n')
            f.write('\n')

if __name__ == '__main__':
    main()
diff --git a/scripts/spider.py b/scripts/spider.py
new file mode 100644
index 0000000..c73a1ff
--- /dev/null
+++ b/scripts/spider.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import urllib.parse
+import urllib.robotparser
+import html.parser
+import json
+import requests
+import mimetypes
+from collections import deque
+
class LinkExtractor(html.parser.HTMLParser):
    """Collect links, asset references, page text and metadata from HTML.

    After feed()-ing a document, the following attributes are populated:
    - links: href values of <a> tags (raw, possibly relative)
    - images: src values of <img> tags
    - scripts: src values of <script> tags
    - styles: href values of stylesheet/icon <link> tags
    - text_parts: non-empty, stripped text fragments outside <title>
    - title: concatenated <title> text
    - meta: name/property -> content for <meta> tags
    """

    def __init__(self):
        super().__init__()
        self.links = []
        self.images = []
        self.scripts = []
        self.styles = []
        self.text_parts = []
        self.title = ''
        self.meta = {}
        self.in_title = False  # True while between <title> and </title>

    def handle_starttag(self, tag, attrs):
        attrs = dict(attrs)
        if tag == 'a' and attrs.get('href'):
            self.links.append(attrs['href'])
        elif tag == 'img' and attrs.get('src'):
            self.images.append(attrs['src'])
        elif tag == 'script' and attrs.get('src'):
            self.scripts.append(attrs['src'])
        elif tag == 'link':
            href = attrs.get('href')
            # rel is a space-separated token list per the HTML spec
            # (e.g. rel="shortcut icon"), so match individual tokens
            # rather than comparing the raw attribute string.
            rel_tokens = set((attrs.get('rel') or '').split())
            if href and rel_tokens & {'stylesheet', 'icon'}:
                self.styles.append(href)
        elif tag == 'title':
            self.in_title = True
        elif tag == 'meta':
            name = attrs.get('name') or attrs.get('property')
            content = attrs.get('content')
            if name and content:
                self.meta[name] = content

    def handle_endtag(self, tag):
        if tag == 'title':
            self.in_title = False

    def handle_data(self, data):
        if self.in_title:
            self.title += data
        else:
            # Skip whitespace-only fragments so text_parts is not littered
            # with empty strings (the previous code appended them, which
            # produced runs of spaces in the joined page text).
            stripped = data.strip()
            if stripped:
                self.text_parts.append(stripped)
+
def download_asset(url, base_path, timeout=10):
    """Download *url* into *base_path* and return the local file path.

    Returns None on any failure, including non-200 responses (which fall
    through without writing anything).  The filename is taken from the URL
    path; when it is missing or has no extension, one is guessed from the
    response Content-Type (falling back to '.bin').

    NOTE(review): assets from different pages that share a basename will
    silently overwrite each other in base_path -- confirm acceptable.
    """
    try:
        resp = requests.get(url, timeout=timeout)
        if resp.status_code == 200:
            content_type = resp.headers.get('content-type', '')
            # Guess a sensible extension from the MIME type when needed.
            ext = mimetypes.guess_extension(content_type) or '.bin'
            filename = os.path.basename(urllib.parse.urlparse(url).path)
            if not filename:
                filename = 'asset' + ext
            elif not os.path.splitext(filename)[1]:
                filename += ext
            filepath = os.path.join(base_path, filename)
            os.makedirs(os.path.dirname(filepath), exist_ok=True)
            with open(filepath, 'wb') as f:
                f.write(resp.content)
            return filepath
    except Exception as e:
        # Best-effort: log and fall through to None so the crawl continues.
        print(f"Error downloading {url}: {e}")
    return None
+
def main():
    """Crawl nixtamal.toast.al and archive its pages, assets and structure.

    Pages are saved as HTML under docs/archive/, same-site assets under
    docs/archive/assets/, and per-page metadata (title, meta tags, text,
    links, assets) is written to docs/archive/structure.json.
    """
    base_url = 'https://nixtamal.toast.al'

    # Honour robots.txt.  The network read stays inside try, but the
    # sys.exit() is moved outside: the previous bare `except:` also caught
    # the SystemExit raised by sys.exit(1), so a robots.txt disallow was
    # silently ignored and the crawl proceeded anyway.
    rp = urllib.robotparser.RobotFileParser()
    rp.set_url(base_url + '/robots.txt')
    allowed = True
    try:
        rp.read()
        allowed = rp.can_fetch('*', base_url + '/')
    except Exception:
        print("Could not read robots.txt, proceeding assuming allowed")
    if not allowed:
        print("Crawling not allowed by robots.txt")
        sys.exit(1)

    # Create directories
    os.makedirs('docs/archive', exist_ok=True)
    os.makedirs('docs/archive/assets', exist_ok=True)

    visited = set()        # URLs fetched (or attempted)
    enqueued = {base_url}  # every URL ever queued -- O(1) membership test
                           # (scanning the deque per link was O(n))
    queue = deque([base_url])
    pages_data = {}

    while queue:
        url = queue.popleft()
        if url in visited:
            continue
        visited.add(url)
        print(f"Crawling: {url}")

        try:
            resp = requests.get(url, timeout=10)
            if resp.status_code != 200:
                print(f"Skipping {url} with status {resp.status_code}")
                continue

            content = resp.text
            parser = LinkExtractor()
            parser.feed(content)

            # Resolve relative links; follow only same-site URLs.
            abs_links = []
            for link in parser.links:
                abs_link = urllib.parse.urljoin(url, link)
                if abs_link.startswith(base_url):
                    abs_links.append(abs_link)
                    if abs_link not in enqueued:
                        enqueued.add(abs_link)
                        queue.append(abs_link)

            # Download referenced same-site assets of each kind.
            assets = []
            for kind, refs in (('image', parser.images),
                               ('script', parser.scripts),
                               ('style', parser.styles)):
                for ref in refs:
                    asset_url = urllib.parse.urljoin(url, ref)
                    if asset_url.startswith(base_url):
                        path = download_asset(asset_url, 'docs/archive/assets')
                        if path:
                            assets.append({'type': kind, 'url': asset_url,
                                           'local_path': path})

            # Save the raw HTML, flattening the URL path into a filename.
            path = urllib.parse.urlparse(url).path
            if not path or path == '/':
                filename = 'index.html'
            else:
                filename = path.strip('/').replace('/', '_') + '.html'
            filepath = os.path.join('docs/archive', filename)
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(content)

            # Collect per-page metadata for structure.json.
            pages_data[url] = {
                'title': parser.title,
                'meta': parser.meta,
                'text': ' '.join(parser.text_parts),
                'links': abs_links,
                'assets': assets,
                'local_file': filepath
            }

        except Exception as e:
            # Best-effort: one failing page must not kill the whole crawl.
            print(f"Error crawling {url}: {e}")

    # Save structure
    with open('docs/archive/structure.json', 'w', encoding='utf-8') as f:
        json.dump(pages_data, f, indent=2, ensure_ascii=False)

    print("Crawling complete. Data saved to docs/archive/")

if __name__ == '__main__':
    main()
diff --git a/scripts/test.sh b/scripts/test.sh
new file mode 100755
index 0000000..a66915e
--- /dev/null
+++ b/scripts/test.sh
@@ -0,0 +1,111 @@
#!/run/current-system/sw/bin/bash

# Integration test for the Nixtamal single-page website.
# Verifies that every locally referenced file exists and that index.html
# has a sane top-level HTML structure and the required section anchors.
# Any ERROR sets FAILED so the script exits non-zero for CI
# (previously only missing files affected the exit status, and the
# "Section IDs present" success line was printed unconditionally).

FAILED=0

echo "=== Nixtamal Website Integration Test ==="

# Base directory: repository root, one level above this script.
BASE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
INDEX_FILE="$BASE_DIR/index.html"

if [ ! -f "$INDEX_FILE" ]; then
    echo "ERROR: index.html not found at $INDEX_FILE"
    exit 1
fi

echo "Checking file references in index.html..."

# NOTE: grep -P (PCRE) is GNU-specific; fine on NixOS, not portable to BSD.
# Extract src attributes from script/img tags, and href values that are
# not pure fragment anchors.
SCRIPT_FILES=$(grep -oP 'src="\K[^"]+' "$INDEX_FILE")
LINK_FILES=$(grep -oP 'href="\K[^"#][^"]*' "$INDEX_FILE")

# Combine and deduplicate.
ALL_FILES=$(echo -e "$SCRIPT_FILES\n$LINK_FILES" | sort | uniq)

MISSING_FILES=()
for file in $ALL_FILES; do
    # External URLs are out of scope for a local existence check.
    if [[ $file == http* ]]; then
        continue
    fi
    if [ ! -f "$BASE_DIR/$file" ]; then
        MISSING_FILES+=("$file")
    fi
done

if [ ${#MISSING_FILES[@]} -gt 0 ]; then
    echo "ERROR: Missing referenced files:"
    for file in "${MISSING_FILES[@]}"; do
        echo "  - $file"
    done
    FAILED=1
else
    echo "✓ All referenced files exist"
fi

echo "Validating basic HTML structure..."

if ! grep -q "<!DOCTYPE html>" "$INDEX_FILE"; then
    echo "WARNING: Missing DOCTYPE declaration"
fi

# Required structural tags; each missing one is a hard error.
for tag in "<html" "</html>" "<head>" "<body>"; do
    if ! grep -q "$tag" "$INDEX_FILE"; then
        echo "ERROR: Missing $tag tag"
        FAILED=1
    fi
done

# Rough tag-balance heuristic (void elements make exact balance impossible,
# so this is only a warning).
OPEN_TAGS=$(grep -o '<[^/][^>]*>' "$INDEX_FILE" | wc -l)
CLOSE_TAGS=$(grep -o '</[^>]*>' "$INDEX_FILE" | wc -l)

if [ "$OPEN_TAGS" -ne "$CLOSE_TAGS" ]; then
    echo "WARNING: Potential unbalanced tags (open: $OPEN_TAGS, close: $CLOSE_TAGS)"
else
    echo "✓ Basic tag balance looks good"
fi

# Required navigation sections.
REQUIRED_SECTIONS=("home" "install" "cookbook" "docs" "community")
SECTIONS_OK=1
for section in "${REQUIRED_SECTIONS[@]}"; do
    if ! grep -q "id=\"$section\"" "$INDEX_FILE"; then
        echo "ERROR: Missing section with id=\"$section\""
        SECTIONS_OK=0
        FAILED=1
    fi
done

# Only claim success when every required section was actually found.
if [ "$SECTIONS_OK" -eq 1 ]; then
    echo "✓ Section IDs present"
fi

# Optional enhancement scripts -- warn only.
if ! grep -q "webgl-bg.js" "$INDEX_FILE"; then
    echo "WARNING: webgl-bg.js not referenced - WebGL background may not load"
fi

if ! grep -q "parallax.js" "$INDEX_FILE"; then
    echo "WARNING: parallax.js not referenced - Parallax effects may not work"
fi

echo "=== Test Complete ==="

if [ "$FAILED" -ne 0 ]; then
    echo "FAIL: Issues found"
    exit 1
else
    echo "PASS: All checks passed"
    exit 0
fi