"""Extract ``lstlisting`` code blocks from a LaTeX document into script files."""

import itertools
import re
from pathlib import Path
from typing import Optional

# Matches one lstlisting environment.
#   group(1): optional attribute list, e.g. "language=bash,label={setup}"
#   group(2): the raw listing body
# DOTALL lets both the attribute list and the body span several lines.
_LISTING_RE = re.compile(
    r'\\begin\{lstlisting\}(?:\[(.*?)\])?(.*?)\\end\{lstlisting\}',
    re.DOTALL,
)
_LANGUAGE_RE = re.compile(r'language\s*=\s*(\w+)')
# A label may be written braced (label={name}) or bare (label=name).
_LABEL_RE = re.compile(r'label\s*=\s*(?:\{([^}]+)\}|([^,\s\]]+))')
# Anything outside word characters, dots and dashes is unsafe in a file name.
_UNSAFE_FILENAME_CHARS_RE = re.compile(r'[^\w.-]+')
# Blank lines between \begin{lstlisting}[...] and the first line of code.
_LEADING_BLANK_LINES_RE = re.compile(r'\A(?:[ \t]*\r?\n)+')

_SHELL_KIND = ('.sh', 'incus_script')
_CONFIG_KIND = ('.conf', 'dnsmasq_config')
_KIND_BY_LANGUAGE = {
    'bash': _SHELL_KIND,
    'sh': _SHELL_KIND,
    'yaml': ('.yaml', 'netplan_config'),
    'python': ('.py', 'python_script'),
}


def get_file_extension_and_default_name(attributes: str, code: str) -> tuple[str, str]:
    """Determine the file extension and default base name for a listing.

    An explicitly declared, recognised ``language=`` attribute always wins.
    Only when the language is missing or unrecognised is the content
    inspected: code mentioning ``incus`` is treated as a shell script, and
    everything else is assumed to be a configuration file.

    Args:
        attributes: The listing's attribute string (may be empty).
        code: The listing's code content.

    Returns:
        A ``(extension, default_name)`` tuple, e.g. ``('.sh', 'incus_script')``.
    """
    language_match = _LANGUAGE_RE.search(attributes)
    if language_match:
        kind = _KIND_BY_LANGUAGE.get(language_match.group(1).lower())
        if kind is not None:
            return kind

    # Heuristic fallback for listings without a usable language attribute.
    if 'incus' in code.lower():
        return _SHELL_KIND
    return _CONFIG_KIND


def generate_unique_filename(base_dir: Path, base_name: str, extension: str) -> Path:
    """Return the first ``<base_name>_<n><extension>`` path not yet present.

    Numbering starts at 1 and increases until a path that does not exist in
    ``base_dir`` is found. The file itself is not created.

    Args:
        base_dir: Directory the file is going to live in.
        base_name: File name stem without the numeric suffix.
        extension: File extension including the leading dot.

    Returns:
        A path inside ``base_dir`` that does not currently exist.
    """
    for counter in itertools.count(1):
        file_path = base_dir / f"{base_name}_{counter}{extension}"
        if not file_path.exists():
            return file_path
    raise AssertionError("unreachable: itertools.count() never ends")


def _label_to_base_name(attributes: str) -> Optional[str]:
    """Return a file-name-safe version of the listing label, or None.

    Labels such as ``lst:setup`` or ``net/plan`` contain characters that are
    invalid in file names on some systems or that would change the target
    directory, so every unsafe run is collapsed to a single underscore.
    """
    label_match = _LABEL_RE.search(attributes)
    if not label_match:
        return None
    label = label_match.group(1) or label_match.group(2)
    safe = _UNSAFE_FILENAME_CHARS_RE.sub('_', label).strip('._')
    return safe or None


def _normalize_code(raw: str) -> str:
    """Trim blank lines around a listing body and end it with one newline.

    Only whole blank lines are removed at the start, so the indentation of
    the first code line is preserved (it matters for YAML and Python).
    An empty listing yields an empty string.
    """
    body = _LEADING_BLANK_LINES_RE.sub('', raw).rstrip()
    return f"{body}\n" if body else ''


def extract_listings(
    tex_file: Optional[Path] = None,
    scripts_dir: Optional[Path] = None,
) -> None:
    """Extract lstlisting code blocks from a LaTeX file into a scripts folder.

    Each listing is written to its own file. The file name is derived from
    the listing's label when it has one, otherwise from a default name chosen
    by ``get_file_extension_and_default_name``; a numeric suffix keeps names
    unique. Progress and errors are reported on standard output.

    Args:
        tex_file: LaTeX source to read. Defaults to ``doc/main.tex`` in the
            directory two levels above this file.
        scripts_dir: Output directory, created if missing. Defaults to
            ``scripts`` in the directory two levels above this file.
    """
    project_dir = Path(__file__).parent.parent
    tex_file = Path(tex_file) if tex_file is not None else project_dir / 'doc' / 'main.tex'
    scripts_dir = Path(scripts_dir) if scripts_dir is not None else project_dir / 'scripts'

    try:
        content = tex_file.read_text(encoding='utf-8')
    except FileNotFoundError:
        print(f"Error: {tex_file} not found")
        return
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading {tex_file}: {e}")
        return

    # Create the output directory only once there is something to read, so a
    # failed run does not leave an empty directory behind.
    scripts_dir.mkdir(parents=True, exist_ok=True)

    found_count = 0
    extracted_count = 0
    for match in _LISTING_RE.finditer(content):
        found_count += 1
        attributes = match.group(1) or ''
        code = _normalize_code(match.group(2))

        extension, default_name = get_file_extension_and_default_name(attributes, code)
        base_name = _label_to_base_name(attributes) or default_name
        file_path = generate_unique_filename(scripts_dir, base_name, extension)

        try:
            file_path.write_text(code, encoding='utf-8')
        except OSError as e:
            print(f"Error saving {file_path}: {e}")
        else:
            print(f"Saved code block to {file_path}")
            extracted_count += 1

    # Distinguish "nothing to extract" from "found blocks but could not save".
    if found_count == 0:
        print(f"No lstlisting code blocks found in {tex_file.name}")
    else:
        print(f"Completed: Extracted {extracted_count} code block(s) to {scripts_dir}")


if __name__ == "__main__":
    extract_listings()