#!/usr/bin/env python3
"""
Convert Obsidian-style wiki links to standard markdown links.

This script processes markdown files containing Obsidian wiki links in the format:
- [[ mypage ]] -> [mypage](mypage.md)
- [[ myfolder/mypage ]] -> [mypage](myfolder/mypage.md)
- [[ myfolder/mypage | my description ]] -> [my description](myfolder/mypage.md)

Link targets are written relative to the file that contains the link and are
percent-encoded, so note names containing spaces still yield valid markdown.

Usage: python obsidian_converter.py source_folder destination_folder
"""

import os
import re
import sys
import shutil
import argparse
from pathlib import Path
from urllib.parse import quote

# Matches [[ link ]] or [[ link | description ]].
# Group 1: the link path. Group 2: optional description (after |).
WIKI_LINK_PATTERN = re.compile(r'\[\[\s*([^|\]]+?)(?:\s*\|\s*([^\]]+?))?\s*\]\]')


def _is_inside(path, folder):
    """Return True if *path* equals *folder* or lies somewhere below it."""
    try:
        path.relative_to(folder)
    except ValueError:
        return False
    return True


def _iter_files(source_folder, pattern, exclude_folder=None):
    """
    List the regular files below source_folder that match a glob pattern.

    The result is sorted so that every run sees the files in the same order,
    and it is fully materialised so that files written while the caller is
    iterating cannot show up in the listing.

    Args:
        source_folder (Path): Root folder to search recursively
        pattern (str): Glob pattern handed to Path.rglob
        exclude_folder (Path | None): Folder whose contents are skipped

    Returns:
        list: Sorted list of matching file paths
    """
    excluded = exclude_folder.resolve() if exclude_folder is not None else None
    found = []
    for path in sorted(source_folder.rglob(pattern)):
        if not path.is_file():
            continue
        if excluded is not None and _is_inside(path.resolve(), excluded):
            continue
        found.append(path)
    return found


def _format_link(display_text, target):
    """Build a markdown link, percent-encoding the destination (spaces etc.)."""
    destination = quote(str(target).replace('\\', '/'), safe='/')
    return f'[{display_text}]({destination})'


def build_file_index(source_folder, exclude_folder=None):
    """
    Build an index of all markdown files in the source folder.
    Maps filename (without extension) to full relative paths.

    Paths are indexed in sorted order, so when several files share a name the
    "first" candidate is the same on every run and every platform.

    Args:
        source_folder (Path): Root folder to index
        exclude_folder (Path | None): Folder to leave out of the index, e.g.
            an output folder that lives inside the source folder

    Returns:
        dict: Map of filename -> list of relative paths
    """
    file_index = {}
    for md_file in _iter_files(source_folder, '*.md', exclude_folder):
        relative_path = md_file.relative_to(source_folder)
        file_index.setdefault(md_file.stem, []).append(relative_path)
    return file_index


def convert_wiki_links(content, file_index, current_file_path, source_folder):
    """
    Convert wiki-style links to standard markdown links with relative paths.

    Args:
        content (str): The markdown content to process
        file_index (dict): Map of filenames to their paths
        current_file_path (Path): Path of the current file being processed
            (relative to source)
        source_folder (Path): Root source folder

    Returns:
        tuple: (converted_content, list of warnings)
    """
    warnings = []
    # Both current_file_path and the link targets are relative to the source
    # folder, so a relative path between them can be computed directly.
    current_dir = current_file_path.parent

    def with_md_suffix(name):
        return name if name.endswith('.md') else name + '.md'

    def replace_wiki_link(match):
        raw_link = match.group(1).strip()
        description = match.group(2)

        # Treat backslashes as folder separators on every platform.
        normalized = raw_link.replace('\\', '/')
        has_folder = '/' in normalized
        # A leading "/" means "from the vault root". Left in place it would
        # make `source_folder / target` point at the filesystem root.
        link_path = normalized.lstrip('/')

        # If there's a custom description, use it; otherwise show just the
        # file name of the link (without folder path).
        if description:
            display_text = description.strip()
        else:
            display_text = link_path.rsplit('/', 1)[-1]

        if has_folder:
            # User specified a path - use it as-is (relative to the root).
            target = with_md_suffix(link_path)
            if not (source_folder / target).exists():
                warnings.append(f"File not found for link '[[ {raw_link} ]]' -> {target}")
                return _format_link(display_text, target)
            target_path = Path(target)
        else:
            # Just a filename - search for it. The index is keyed by stem, so
            # also accept a link that spells out the ".md" extension.
            paths = file_index.get(link_path)
            if not paths and link_path.endswith('.md'):
                paths = file_index.get(link_path[:-3])
            if not paths:
                warnings.append(f"File not found for link '[[ {raw_link} ]]' - no matching .md file")
                # Still create a link, but it will be broken.
                return _format_link(display_text, with_md_suffix(link_path))
            target_path = paths[0]
            if len(paths) > 1:
                # Multiple matches - warn and use the first one.
                paths_list = '\n '.join(str(p) for p in paths)
                warnings.append(
                    f"Multiple files found for '[[ {raw_link} ]]':\n {paths_list}\n Using: {target_path}"
                )

        try:
            relative_path = os.path.relpath(target_path, current_dir)
        except ValueError:
            # Fallback if relpath fails (different drives on Windows).
            relative_path = str(target_path)
        return _format_link(display_text, relative_path)

    converted_content = WIKI_LINK_PATTERN.sub(replace_wiki_link, content)
    return converted_content, warnings


def process_markdown_file(source_path, dest_path, file_index, source_folder,
                          dry_run=False, verbose=False, errors=None):
    """
    Process a single markdown file, converting wiki links.

    Args:
        source_path (Path): Source file path
        dest_path (Path): Destination file path
        file_index (dict): Map of filenames to their paths
        source_folder (Path): Root source folder
        dry_run (bool): If True, don't actually write files
        verbose (bool): If True, show detailed output
        errors (list | None): Optional list; when given, a message is appended
            for a file that could not be processed, so that callers can tell
            a failure apart from "no links to convert"

    Returns:
        tuple: (conversions_made, list of warnings); (False, []) on failure
    """
    try:
        # newline='' keeps the file's own line endings (e.g. CRLF) intact.
        with open(source_path, 'r', encoding='utf-8', newline='') as f:
            content = f.read()

        # Calculate relative path from source folder for this file
        relative_file_path = source_path.relative_to(source_folder)

        converted_content, warnings = convert_wiki_links(
            content, file_index, relative_file_path, source_folder
        )
        conversions_made = content != converted_content

        if dry_run:
            status = "WOULD CONVERT" if conversions_made else "NO CHANGES"
            print(f"{status}: {source_path} -> {dest_path}")
            if verbose and conversions_made:
                # Show what wiki links were found
                for link, description in WIKI_LINK_PATTERN.findall(content):
                    if description:
                        print(f" [[ {link} | {description} ]]")
                    else:
                        print(f" [[ {link} ]]")
            if warnings and verbose:
                for warning in warnings:
                    print(f" ⚠ {warning}")
        else:
            # Ensure destination directory exists
            dest_path.parent.mkdir(parents=True, exist_ok=True)
            with open(dest_path, 'w', encoding='utf-8', newline='') as f:
                f.write(converted_content)

            if verbose or conversions_made:
                status = "CONVERTED" if conversions_made else "COPIED"
                print(f"{status}: {source_path} -> {dest_path}")

            # Display warnings for this file
            if warnings:
                print(f"⚠ WARNING in {relative_file_path}:")
                for warning in warnings:
                    print(f" {warning}")

        return conversions_made, warnings
    except (OSError, UnicodeError, ValueError) as e:
        # Best effort: report the file and carry on with the remaining ones.
        message = f"ERROR processing {source_path}: {e}"
        print(message)
        if errors is not None:
            errors.append(message)
        return False, []


def main(argv=None):
    """
    Main function to handle command line arguments and process files.

    Args:
        argv (list | None): Argument list to parse; defaults to sys.argv[1:]
    """
    parser = argparse.ArgumentParser(
        description="Convert Obsidian-style wiki links to standard markdown links.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python obsidian_converter.py ./my_obsidian_vault ./converted_markdown
  python obsidian_converter.py /path/to/obsidian /path/to/output

Wiki link conversion examples:
  [[ mypage ]] -> [mypage](mypage.md)
  [[ myfolder/mypage ]] -> [mypage](myfolder/mypage.md)
  [[ myfolder/mypage | My Title ]] -> [My Title](myfolder/mypage.md)
"""
    )
    parser.add_argument(
        'source_folder',
        help='Source folder containing Obsidian markdown files'
    )
    parser.add_argument(
        'destination_folder',
        help='Destination folder for converted markdown files'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be converted without actually converting files'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Show detailed output during conversion'
    )
    args = parser.parse_args(argv)

    source_folder = Path(args.source_folder)
    dest_folder = Path(args.destination_folder)

    # Validate source folder
    if not source_folder.exists():
        parser.error(f"Source folder '{source_folder}' does not exist.")
    if not source_folder.is_dir():
        parser.error(f"'{source_folder}' is not a directory.")

    # When the output folder lives inside the source folder, its contents must
    # not be treated as input, or every run would nest another copy inside it.
    source_resolved = source_folder.resolve()
    dest_resolved = dest_folder.resolve()
    dest_nested = (dest_resolved != source_resolved
                   and _is_inside(dest_resolved, source_resolved))
    exclude_folder = dest_folder if dest_nested else None

    # Create destination folder if it doesn't exist (unless dry run)
    if not args.dry_run:
        dest_folder.mkdir(parents=True, exist_ok=True)

    action = "Would convert" if args.dry_run else "Converting"
    print(f"{action} Obsidian markdown files from '{source_folder}' to '{dest_folder}'")
    if args.dry_run:
        print("(DRY RUN - no files will be modified)")
    print("-" * 60)

    # Build file index
    print("Building file index...")
    file_index = build_file_index(source_folder, exclude_folder)
    print(f"Indexed {len(file_index)} unique markdown files")
    print("-" * 60)

    # Track statistics
    total_files = 0
    converted_files = 0
    files_with_conversions = 0
    all_warnings = []
    errors = []

    # Process all markdown files recursively
    for source_path in _iter_files(source_folder, '*.md', exclude_folder):
        total_files += 1

        # Calculate relative path to maintain folder structure
        relative_path = source_path.relative_to(source_folder)
        dest_path = dest_folder / relative_path

        errors_before = len(errors)
        had_conversions, warnings = process_markdown_file(
            source_path, dest_path, file_index, source_folder,
            dry_run=args.dry_run, verbose=args.verbose, errors=errors
        )
        # Only files that did not raise count as processed.
        if len(errors) == errors_before:
            converted_files += 1
        if had_conversions:
            files_with_conversions += 1
        all_warnings.extend(warnings)

    # Copy non-markdown files as well (images, etc.) - unless dry run
    non_md_files = 0
    if not args.dry_run:
        for source_path in _iter_files(source_folder, '*', exclude_folder):
            if source_path.suffix == '.md':
                continue
            relative_path = source_path.relative_to(source_folder)
            dest_path = dest_folder / relative_path
            # Converting in place: the file is already where it belongs.
            if dest_path.resolve() == source_path.resolve():
                continue

            # Ensure destination directory exists
            dest_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(source_path, dest_path)
            non_md_files += 1
            if args.verbose:
                print(f"COPIED: {source_path} -> {dest_path}")

    print("-" * 60)
    if args.dry_run:
        print("Dry run complete!")
        print(f"Total markdown files: {total_files}")
        print(f"Files with wiki links to convert: {files_with_conversions}")
    else:
        print("Conversion complete!")
        print(f"Total markdown files processed: {converted_files}/{total_files}")
        print(f"Files with wiki links converted: {files_with_conversions}")
        if non_md_files > 0:
            print(f"Non-markdown files copied: {non_md_files}")
        print(f"Output directory: {dest_folder}")

    # Summary of warnings
    if all_warnings:
        print("-" * 60)
        print(f"⚠ TOTAL WARNINGS: {len(all_warnings)}")
        if args.dry_run and not args.verbose:
            # A non-verbose dry run does not list individual warnings.
            print("Re-run with --verbose to list the warnings.")
        else:
            print("Review the warnings above to fix broken links.")


if __name__ == "__main__":
    main()