You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
342 lines
12 KiB
Python
342 lines
12 KiB
Python
|
4 months ago
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Convert Obsidian-style wiki links to standard markdown links.
|
||
|
|
|
||
|
|
This script processes markdown files containing Obsidian wiki links in the format:
|
||
|
|
- [[ mypage ]] -> [mypage](mypage.md)
|
||
|
|
- [[ myfolder/mypage ]] -> [mypage](myfolder/mypage.md)
|
||
|
|
- [[ myfolder/mypage | my description ]] -> [my description](myfolder/mypage.md)
|
||
|
|
|
||
|
|
Usage:
|
||
|
|
python obsidian_converter.py source_folder destination_folder
|
||
|
|
"""
|
||
|
|
|
||
|
|
import os
|
||
|
|
import re
|
||
|
|
import sys
|
||
|
|
import shutil
|
||
|
|
import argparse
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
|
||
|
|
def build_file_index(source_folder):
    """
    Build an index of all markdown files in the source folder.

    Walks ``source_folder`` recursively and groups every ``*.md`` file by its
    stem (the filename without the extension).

    Args:
        source_folder (Path): Root folder to index

    Returns:
        dict: Map of filename stem -> list of paths relative to
            ``source_folder``. Each list is in sorted order, so the first
            entry for a duplicated name is the same on every run.
    """
    file_index = {}

    # rglob makes no promise about the order of its results; sorting keeps
    # the index (and the choice among duplicate names) reproducible.
    for md_file in sorted(source_folder.rglob('*.md')):
        # A directory may also be called "something.md"; it is not a note.
        if not md_file.is_file():
            continue

        relative_path = md_file.relative_to(source_folder)
        file_index.setdefault(md_file.stem, []).append(relative_path)

    return file_index
|
||
|
|
|
||
|
|
|
||
|
|
def convert_wiki_links(content, file_index, current_file_path, source_folder):
    """
    Convert wiki-style links to standard markdown links with relative paths.

    Every ``[[ target ]]`` or ``[[ target | description ]]`` occurrence is
    rewritten as ``[text](path)``:

    * ``text`` is the description when one is given, otherwise the last path
      component of the target.
    * A target containing a path separator is treated as a path relative to
      ``source_folder`` (``.md`` is appended when missing).
    * A bare name is looked up in ``file_index``. When the name as written has
      no entry and ends in ``.md``, the lookup is retried without the suffix,
      so ``[[ mypage.md ]]`` resolves like ``[[ mypage ]]``.
    * A resolved target is emitted relative to the directory of
      ``current_file_path``. An unresolved target is emitted as written (plus
      ``.md`` when missing) and a warning is recorded.

    Args:
        content (str): The markdown content to process
        file_index (dict): Map of filenames to their paths
        current_file_path (Path): Path of the current file being processed (relative to source)
        source_folder (Path): Root source folder

    Returns:
        tuple: (converted_content, list of warnings)
    """
    # Pattern to match wiki links: [[ link ]] or [[ link | description ]]
    # Group 1: the link path
    # Group 2: optional description (after |)
    wiki_link_pattern = r'\[\[\s*([^|\]]+?)(?:\s*\|\s*([^\]]+?))?\s*\]\]'

    warnings = []
    current_dir = current_file_path.parent

    def with_md_suffix(path_text):
        """Return path_text with '.md' appended unless it already ends with it."""
        return path_text if path_text.endswith('.md') else path_text + '.md'

    def lookup_candidates(name):
        """Return the indexed paths for a bare link name, or None when there are none."""
        paths = file_index.get(name)
        if not paths and name.endswith('.md'):
            # The index is keyed by stem; an exact entry (checked above) wins,
            # otherwise retry without the explicit extension.
            paths = file_index.get(name[:-len('.md')])
        return paths or None

    def replace_wiki_link(match):
        link_path = match.group(1).strip()
        description = match.group(2)

        # A custom description wins; otherwise show just the filename part.
        if description:
            display_text = description.strip()
        else:
            display_text = os.path.basename(link_path)

        if '/' in link_path or '\\' in link_path:
            # Explicit path: use it as written, relative to the source root.
            target_text = with_md_suffix(link_path)
            if not (source_folder / target_text).exists():
                warnings.append(f"File not found for link '[[ {link_path} ]]' -> {target_text}")
                return f'[{display_text}]({target_text})'
            target_path = Path(target_text)
        else:
            # Bare name: resolve it through the index.
            candidates = lookup_candidates(link_path)
            if candidates is None:
                warnings.append(f"File not found for link '[[ {link_path} ]]' - no matching .md file")
                # Still create a link, but it will be broken
                return f'[{display_text}]({with_md_suffix(link_path)})'

            target_path = candidates[0]
            if len(candidates) > 1:
                # Ambiguous name: keep the first candidate and tell the user.
                paths_list = '\n '.join(str(p) for p in candidates)
                warnings.append(
                    f"Multiple files found for '[[ {link_path} ]]':\n {paths_list}\n Using: {target_path}"
                )

        # Both target_path and current_dir are relative to source_folder, so
        # relpath yields the link as seen from the current file.
        try:
            relative_path = os.path.relpath(target_path, current_dir)
            # Normalize path separators to forward slashes for markdown
            relative_path = relative_path.replace('\\', '/')
        except ValueError:
            # Fallback if relpath fails (different drives on Windows)
            relative_path = str(target_path)

        return f'[{display_text}]({relative_path})'

    converted_content = re.sub(wiki_link_pattern, replace_wiki_link, content)

    return converted_content, warnings
|
||
|
|
|
||
|
|
|
||
|
|
def process_markdown_file(source_path, dest_path, file_index, source_folder, dry_run=False, verbose=False):
    """
    Convert the wiki links of one markdown file and write or preview the result.

    In a normal run the converted text is written to ``dest_path`` (parent
    folders are created as needed) and any warnings are printed. In a dry run
    nothing is written; a status line is printed, and with ``verbose`` the
    wiki links found and the warnings are listed as well.

    Args:
        source_path (Path): Source file path
        dest_path (Path): Destination file path
        file_index (dict): Map of filenames to their paths
        source_folder (Path): Root source folder
        dry_run (bool): If True, don't actually write files
        verbose (bool): If True, show detailed output

    Returns:
        tuple: (conversions_made, list of warnings). Any exception is reported
            on stdout and turned into ``(False, [])``.
    """
    try:
        original_text = source_path.read_text(encoding='utf-8')

        # Links are resolved relative to where this file sits inside the source tree.
        path_in_source = source_path.relative_to(source_folder)
        new_text, warnings = convert_wiki_links(original_text, file_index, path_in_source, source_folder)

        # Any difference means at least one wiki link was rewritten.
        conversions_made = original_text != new_text

        if not dry_run:
            # Real run: write the result, creating the folder structure first.
            dest_path.parent.mkdir(parents=True, exist_ok=True)
            dest_path.write_text(new_text, encoding='utf-8')

            if verbose or conversions_made:
                label = "CONVERTED" if conversions_made else "COPIED"
                print(f"{label}: {source_path} -> {dest_path}")

            if warnings:
                print(f"⚠ WARNING in {path_in_source}:")
                for message in warnings:
                    print(f" {message}")

            return conversions_made, warnings

        # Dry run: report only.
        label = "WOULD CONVERT" if conversions_made else "NO CHANGES"
        print(f"{label}: {source_path} -> {dest_path}")

        if verbose:
            if conversions_made:
                # List every wiki link that would be rewritten.
                found = re.findall(r'\[\[\s*([^|\]]+?)(?:\s*\|\s*([^\]]+?))?\s*\]\]', original_text)
                for target, custom_text in found:
                    if custom_text:
                        print(f" [[ {target} | {custom_text} ]]")
                    else:
                        print(f" [[ {target} ]]")
            for message in warnings:
                print(f" ⚠ {message}")

        return conversions_made, warnings

    except Exception as e:
        # Best effort per file: report the problem and let the caller continue.
        print(f"ERROR processing {source_path}: {e}")
        return False, []
|
||
|
|
|
||
|
|
|
||
|
|
def main():
    """Main function to handle command line arguments and process files.

    Parses the command line, validates the source folder, builds the filename
    index, converts every markdown file into the destination folder (or only
    reports what would happen with --dry-run), copies all non-markdown files
    alongside them, and prints a summary with the total number of warnings.
    """
    parser = argparse.ArgumentParser(
        description="Convert Obsidian-style wiki links to standard markdown links.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python obsidian_converter.py ./my_obsidian_vault ./converted_markdown
  python obsidian_converter.py /path/to/obsidian /path/to/output

Wiki link conversion examples:
  [[ mypage ]] -> [mypage](mypage.md)
  [[ myfolder/mypage ]] -> [mypage](myfolder/mypage.md)
  [[ myfolder/mypage | My Title ]] -> [My Title](myfolder/mypage.md)
"""
    )

    parser.add_argument(
        'source_folder',
        help='Source folder containing Obsidian markdown files'
    )

    parser.add_argument(
        'destination_folder',
        help='Destination folder for converted markdown files'
    )

    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be converted without actually converting files'
    )

    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Show detailed output during conversion'
    )

    args = parser.parse_args()

    source_folder = Path(args.source_folder)
    dest_folder = Path(args.destination_folder)

    # Validate source folder
    if not source_folder.exists():
        parser.error(f"Source folder '{source_folder}' does not exist.")

    if not source_folder.is_dir():
        parser.error(f"'{source_folder}' is not a directory.")

    # Create destination folder if it doesn't exist (unless dry run)
    if not args.dry_run:
        dest_folder.mkdir(parents=True, exist_ok=True)

    action = "Would convert" if args.dry_run else "Converting"
    print(f"{action} Obsidian markdown files from '{source_folder}' to '{dest_folder}'")
    if args.dry_run:
        print("(DRY RUN - no files will be modified)")
    print("-" * 60)

    # Build file index
    print("Building file index...")
    file_index = build_file_index(source_folder)
    print(f"Indexed {len(file_index)} unique markdown files")
    print("-" * 60)

    # Track statistics
    total_files = 0
    converted_files = 0
    files_with_conversions = 0
    all_warnings = []

    # rglob produces its results lazily. Take the full listing before any file
    # is written so that output created during this run (for example when the
    # destination lies inside the source folder) is not picked up as new
    # input halfway through the walk. Sorting keeps the report order stable.
    markdown_files = sorted(source_folder.rglob('*.md'))

    for source_path in markdown_files:
        total_files += 1

        # Calculate relative path to maintain folder structure
        relative_path = source_path.relative_to(source_folder)
        dest_path = dest_folder / relative_path

        had_conversions, warnings = process_markdown_file(
            source_path, dest_path,
            file_index, source_folder,
            dry_run=args.dry_run,
            verbose=args.verbose
        )

        # NOTE(review): process_markdown_file signals a failure only by
        # printing it, so every file is counted as processed here.
        converted_files += 1
        if had_conversions:
            files_with_conversions += 1

        all_warnings.extend(warnings)

    # Copy non-markdown files as well (images, etc.) - unless dry run
    non_md_files = 0
    if not args.dry_run:
        # Same reasoning as above: list first, copy afterwards.
        other_files = [
            path for path in sorted(source_folder.rglob('*'))
            if path.is_file() and path.suffix != '.md'
        ]
        for source_path in other_files:
            relative_path = source_path.relative_to(source_folder)
            dest_path = dest_folder / relative_path

            # Ensure destination directory exists
            dest_path.parent.mkdir(parents=True, exist_ok=True)

            # copy2 also carries over file metadata such as timestamps.
            shutil.copy2(source_path, dest_path)
            non_md_files += 1
            if args.verbose:
                print(f"COPIED: {source_path} -> {dest_path}")

    print("-" * 60)
    if args.dry_run:
        print("Dry run complete!")
        print(f"Total markdown files: {total_files}")
        print(f"Files with wiki links to convert: {files_with_conversions}")
    else:
        print("Conversion complete!")
        print(f"Total markdown files processed: {converted_files}/{total_files}")
        print(f"Files with wiki links converted: {files_with_conversions}")
        if non_md_files > 0:
            print(f"Non-markdown files copied: {non_md_files}")
        print(f"Output directory: {dest_folder}")

    # Summary of warnings
    if all_warnings:
        print("-" * 60)
        print(f"⚠ TOTAL WARNINGS: {len(all_warnings)}")
        print("Review the warnings above to fix broken links.")
|
||
|
|
|
||
|
|
|
||
|
|
# Entry point: run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()
|