You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
montpelliermaalsi2024/obsidian/bin/obsidian_converter.py

342 lines
12 KiB
Python

4 months ago
#!/usr/bin/env python3
"""
Convert Obsidian-style wiki links to standard markdown links.
This script processes markdown files containing Obsidian wiki links in the format:
- [[ mypage ]] -> [mypage](mypage.md)
- [[ myfolder/mypage ]] -> [mypage](myfolder/mypage.md)
- [[ myfolder/mypage | my description ]] -> [my description](myfolder/mypage.md)
Usage:
python obsidian_converter.py source_folder destination_folder
"""
import os
import re
import sys
import shutil
import argparse
from pathlib import Path
def build_file_index(source_folder):
    """
    Build an index of all markdown files in the source folder.

    Maps each filename (without extension) to every relative path that
    carries that name. Paths are visited in sorted order so that the list
    for a duplicated name - and therefore the "first match" callers pick -
    is the same on every run and every filesystem.

    Args:
        source_folder (Path): Root folder to index

    Returns:
        dict: Map of filename -> list of relative paths (sorted)
    """
    file_index = {}
    # rglob() yields entries in filesystem order, which is not stable;
    # sorting makes duplicate resolution deterministic.
    for md_file in sorted(source_folder.rglob('*.md')):
        # A directory may also be named "something.md"; only real files
        # can be link targets.
        if not md_file.is_file():
            continue
        relative_path = md_file.relative_to(source_folder)
        # The stem is the filename without its extension.
        file_index.setdefault(md_file.stem, []).append(relative_path)
    return file_index
def _link_destination(path, fragment=''):
    """Render *path* (plus optional '#fragment') as a Markdown link destination."""
    destination = str(path)
    if fragment:
        destination = f'{destination}#{fragment}'
    # An unescaped space ends a Markdown link destination, so a note called
    # "my page.md" must be written as "my%20page.md" to stay clickable.
    return destination.replace(' ', '%20')


def convert_wiki_links(content, file_index, current_file_path, source_folder):
    """
    Convert wiki-style links to standard markdown links with relative paths.

    Handles ``[[ page ]]``, ``[[ folder/page ]]``, ``[[ page | text ]]`` and
    heading links such as ``[[ page#Heading ]]`` / ``[[ #Heading ]]``. The
    heading text is kept verbatim after the ``#``; no renderer-specific
    slug is generated. Spaces in the resulting destination are
    percent-encoded.

    Args:
        content (str): The markdown content to process
        file_index (dict): Map of filenames (stems) to lists of their paths
        current_file_path (Path): Path of the current file being processed
            (relative to source)
        source_folder (Path): Root source folder

    Returns:
        tuple: (converted_content, list of warnings)
    """
    # Pattern to match wiki links: [[ link ]] or [[ link | description ]]
    # Group 1: the link path
    # Group 2: optional description (after |)
    wiki_link_pattern = r'\[\[\s*([^|\]]+?)(?:\s*\|\s*([^\]]+?))?\s*\]\]'
    warnings = []
    # Both the current file and every target are relative to source_folder,
    # so the link is computed from the current file's directory.
    current_dir = Path(current_file_path).parent

    def replace_wiki_link(match):
        link_path = match.group(1).strip()
        description = match.group(2)

        # A custom description wins; otherwise show just the last path
        # component of what the author typed.
        if description:
            display_text = description.strip()
        else:
            display_text = os.path.basename(link_path)

        # Split off an optional "#Heading" so ".md" is appended to the page
        # name rather than to the heading.
        page, _, fragment = link_path.partition('#')
        page = page.strip()
        fragment = fragment.strip()

        if not page:
            # [[#Heading]] points into the current file.
            return f'[{display_text}]({_link_destination("", fragment)})'

        if '/' in page or '\\' in page:
            # User specified a path (relative to the vault root) - use it as-is
            target = page if page.endswith('.md') else page + '.md'
            if not (source_folder / target).exists():
                warnings.append(f"File not found for link '[[ {link_path} ]]' -> {target}")
                # Still emit a link, even though it will be broken.
                return f'[{display_text}]({_link_destination(target, fragment)})'
            target_path = Path(target)
        else:
            # Just a filename - search the index, which is keyed by stem, so
            # an explicit ".md" suffix has to be dropped before the lookup.
            stem = page[:-3] if page.endswith('.md') else page
            paths = file_index.get(stem)
            if not paths:
                warnings.append(f"File not found for link '[[ {link_path} ]]' - no matching .md file")
                # Still create a link, but it will be broken
                return f'[{display_text}]({_link_destination(stem + ".md", fragment)})'
            target_path = paths[0]
            if len(paths) > 1:
                # Multiple matches - warn and use the first one
                paths_list = '\n '.join(str(p) for p in paths)
                warnings.append(
                    f"Multiple files found for '[[ {stem} ]]':\n {paths_list}\n Using: {target_path}"
                )

        try:
            relative_path = os.path.relpath(target_path, current_dir)
            # Normalize path separators to forward slashes for markdown
            relative_path = relative_path.replace('\\', '/')
        except ValueError:
            # Fallback if relpath fails (different drives on Windows)
            relative_path = str(target_path)
        return f'[{display_text}]({_link_destination(relative_path, fragment)})'

    # Replace all wiki links with standard markdown links
    converted_content = re.sub(wiki_link_pattern, replace_wiki_link, content)
    return converted_content, warnings
def process_markdown_file(source_path, dest_path, file_index, source_folder, dry_run=False, verbose=False):
    """
    Process a single markdown file, converting wiki links.

    Reads the source file, converts its wiki links and either reports what
    would happen (dry run) or writes the result to the destination. Line
    endings are preserved exactly as they appear in the source file.

    Any exception raised while processing is caught, reported on stdout as
    an ERROR line, and turned into a ``(False, [])`` result so that one bad
    file does not abort a whole run.

    Args:
        source_path (Path): Source file path
        dest_path (Path): Destination file path
        file_index (dict): Map of filenames to their paths
        source_folder (Path): Root source folder
        dry_run (bool): If True, don't actually write files
        verbose (bool): If True, show detailed output

    Returns:
        tuple: (conversions_made, list of warnings)
    """
    try:
        # newline='' switches off newline translation: without it a CRLF
        # file is silently rewritten with the platform's line endings even
        # when it contains no wiki links at all.
        with open(source_path, 'r', encoding='utf-8', newline='') as f:
            content = f.read()

        # Calculate relative path from source folder for this file
        relative_file_path = source_path.relative_to(source_folder)

        # Convert wiki links
        converted_content, warnings = convert_wiki_links(
            content, file_index, relative_file_path, source_folder
        )

        # Check if any conversions were made
        conversions_made = content != converted_content

        if dry_run:
            status = "WOULD CONVERT" if conversions_made else "NO CHANGES"
            print(f"{status}: {source_path} -> {dest_path}")
            if verbose and conversions_made:
                # Show what wiki links were found
                wiki_links = re.findall(r'\[\[\s*([^|\]]+?)(?:\s*\|\s*([^\]]+?))?\s*\]\]', content)
                for link in wiki_links:
                    if link[1]:  # Has custom description
                        print(f" [[ {link[0]} | {link[1]} ]]")
                    else:
                        print(f" [[ {link[0]} ]]")
            if warnings and verbose:
                for warning in warnings:
                    print(f"{warning}")
        else:
            # Ensure destination directory exists
            dest_path.parent.mkdir(parents=True, exist_ok=True)

            # Write converted content, again without newline translation
            with open(dest_path, 'w', encoding='utf-8', newline='') as f:
                f.write(converted_content)

            if verbose or conversions_made:
                status = "CONVERTED" if conversions_made else "COPIED"
                print(f"{status}: {source_path} -> {dest_path}")

            # Display warnings for this file
            if warnings:
                print(f"⚠ WARNING in {relative_file_path}:")
                for warning in warnings:
                    print(f" {warning}")

        return conversions_made, warnings
    except Exception as e:
        # Per-file boundary: report and carry on with the remaining files.
        print(f"ERROR processing {source_path}: {e}")
        return False, []
def main():
    """
    Main function to handle command line arguments and process files.

    Parses the command line, indexes the markdown files of the source
    folder, converts every markdown file into the destination folder and
    copies all other files alongside them. With ``--dry-run`` nothing is
    written.

    A destination folder located inside the source folder is excluded from
    the scan, so output from this or an earlier run is never treated as
    input. When source and destination are the same folder, markdown files
    are rewritten in place and the copy step is skipped.
    """
    parser = argparse.ArgumentParser(
        description="Convert Obsidian-style wiki links to standard markdown links.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python obsidian_converter.py ./my_obsidian_vault ./converted_markdown
python obsidian_converter.py /path/to/obsidian /path/to/output
Wiki link conversion examples:
[[ mypage ]] -> [mypage](mypage.md)
[[ myfolder/mypage ]] -> [mypage](myfolder/mypage.md)
[[ myfolder/mypage | My Title ]] -> [My Title](myfolder/mypage.md)
"""
    )
    parser.add_argument(
        'source_folder',
        help='Source folder containing Obsidian markdown files'
    )
    parser.add_argument(
        'destination_folder',
        help='Destination folder for converted markdown files'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Show what would be converted without actually converting files'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Show detailed output during conversion'
    )
    args = parser.parse_args()

    source_folder = Path(args.source_folder)
    dest_folder = Path(args.destination_folder)

    # Validate source folder
    if not source_folder.exists():
        parser.error(f"Source folder '{source_folder}' does not exist.")
    if not source_folder.is_dir():
        parser.error(f"'{source_folder}' is not a directory.")

    # Work out how the destination relates to the source.
    source_root = source_folder.resolve()
    dest_root = dest_folder.resolve()
    in_place = dest_root == source_root
    try:
        # Non-empty only when the destination is a strict sub-folder of the
        # source (for an in-place run the relative path has no parts).
        nested_dest = dest_root.relative_to(source_root).parts
    except ValueError:
        nested_dest = ()

    def is_output(relative_path):
        """Return True if *relative_path* lies inside a destination nested in the source."""
        return bool(nested_dest) and relative_path.parts[:len(nested_dest)] == nested_dest

    # Create destination folder if it doesn't exist (unless dry run)
    if not args.dry_run:
        dest_folder.mkdir(parents=True, exist_ok=True)

    action = "Would convert" if args.dry_run else "Converting"
    print(f"{action} Obsidian markdown files from '{source_folder}' to '{dest_folder}'")
    if args.dry_run:
        print("(DRY RUN - no files will be modified)")
    print("-" * 60)

    # Build file index
    print("Building file index...")
    file_index = build_file_index(source_folder)
    if nested_dest:
        # Drop previously generated output so it cannot shadow, or be
        # reported as a duplicate of, the real notes.
        filtered_index = {}
        for name, paths in file_index.items():
            kept = [p for p in paths if not is_output(p)]
            if kept:
                filtered_index[name] = kept
        file_index = filtered_index
    print(f"Indexed {len(file_index)} unique markdown files")
    print("-" * 60)

    # Track statistics
    total_files = 0
    converted_files = 0
    files_with_conversions = 0
    all_warnings = []

    # Collect the inputs up front: the tree is written to while converting,
    # and walking it lazily could pick up files created by this very run.
    markdown_files = [
        path for path in source_folder.rglob('*.md')
        if path.is_file() and not is_output(path.relative_to(source_folder))
    ]

    # Process all markdown files recursively
    for source_path in markdown_files:
        total_files += 1

        # Calculate relative path to maintain folder structure
        relative_path = source_path.relative_to(source_folder)
        dest_path = dest_folder / relative_path

        # Process the file
        had_conversions, warnings = process_markdown_file(
            source_path, dest_path,
            file_index, source_folder,
            dry_run=args.dry_run,
            verbose=args.verbose
        )
        # NOTE(review): process_markdown_file reports a failure the same
        # way as "no changes", so this counter cannot tell them apart and
        # always ends up equal to total_files.
        converted_files += 1
        if had_conversions:
            files_with_conversions += 1
        all_warnings.extend(warnings)

    # Copy non-markdown files as well (images, etc.) - unless dry run.
    # An in-place run needs no copy: every file is already where it belongs,
    # and copying a file onto itself raises shutil.SameFileError.
    non_md_files = 0
    if not args.dry_run and not in_place:
        for source_path in list(source_folder.rglob('*')):
            if not source_path.is_file() or source_path.suffix == '.md':
                continue
            relative_path = source_path.relative_to(source_folder)
            if is_output(relative_path):
                continue
            dest_path = dest_folder / relative_path

            # Ensure destination directory exists
            dest_path.parent.mkdir(parents=True, exist_ok=True)

            # Copy the file
            shutil.copy2(source_path, dest_path)
            non_md_files += 1
            if args.verbose:
                print(f"COPIED: {source_path} -> {dest_path}")

    print("-" * 60)
    if args.dry_run:
        print("Dry run complete!")
        print(f"Total markdown files: {total_files}")
        print(f"Files with wiki links to convert: {files_with_conversions}")
    else:
        print("Conversion complete!")
        print(f"Total markdown files processed: {converted_files}/{total_files}")
        print(f"Files with wiki links converted: {files_with_conversions}")
        if non_md_files > 0:
            print(f"Non-markdown files copied: {non_md_files}")
        print(f"Output directory: {dest_folder}")

    # Summary of warnings
    if all_warnings:
        print("-" * 60)
        print(f"⚠ TOTAL WARNINGS: {len(all_warnings)}")
        print("Review the warnings above to fix broken links.")
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()