import re
from typing import Optional

# One precompiled pattern for every supported URL shape.
#   prefix : ``v=`` query parameter (``\b`` stops ``prev=``/``rv=`` from matching),
#            ``youtu.be/`` short links, or a ``/live/``, ``/embed/``,
#            ``/shorts/``, ``/v/`` path prefix.
#   group 1: the 11-character ID.
#   suffix : the ID must not be followed by another ID character, so any
#            delimiter (``?``, ``&``, ``#``, ``/``, end of string) is accepted
#            while over-long tokens are rejected rather than truncated.
_VIDEO_ID_RE = re.compile(
    r'(?:\bv=|youtu\.be/|/(?:live|embed|shorts|v)/)'
    r'([0-9A-Za-z_-]{11})'
    r'(?![0-9A-Za-z_-])'
)


def get_youtube_id(url: Optional[str]) -> Optional[str]:
    """Extract the video ID from a YouTube URL.

    Supported formats:
        https://youtu.be/VIDEO_ID
        https://www.youtube.com/watch?v=VIDEO_ID
        https://www.youtube.com/live/VIDEO_ID
        https://www.youtube.com/embed/VIDEO_ID
        https://www.youtube.com/shorts/VIDEO_ID
        https://www.youtube.com/v/VIDEO_ID

    Extra query parameters, a ``#fragment`` or a trailing slash after the ID
    are tolerated. The host name is not validated (apart from the
    ``youtu.be/`` short-link form); only the URL shape is inspected.

    Args:
        url: The URL to inspect. Surrounding whitespace is ignored.

    Returns:
        The 11-character video ID, or ``None`` when ``url`` is empty/``None``
        or contains no recognizable ID.
    """
    if not url:
        return None

    # ``search`` returns the leftmost match, so the first ID-bearing component
    # of the URL wins when several are present.
    match = _VIDEO_ID_RE.search(url.strip())
    return match.group(1) if match else None


# Self-check table: (input URL, expected video ID).
urls = [
    ("https://www.youtube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    ("https://youtu.be/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    ("https://www.youtube.com/embed/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    ("https://www.youtube.com/shorts/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    ("https://www.youtube.com/live/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    ("  https://www.youtube.com/watch?v=dQw4w9WgXcQ  ", "dQw4w9WgXcQ"),  # Whitespace
    ("https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1s", "dQw4w9WgXcQ"),
    ("https://m.youtube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"),
]

print("Testing YouTube ID extraction...")
for url, expected in urls:
    result = get_youtube_id(url)
    print(f"URL: {url.strip()} -> Expected: {expected}, Got: {result} -> {'PASS' if result == expected else 'FAIL'}")