38146-vm/test_regex_v3.py
2026-02-04 16:21:56 +00:00

44 lines
1.5 KiB
Python

import re
# One pre-compiled pattern for every supported URL shape.
#   * The ID must follow an explicit marker: a ``v`` query parameter, the
#     ``youtu.be/`` short host, or one of the known path prefixes.  A bare
#     "/" is deliberately NOT a marker, otherwise any 11-character path
#     segment (e.g. ``/c/SomeChannel``) would be mistaken for a video ID.
#   * ``[?&]v=`` anchors the parameter name so ``prev=``/``rev=`` don't match.
#   * The trailing negative lookahead enforces the exact 11-character length
#     while still allowing ``?``, ``&``, ``#``, ``/`` or end-of-string after it.
_YOUTUBE_ID_RE = re.compile(
    r'(?:[?&]v=|youtu\.be/|/(?:live|embed|shorts|v|e)/)'
    r'([0-9A-Za-z_-]{11})'
    r'(?![0-9A-Za-z_-])'
)


def get_youtube_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Supported formats (scheme and sub-domain are not checked):
        https://youtu.be/VIDEO_ID
        https://www.youtube.com/watch?v=VIDEO_ID   (``v`` in any position)
        https://www.youtube.com/live/VIDEO_ID
        https://www.youtube.com/embed/VIDEO_ID
        https://www.youtube.com/shorts/VIDEO_ID
        https://www.youtube.com/v/VIDEO_ID and /e/VIDEO_ID (legacy embeds)

    Extra query parameters, a ``#fragment`` or a trailing path may follow
    the ID. Leading and trailing whitespace in ``url`` is ignored.

    Args:
        url: The URL string to inspect. Falsy values (``None``, ``""``)
            are accepted.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is falsy or no
        video ID can be found in it.
    """
    if not url:
        return None
    match = _YOUTUBE_ID_RE.search(url.strip())
    return match.group(1) if match else None
# Table of (input URL, expected video ID) pairs fed to get_youtube_id below.
urls = [
    # Canonical desktop watch page.
    ("https://www.youtube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    # Short-link host.
    ("https://youtu.be/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    # Path-based variants.
    ("https://www.youtube.com/embed/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    ("https://www.youtube.com/shorts/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    ("https://www.youtube.com/live/dQw4w9WgXcQ", "dQw4w9WgXcQ"),
    # Surrounding whitespace around the URL.
    (" https://www.youtube.com/watch?v=dQw4w9WgXcQ ", "dQw4w9WgXcQ"),
    # Additional query parameter after the ID.
    ("https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=1s", "dQw4w9WgXcQ"),
    # Mobile sub-domain.
    ("https://m.youtube.com/watch?v=dQw4w9WgXcQ", "dQw4w9WgXcQ"),
]

print("Testing YouTube ID extraction...")

# Run every case and print one PASS/FAIL line per URL.
for url, expected in urls:
    result = get_youtube_id(url)
    verdict = 'PASS' if result == expected else 'FAIL'
    shown_url = url.strip()  # the report shows the URL without padding
    print(f"URL: {shown_url} -> Expected: {expected}, Got: {result} -> {verdict}")