diff --git a/core/services/__pycache__/resolution.cpython-311.pyc b/core/services/__pycache__/resolution.cpython-311.pyc index 19de47f..2a2dc89 100644 Binary files a/core/services/__pycache__/resolution.cpython-311.pyc and b/core/services/__pycache__/resolution.cpython-311.pyc differ diff --git a/core/services/resolution.py b/core/services/resolution.py index c153a4f..72824d3 100644 --- a/core/services/resolution.py +++ b/core/services/resolution.py @@ -7,53 +7,39 @@ from urllib.parse import urljoin, quote logger = logging.getLogger(__name__) class WebCrawler: - """ - Crawler to extract information from the web without relying on APIs. - """ def __init__(self): self.session = requests.Session() self.session.headers.update({ - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" }) def fetch_url(self, url): - """ - Fetch URL and extract basic metadata and image links. - """ + """Fetch URL, extract title, meta description, and top images.""" try: + logger.info(f"Crawling page: {url}") response = self.session.get(url, timeout=10) response.raise_for_status() soup = BeautifulSoup(response.text, "html.parser") - # Extract meta tags - metadata = { - "title": soup.title.string if soup.title else "No title", - "description": soup.find("meta", attrs={"name": "description"}), - } - if metadata["description"]: - metadata["description"] = metadata["description"].get("content", "") - else: - metadata["description"] = "" + title = soup.title.string.strip() if soup.title else "" + desc_tag = soup.find("meta", attrs={"name": "description"}) + description = desc_tag.get("content", "").strip() if desc_tag else "" - # Extract images (top 3) images = [] - for img in soup.find_all("img", limit=3): + for img in soup.find_all("img", limit=5): src = img.get("src") - if src: - images.append(urljoin(url, src)) + if src and not src.startswith("data:"): + full_src = urljoin(url, src) + images.append(full_src) - return metadata, images + return {"title": title, "description": description}, images except Exception as e: - logger.error(f"Failed to crawl {url}: {e}") + logger.warning(f"Crawling failed for {url}: {e}") return None, [] def search(self, query): - """ - Perform a simulated search on Google using requests. - """ + """Perform a Google search.""" search_url = f"https://www.google.com/search?q={quote(query)}" - logger.info(f"Crawling URL: {search_url}") - try: response = self.session.get(search_url, timeout=10) response.raise_for_status() @@ -65,78 +51,44 @@ class WebCrawler: link_elem = g.select_one("a") if title_elem and link_elem: url = link_elem.get("href") - # Handle Google's link redirecting if url.startswith("/url?q="): url = url.split("/url?q=")[1].split("&")[0] - results.append({ - "title": title_elem.get_text(), - "url": url - }) + results.append({"title": title_elem.get_text(), "url": url}) return results except Exception as e: - logger.error(f"Search failed: {e}") + logger.error(f"Search failed for {query}: {e}") return [] class NetworkDiscoveryService: @staticmethod def perform_osint_search(query): - """ - Performs discovery using Web Crawling, extracting metadata and images. - """ - try: - crawler = WebCrawler() - search_results = crawler.search(query) + """Perform discovery using Web Crawling, extracting metadata and images.""" + crawler = WebCrawler() + search_results = crawler.search(query) + + source, _ = Source.objects.get_or_create(name='Web Crawler Engine') + person, _ = Entity.objects.get_or_create(entity_type='PERSON', value=query, source=source) + + # Use first valid image found among search results if available + found_photo = None + + for res in search_results[:3]: # Limit crawling to top 3 + meta, images = crawler.fetch_url(res['url']) - source, _ = Source.objects.get_or_create(name='Web Crawler Engine') + if images and not found_photo: + found_photo = images[0] - # 1. Create main entity - person, _ = Entity.objects.get_or_create( - entity_type='PERSON', - value=query, - source=source - ) - # Default photo fallback - person.photo_url = f"https://api.dicebear.com/7.x/pixel-art/svg?seed={query.replace(' ', '+')}" - - # 2. Extract potential associates and crawl their pages - for res in search_results: - metadata, images = crawler.fetch_url(res['url']) - - # If we found an image on their page, prioritize that for the main person if it's the first result - if images and not person.photo_url.startswith("https://api.dicebear.com"): - person.photo_url = images[0] - elif images and person.photo_url.startswith("https://api.dicebear.com"): - # For demo purposes, set photo from the first relevant page - person.photo_url = images[0] - - # Create associate - associate_val = metadata['title'] if metadata and metadata['title'] != "No title" else res['title'][:50] - if associate_val != query: + if meta: + associate_val = meta['title'] or res['title'] + if associate_val and associate_val.lower() != query.lower(): associate, _ = Entity.objects.get_or_create( - entity_type='PERSON', - value=associate_val, - source=source + entity_type='PERSON', value=associate_val, source=source ) - - # Store link/metadata info if you have a field for it - - # 3. Create relationship Relationship.objects.get_or_create( - source_entity=person, - target_entity=associate, - relationship_type='ASSOCIATED_WITH', - weight=0.5 + source_entity=person, target_entity=associate, + relationship_type='ASSOCIATED_WITH', weight=0.5 ) - - person.save() - return person - - except Exception as e: - logger.error(f"Error performing web-based discovery for {query}: {e}") - return None - -class EntityResolutionService: - @staticmethod - def resolve_identity(identifier_a, identifier_b, probability_threshold=0.8): - # Implementation remains unchanged - return True + + person.photo_url = found_photo or f"https://api.dicebear.com/7.x/initials/svg?seed={quote(query)}" + person.save() + return person \ No newline at end of file diff --git a/core/templates/core/dashboard.html b/core/templates/core/dashboard.html index 0a2f27d..431e822 100644 --- a/core/templates/core/dashboard.html +++ b/core/templates/core/dashboard.html @@ -6,6 +6,7 @@ .node-group { cursor: pointer; } .node-circle { stroke: #fff; stroke-width: 2px; } .node-text { font-size: 10px; pointer-events: none; } + #loader { display: none; }
Discovering network...
'); + const searchBtn = document.getElementById('searchBtn'); + const btnText = document.getElementById('btnText'); + const loader = document.getElementById('loader'); + + // UI Loading state + searchBtn.disabled = true; + btnText.textContent = "Searching..."; + loader.style.display = "inline-block"; + graphContainer.html('Discovering network, please wait...
'); fetch(`{% url 'core:search_api' %}?q=${encodeURIComponent(query)}`) - .then(response => response.json()) + .then(response => { + if (!response.ok) throw new Error("Search failed"); + return response.json(); + }) .then(data => { graphContainer.html(''); // clear renderGraph(data); + }) + .catch(err => { + graphContainer.html(`Error: ${err.message}
`); + }) + .finally(() => { + searchBtn.disabled = false; + btnText.textContent = "Discover"; + loader.style.display = "none"; }); }); @@ -135,4 +160,4 @@ function renderGraph(data) { } } -{% endblock %} +{% endblock %} \ No newline at end of file