diff --git a/core/services/__pycache__/resolution.cpython-311.pyc b/core/services/__pycache__/resolution.cpython-311.pyc
index 9fd6878..7c7cad7 100644
Binary files a/core/services/__pycache__/resolution.cpython-311.pyc and b/core/services/__pycache__/resolution.cpython-311.pyc differ
diff --git a/core/services/resolution.py b/core/services/resolution.py
index 841913e..fe7fa9f 100644
--- a/core/services/resolution.py
+++ b/core/services/resolution.py
@@ -2,7 +2,7 @@ import requests
 import logging
 from bs4 import BeautifulSoup
 from core.models import Entity, Relationship, Source
-from urllib.parse import urljoin, quote
+from urllib.parse import urljoin, quote, unquote
 
 logger = logging.getLogger(__name__)
 
@@ -16,7 +16,7 @@ class WebCrawler:
     def fetch_url(self, url):
         """Fetch URL, extract title, meta description, and top images."""
         try:
-            logger.info(f"Crawling page: {url}")
+            logger.info(f"CRAWLER: Fetching {url}")
             response = self.session.get(url, timeout=10)
             response.raise_for_status()
             soup = BeautifulSoup(response.text, "html.parser")
@@ -34,29 +34,33 @@ class WebCrawler:
 
             return {"title": title, "description": description}, images
         except Exception as e:
-            logger.warning(f"Crawling failed for {url}: {e}")
+            logger.error(f"CRAWLER ERROR: {url}: {e}")
             return None, []
 
     def search(self, query):
-        """Perform a Google search."""
-        search_url = f"https://www.google.com/search?q={quote(query)}"
+        """Perform a DuckDuckGo search."""
+        search_url = f"https://duckduckgo.com/html/?q={quote(query)}"
        try:
             response = self.session.get(search_url, timeout=10)
             response.raise_for_status()
             soup = BeautifulSoup(response.text, "html.parser")
             results = []
-            for g in soup.select("div.g"):
-                title_elem = g.select_one("h3")
-                link_elem = g.select_one("a")
-                if title_elem and link_elem:
-                    url = link_elem.get("href")
-                    if url.startswith("/url?q="):
-                        url = url.split("/url?q=")[1].split("&")[0]
-                    results.append({"title": title_elem.get_text(), "url": url})
+            for res in soup.find_all("div", class_="result"):
+                a_tag = res.find("a", class_="result__a")
+                if a_tag and a_tag.get("href"):
+                    href = a_tag.get("href")
+                    # Extract real URL from DDG redirection
+                    url = ""
+                    if "uddg=" in href:
+                        url = unquote(href.split("uddg=")[1].split("&")[0])
+                    else:
+                        url = href
+
+                    results.append({"title": a_tag.get_text(), "url": url})
             return results
         except Exception as e:
-            logger.error(f"Search failed for {query}: {e}")
+            logger.error(f"SEARCH ERROR: {query}: {e}")
             return []
 
 
 class NetworkDiscoveryService:
@@ -96,8 +100,7 @@ class NetworkDiscoveryService:
 class EntityResolutionService:
     @staticmethod
     def resolve(data):
-        # Fallback to NetworkDiscoveryService for now
         query = data.get('query')
         if query:
             return NetworkDiscoveryService.perform_osint_search(query)
-        return None
+        return None
\ No newline at end of file
diff --git a/core/templates/core/dashboard.html b/core/templates/core/dashboard.html
index 431e822..7177835 100644
--- a/core/templates/core/dashboard.html
+++ b/core/templates/core/dashboard.html
@@ -59,7 +59,7 @@ document.getElementById('searchForm').addEventListener('submit', function(e) {
     loader.style.display = "inline-block";
     graphContainer.html('<div>Discovering network, please wait...</div>');
-    fetch(`{% url 'core:search_api' %}?q=${encodeURIComponent(query)}`)
+    fetch(`{% url 'core:search_api' %}?q=${encodeURIComponent(query)}`, { method: 'GET' })
     .then(response => {
         if (!response.ok) throw new Error("Search failed");
         return response.json();