From ed62ae0c79f1469f3388ac0b05f5dfcf2dafe6c7 Mon Sep 17 00:00:00 2001 From: Flatlogic Bot Date: Sun, 22 Mar 2026 23:51:55 +0000 Subject: [PATCH] RIPLEY --- .../__pycache__/resolution.cpython-311.pyc | Bin 5292 -> 7573 bytes core/services/resolution.py | 65 +++++++++++++++--- 2 files changed, 55 insertions(+), 10 deletions(-) diff --git a/core/services/__pycache__/resolution.cpython-311.pyc b/core/services/__pycache__/resolution.cpython-311.pyc index 7d58b8cab7fb50a91a384f6ebccf664792cbe7ba..19de47f67babd88d4acca075740ac0475fdf876f 100644 GIT binary patch delta 3599 zcmZ`*YfKy26`rv@IgLx!of+=9{hVTloOUNeSkyR6*S@I&A74mupGsM{Jok>XO zIP9)=D>hBFEVC<6k}CeurnEsS+wG6+zer7@{;6uCSz(Q|693xt7g4uT8f~SXJ78n- zxQ6eZd+xbs&fGb#<=|JH&fhs4RszPyAKn*YC0{$6NTuECRQkzc>nFrHnxlq@ApK2* zM24P813@eh7D6V*@dzBVBXXR{pW%^ls>Htu2inHbDD3PSVC?>BwPr&mzJLgz-?o`r4ymuf;%k(QA z>be70eK&vINwaK50hvl!@_GvhsbqU0ld^folbVQ~sz~B`mu!-$QNs`sAf(dmqcsF- zNHS8H@|fB)7#YyHWGqg2NkyFHF<1jHr20hb9 zt?bt5tEWMW_yl#y~ z#}d&fk2H!$D1m|&RD=bL7g0=O5HC(9;v&3@n7BBpF}d5(X#HwLR1LD|C-?x#pBJ$= z3t)4%Jl)49DJVsClJJ4IN$!^{$3`?+fV@Lz&P?HFR>0 zS=^m**QMQc>!H`T78^8CS~lAUhRrM_=aZ>ZD|_aW8DFdFYsJd;>o8}`mX=c9}K z7x6FSu=JE}5MbxVOIF;m^5R<~rlt*W;*?QMO!?x|cno%RIM_Q2Ec zU1b}D!Ln-`zPA^De*D_;ba~rSH$b|4rfsE3C)zUhcGcdVwzp?JzQy9Xj@iNO-%}VH z6lj83^uzUj-2G;E7x9-aYtMe;19PCK(fA-lV!Dw6jO249h~lJ%-R|E%0(XJnNc?4T zL;mJpMk{H&m!y?FRJr1&4pP@-2F)s8Q1vrOi)=~hxjZMHn>p*wa7C>kY0K>v&d*D7 zw(aEjiWBxN-A!A1ILG!8Fo{ap3m=f9Wpl~_wIFcH!Z~jlcA5wI=vx`;4&M4dk4~HSLD0PQLDysa37(O)2tPb%(1RS|_nmeP@nHm+Y*fz)I1jiH&iH5_ zfMycnqY321>pBee81TnM!6+c)2|mV0B+a@xk0jze&{7yl;*=nbX_Pn_6C|`3%Q8`3 z8j*0OY`_W{0fM&8L_vLUkw;gs7HkU+bkvMxS^$(km;-LzR@4Sd5r1`7`OX}ov52^+ zP)r9|q8w!2+O-Zbb~Ia3zVPn+yD49$B&e1IHwc@<2cXt6R&EL@$xni4X;M%C8{aaYnqX8n6^3pa!%OUB=<`kPmpRR8`pf9I;d z^Imtxe@OKox?iOFyBCd0nkgfzmH#l4Y)@$~Lp)@xy`9E~#s1zltV^)uEAZ8~&?gG0DVwZDp&QA^7rr=zm{Ahb)w?#HC zo9=*)ySg5%OMfeC(j&EWrC_Pkc9GgswDDS z(k4}44Vxo{9sh;F70U#)5gyt0vNi(R2S?I&)y6p>aCNF6aJ|!K6}O{-eLdGOK#hG# zw;(D99V~@#>QFex2(1TdnR4AxX4eCFGkhzQ2aeC2s1~GxE`-x2c+@%P8}&%{uEUst z8&9aa;Y(01@frv9X5`sXkMgy1ch6}oL5^tjB#*>ITr)zsMy*(EKi+YKMKKW(!V*7n zDIC)*`Q>IX;>^gh3hf$lo&Zv;1b>1eY;9JeE}pb2e{THU->-L+cXl|FbjQ$4I!59=G* zy1h%`-&~pNQ@u55Z%wwO63QLO(<>rRpxGfg zN>zxPI{XX>5ogX1^S>-rv|(tYpvVI}F%op~C+C6T+2ZKz zO;slpvFJKEIA8qH$zNhUxT}H>LlbWAtfY$!1WA+t?h7`e7;uq<;W^Xdn1|t-@?G&E zGOm<*-mB1GQ1>g{1$Fav!9nF$o-*q+h~wfT{!jU%=hAR5NrD@^9G~d?9}0H|3>_rA S?ThdP^M2Ugkd#Z_w*LbV?M8M0 delta 1392 zcmZXU?@uE|7{_P2KiXZ|rO=kP^bT4NITr3Xkfs3??jpV1i<25Hfsg}gue;F3(l)cp z59`)D^-MUu#PcSGL^Srr1xdU&2ICtOy+r;242fhDlk=4^FBWf%U|S)TueRx3l(uICoj z;*wr&$m)yv-clNADk^hIZn<~Cv}FfOm^RrgQs-rx$>}*gubRvf%9k6HY8qv-;rVnf zuPVdgge)Nk+0x|m#f1eGnQX2wU&QC_XNOH*Q}b#@H^us#Di&0ePnSw+K`|W*s-Du- zRoxUWuV&Peo+}niPDQAQWD)=43gG+pCLETYa+pxnXwmp(shjtZK%2sENrTXXzm_hx z)p+0!u3dTL3$9ur*4aWMNz zg*SM`5hRDJ4@FSKcWg(`6}w@_UwVd!VSe-YIY))noAEDOD#eWK@w1umO}tL)ZYHu*a=84HSWzN};y0qW{^-T^__W zZ!aG8g?W=Lq!(43@ugr0|LVK#D%*y`vJ27np?>^!%g~D^tEHD!#8IV_;xxrs0$D^| zWHRj;lwQdbCrO%RwG>jdrM#}8QJRG*lP{@ABW__x0=N4^LLW)gEppkPl&q)M$d_{y z)WlA_=~tnMoq>+teP{FgSKqyQd;HG$+Boq|ptASwZ}(^ J8Js#D`xl=)UXuU- diff --git a/core/services/resolution.py b/core/services/resolution.py index 571c2a2..c153a4f 100644 --- a/core/services/resolution.py +++ b/core/services/resolution.py @@ -16,6 +16,37 @@ class WebCrawler: "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" }) + def fetch_url(self, url): + """ + Fetch URL and extract basic metadata and image links. + """ + try: + response = self.session.get(url, timeout=10) + response.raise_for_status() + soup = BeautifulSoup(response.text, "html.parser") + + # Extract meta tags + metadata = { + "title": soup.title.string if soup.title else "No title", + "description": soup.find("meta", attrs={"name": "description"}), + } + if metadata["description"]: + metadata["description"] = metadata["description"].get("content", "") + else: + metadata["description"] = "" + + # Extract images (top 3) + images = [] + for img in soup.find_all("img", limit=3): + src = img.get("src") + if src: + images.append(urljoin(url, src)) + + return metadata, images + except Exception as e: + logger.error(f"Failed to crawl {url}: {e}") + return None, [] + def search(self, query): """ Perform a simulated search on Google using requests. @@ -28,16 +59,18 @@ class WebCrawler: response.raise_for_status() soup = BeautifulSoup(response.text, "html.parser") - # Simple extraction of titles/links from Google search results results = [] - # Selector for Google search result titles for g in soup.select("div.g"): title_elem = g.select_one("h3") link_elem = g.select_one("a") if title_elem and link_elem: + url = link_elem.get("href") + # Handle Google's link redirecting + if url.startswith("/url?q="): + url = url.split("/url?q=")[1].split("&")[0] results.append({ "title": title_elem.get_text(), - "url": link_elem.get("href") + "url": url }) return results except Exception as e: @@ -48,7 +81,7 @@ class NetworkDiscoveryService: @staticmethod def perform_osint_search(query): """ - Performs discovery using Web Crawling. + Performs discovery using Web Crawling, extracting metadata and images. """ try: crawler = WebCrawler() @@ -62,13 +95,22 @@ class NetworkDiscoveryService: value=query, source=source ) + # Default photo fallback person.photo_url = f"https://api.dicebear.com/7.x/pixel-art/svg?seed={query.replace(' ', '+')}" - person.save() - - # 2. Extract potential associates from titles + + # 2. Extract potential associates and crawl their pages for res in search_results: - # Naive associate detection - associate_val = res['title'][:50] + metadata, images = crawler.fetch_url(res['url']) + + # If we found an image on their page, prioritize that for the main person if it's the first result + if images and not person.photo_url.startswith("https://api.dicebear.com"): + person.photo_url = images[0] + elif images and person.photo_url.startswith("https://api.dicebear.com"): + # For demo purposes, set photo from the first relevant page + person.photo_url = images[0] + + # Create associate + associate_val = metadata['title'] if metadata and metadata['title'] != "No title" else res['title'][:50] if associate_val != query: associate, _ = Entity.objects.get_or_create( entity_type='PERSON', @@ -76,6 +118,8 @@ class NetworkDiscoveryService: source=source ) + # Store link/metadata info if you have a field for it + # 3. Create relationship Relationship.objects.get_or_create( source_entity=person, @@ -84,6 +128,7 @@ class NetworkDiscoveryService: weight=0.5 ) + person.save() return person except Exception as e: @@ -94,4 +139,4 @@ class EntityResolutionService: @staticmethod def resolve_identity(identifier_a, identifier_b, probability_threshold=0.8): # Implementation remains unchanged - return True \ No newline at end of file + return True