
    љi{                         S SK r S SKJr  S SKJrJrJrJrJr  S SK	J
r
Jr  S SKJrJrJrJr  S SKJr  \" \5      r " S S5      r " S	 S
\5      rg)    N)	getLogger)OptionalDictListTupleUnion)urlparseparse_qs)async_playwrightBrowserPageTimeoutError)Toolc                      \ rS rSrSSSSSSSS.S\S\S\S	\S
\S\S\4S jjrS rSS\S\	S\	4S jjr
SS\S\S\4S jjrS S\\\\   4   S\\\\4      S\S\\\\   4   4S jjrSS\\\\\\\4      4      S\S\\   4S jjrS rSrg)!
WebScraper   Ngpt-4.1Fdefault_user_agentopenai_api_keyopenai_base_urlopenai_modelreturn_summarysummary_system_promptdebugr   r   r   r   r   r   r   c                   S U l         S U l        [        R                  " 5       U l        U=(       d    SU l        U(       a  SS KnX@l        U=(       d    SU l        SU R                  ;   aJ  [        [        U5      R                  5      R                  SS /5      S   n	UR                  UU	USS9U l        OUR                  X#SS9U l        OS U l        XPl        Xpl        g )	NzMozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1r   z1Summarize the given text in up to 500 characters.azurezapi-version0u  )api_keyapi_versionbase_urltimeout)r   r!   r"   )_playwright_browserasyncioLock_lockr   openair   r   r
   r	   querygetAsyncAzureOpenAIopenai_clientAsyncClientr   r   )
selfr   r   r   r   r   r   r   r(   r    s
             [/home/james-whalen/.local/lib/python3.13/site-packages/aiavatar/sts/llm/tools/webscraper.py__init__WebScraper.__init__   s    +/\\^
"4  #B  9B ,)>)uBuD&$+++&x'@'F'FGKKM\`[abcde%+%<%<* +,!	 &= &" &,%7%7rw%7%x"!%D,
    c                   #    U R                   b  U R                  c  U R                   IS h  vN   U R                   c%  [        5       R	                  5       I S h  vN U l         U R                  c0  U R                   R
                  R                  SS9I S h  vN U l        S S S 5      IS h  vN   g g  N NZ N N! , IS h  vN  (       d  f       g = f7f)NT)headless)r#   r$   r'   r   startchromiumlaunchr.   s    r/   
initializeWebScraper.initialize&   s     #t}}'<zzz##+-=-?-E-E-G'GD$==(*.*:*:*C*C*J*JTX*J*Y$YDM	 "zz (=!'G$Y	 "zzzsh   +CB3C)B;B5<B;B7	B;!C,B9-C5B;7B;9C;CCCCpagemax_attemptsdelay_msc                 6  #    [        U5       HM  nUR                  SSS9I S h  vN   UR                  U5      I S h  vN    UR                  SSS9I S h  vN     g    [        R                  SUR                   35        g  N] NF N/! [         a     M  f = f7f)Nz"document.readyState === 'complete'i'  r"   i  z!ReadyState is not completed for: )rangewait_for_functionwait_for_timeoutr   loggerwarningurl)r.   r;   r<   r=   attempts        r/   _wait_until_fully_rendered%WebScraper._wait_until_fully_rendered.   s     \*G(()MW\(]]]''111,,-Q[_,``` + 	:488*EF ^1 a sU   "BBBBBBBB(BBB
BBBBbodyresearch_goalreturnc                 ~  #     U R                   nU(       a  USU 3-  nU R                  R                  R                  R	                  U R
                  SUS.SUS./S9I S h  vN nUR                  S   R                  R                  $  N'! [         a$  n[        R                  SU 35        Us S nA$ S nAff = f7f)Nz
Consider the following goal when summarizing, as the summary should focus on collecting information relevant to achieving it:
system)rolecontentuser)modelmessagesr   z7Error at _make_summary. Return original body instead.: )r   r,   chatcompletionscreater   choicesmessagerO   	ExceptionrC   error)r.   rI   rJ   system_promptrespexs         r/   _make_summaryWebScraper._make_summary;   s     	 66M  $e  fs  et  "u  u++00<<CC''%-@#5 D  D <<?**222  	LLRSURVWXK	sG   B=AB "B
#&B 	B=
B 
B:B5/B:0B=5B::B=rE   headersc                   #    [        U[        5      (       a,  U R                  U Vs/ s H  oDU4PM     snU5      I S h  vN $ U R                  5       I S h  vN   U R                  R                  U=(       d    SU R                  0S9I S h  vN nUR                  5       I S h  vN n U R                  (       a  [        R                  SU 35        UR                  USS9I S h  vN nUc  [        S5      eUR                  nU R                  U5      I S h  vN    UR                  S5      I S h  vN n	U R                   (       aW  U R"                  (       aF  U R%                  XS9I S h  vN n	U R                  (       a  [        R                  S	U S
U SU	 35        UUU	S.UR+                  5       I S h  vN   $ s  snf  GN GNp GN= GN( N N N Nh! [&         a'  n
[        R)                  SU SU
 35        Sn	 S n
A
NcS n
A
ff = f NR! [&         aD  n
[        R-                  SU SU
 35        US SS.s S n
A
UR+                  5       I S h  vN    $ S n
A
ff = f! UR+                  5       I S h  vN    f = f7f)Nz
User-Agent)extra_http_headersz
Fetching: r   r?   zNo response returnedrI   )rJ   zSummary (url=z, research_goal=z):
zCould not get body from z:  )rE   status_coderI   zFailed to fetch )
isinstancelistfetch_multiple_bodiesr9   r$   new_contextr   new_pager   rC   infogoto
ValueErrorstatusrG   
inner_textr   r,   r]   rX   rD   closerY   )r.   rE   r_   rJ   ucontextr;   responserc   rI   r\   s              r/   
fetch_bodyWebScraper.fetch_bodyO   s.    c4  3334O3a\34OQ^___oo11WEwQ]_c_v_vPw1xx%%''&	"zzj./!YYsEY::H !788"//K11$777	!__V44&&4+=+=!%!3!3D!3!VVDzzmC58HW[\`[a$bc * --/!!Y 5P_x'
 ; 8 5V  !9#bEF& "  	LL+C52$78#  --/!!	 --/!!s.  %I=F?I=G I=G5I=G
I=%G&I=+<H 'G(2H GH  G 4G58G -G.3G !H &I=9H:I=I=
I=I=H H G G 
H	"H?H H		H I=
I I8I9I =I=II=II I:3I64I::I=requestsc           	         ^ #    U 4S jnU VVs/ s H  u  pEU" XEU5      PM     nnn[         R                  " U6 I S h  vN $ s  snnf  N
7f)Nc                 D   >#    TR                  XU5      I S h  vN $  N7fN)rr   )rE   r_   rJ   r.   s      r/   fetch/WebScraper.fetch_multiple_bodies.<locals>.fetch   s     }EEEEs     )r%   gather)r.   rt   rJ   rx   rE   r_   taskss   `      r/   rf    WebScraper.fetch_multiple_bodies   sH     	F IQQs]3Q^^U+++ R+s   A
AA
AA
c                    #    U R                   (       a"  U R                   R                  5       I S h  vN   U R                  (       a#  U R                  R                  5       I S h  vN   g g  N9 N7frw   )r$   rn   r#   stopr8   s    r/   shutdownWebScraper.shutdown   sP     ==--%%'''""'')))  ()s!   /A/A+2A/$A-%A/-A/)	r$   r'   r#   r   r   r,   r   r   r   )      rw   )NN)__name__
__module____qualname____firstlineno__strboolr0   r9   r   intrG   r]   r   r   r   r   dictrr   r   rf   r   __static_attributes__ r2   r/   r   r      sr   48PTmq  HQ  jo  NR  bg c # gj   BE   cg   HK   [_ 4ZGT G G\_ G C 3 (."E#tCy.$9 ."HTRUWZRZ^D\ ."tw ."  DI  JN  PT  UY  PZ  JZ  D[ ."b,DsHTRUWZRZ^D\?\9]4^ ,or ,  C  DH  I ,*r2   r   c                   d   ^  \ rS rSrSSSSSSSSSSSS.S\S\S\S	\S
\S\S\4U 4S jjjrSrU =r$ )WebScraperTool   Nr   F)r   r   r   r   r   r   namespecinstruction
is_dynamicr   r   r   r   r   r   r   r   c                   > [        UUUUUUUS9U l        [        TU ]  U=(       d    SU=(       d#    SU=(       d    SSSSSS0S	S
.SSS.S.S/S.S.S.U R                  R                  U	U
5        g )Nr   get_webpage_bodyfunctionz=Fetch and return the body content of a web page given its URLobjectarraytypestringzList of URLs to scrape.)r   itemsdescriptionzCThe user's goal or question to investigate based on the given URLs.)r   r   )rE   rJ   rE   )r   
propertiesrequired)r   r   
parameters)r   r   )r   web_scrapersuperr0   rr   )r.   r   r   r   r   r   r   r   r   r   r   r   	__class__s               r/   r0   WebScraperTool.__init__   s     &1)+%)"7
 	&& " 6$6#b ( )0$*H*" 0I$ )1/t.' &+G#0 ''9	
r2   )r   )	r   r   r   r   r   r   r0   r   __classcell__)r   s   @r/   r   r      sv     #'"#%$%)6
  6
 	6

 6
 6
 6
  #6
 6
 6
r2   r   )r%   loggingr   typingr   r   r   r   r   urllib.parser	   r
   playwright.async_apir   r   r   r   aiavatar.sts.llmr   r   rC   r   r   r   r2   r/   <module>r      s@      5 5 + N N !	8	@* @*F7
T 7
r2   