
    ȅiC              
          % S SK r S SKrS SKrS SKrS SKrS SKJr  S SKJr  S SK	J
r
  S SKJrJr  S SKJrJr  S SKJrJr  S SKJr  S S	KJrJr  S S
KJr  S SKJr  S SKJrJrJrJ r   \RB                  " \"5      r#\RH                  \%S'   \
" SS9 " S S5      5       r&S\'\(   S\\&   4S jr)S\'\(   S\\&   4S jr*S,S\(S\(S\+\'\(   \\&   4   4S jjr,S\(4S jr-SSSSSS S!S"S#.r. " S$ S%\5      r/ " S& S'\5      r0 " S( S)5      r1S*\2SS4S+ jr3g)-    N)Iterable)ThreadPoolExecutor)	dataclass)BaseHTTPRequestHandlerThreadingHTTPServer)parse_qsurlparse)
DictLoaderEnvironment)tabulate)get_world_sizetcpstore_client)build_db)	JobConfig)
CollectiveGroup
MembershipNCCLCallloggerT)slotsc                   6    \ rS rSr% \\S'   \\S'   S rS rSr	g)Response   status_codetextc                 n    U R                   S:w  a%  [        SU R                    SU R                   35      eg )N   zHTTP z: )r   RuntimeErrorr   selfs    [/home/james-whalen/.local/lib/python3.13/site-packages/torch/distributed/debug/_frontend.pyraise_for_statusResponse.raise_for_status"   s8    s"t'7'7&8499+FGG #    c                 B    [         R                  " U R                  5      $ N)jsonloadsr   r   s    r!   r'   Response.json&   s    zz$))$$r$    N)
__name__
__module____qualname____firstlineno__int__annotations__strr"   r'   __static_attributes__r*   r$   r!   r   r      s    
IH%r$   r   urlsreturnc                    ^ SS K mSnS[        S[        4U4S jjn[        US9 nUR	                  X 5      nS S S 5        U$ ! , (       d  f       W$ = f)Nr      urlr4   c                 f   > TR                  U 5      n[        UR                  UR                  5      $ r&   )postr   r   r   )r7   resprequestss     r!   getfetch_thread_pool.<locals>.get0   s(    }}S!(($))44r$   )max_workers)r;   r1   r   r   map)r3   r>   r<   executorrespsr;   s        @r!   fetch_thread_poolrB   *   sT    K5 5 5 
	4S' 
5 L 
5	4 Ls   A
Ac                    ^^ SS K mSTR                  S[        S[        4S jmS[        [           S[
        [           4UU4S jjn[        R                  " U" U 5      5      $ )Nr   sessionr7   r4   c                    #    U R                  U5       IS h  vN nUR                  5       I S h  vN n[        UR                  U5      sS S S 5      IS h  vN   $  NC N- N	! , IS h  vN  (       d  f       g = f7fr&   )r9   r   r   status)rD   r7   r:   r   s       r!   fetchfetch_aiohttp.<locals>.fetch?   sN     <<$$$DDKK. %$$$ %$$$sT   A<AA<A"AA"
A<A A<A" A<"A9(A+)A95A<r3   c           
        >#    TR                  5        IS h  vN n[        R                  " U  Vs/ s H  nT" X5      PM     sn6 I S h  vN sS S S 5      IS h  vN   $  NGs  snf  N N! , IS h  vN  (       d  f       g = f7fr&   )ClientSessionasynciogather)r3   rD   r7   aiohttprG   s      r!   rL   fetch_aiohttp.<locals>.gatherD   sY     ((**g )N#%*=)NOO +**)NO +***sa   BA BA+A"
A+
A'A+BA)B"A+)B+B1A42B>B)rM   rJ   r1   r   listr   rK   run)r3   rL   rM   rG   s     @@r!   fetch_aiohttprQ   :   sb     /W22 / / /
P49 P(); P P ;;vd|$$r$   endpointargsc                 >   [        5       n[        [        5       5       Vs/ s H  nSU 3PM
     nnUR                  U5      nU Vs/ s H  ofR	                  5        SU  SU 3PM     nn [        U5      nXW4$ s  snf s  snf ! [         a    [        U5      n XW4$ f = f)Nrankz	/handler/?)r   ranger   	multi_getdecoderQ   ImportErrorrB   )rR   rS   storerkeysaddrsaddrrA   s           r!   	fetch_allr`   K   s    E %n&6 78 71d1#J 7D8OOD!EFKLediz4&9eEL)e$ < 9L  )!%(<)s   A8!A=*B BBblobc                 X    [         R                  " U 5      n[         R                  " USS9$ )N   )indent)r'   r(   dumps)ra   parseds     r!   format_jsonrg   Y   s!    ZZF::fQ''r$   a  
<!doctype html>
<head>
    <title>{% block title %}{% endblock %} - PyTorch Distributed</title>
    <link rel="shortcut icon" type="image/x-icon" href="https://pytorch.org/favicon.ico?">

    <style>
        body {
            margin: 0;
            font-family:
                -apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,
                "Helvetica Neue",Arial,"Noto Sans",sans-serif,"Apple Color Emoji",
                "Segoe UI Emoji","Segoe UI Symbol","Noto Color Emoji";
            font-size: 1rem;
            font-weight: 400;
            line-height: 1.5;
            color: #212529;
            text-align: left;
            background-color: #fff;
        }
        h1, h2, h2, h4, h5, h6, .h1, .h2, .h2, .h4, .h5, .h6 {
            margin-bottom: .5rem;
            font-weight: 500;
            line-height: 1.2;
        }
        nav {
            background-color: rgba(0, 0, 0, 0.17);
            padding: 10px;
            display: flex;
            align-items: center;
            padding: 16px;
            justify-content: flex-start;
        }
        nav h1 {
            display: inline-block;
            margin: 0;
        }
        nav a {
           margin: 0 8px;
        }
        section {
            max-width: 1280px;
            padding: 16px;
            margin: 0 auto;
        }
        pre {
            white-space: pre-wrap;
            max-width: 100%;
        }
    </style>
</head>

<nav>
    <h1>Torch Distributed Debug Server</h1>

    <a href="/">Home</a> <!--@lint-ignore-->
    <a href="/stacks">Python Stack Traces</a> <!--@lint-ignore-->
    <a href="/pyspy_dump">py-spy Stacks</a> <!--@lint-ignore-->
    <a href="/fr_trace">FlightRecorder CPU</a> <!--@lint-ignore-->
    <a href="/fr_trace_json">(JSON)</a> <!--@lint-ignore-->
    <a href="/fr_trace_nccl">FlightRecorder NCCL</a> <!--@lint-ignore-->
    <a href="/fr_trace_nccl_json">(JSON)</a> <!--@lint-ignore-->
    <a href="/profile">torch profiler</a> <!--@lint-ignore-->
    <a href="/wait_counters">Wait Counters</a> <!--@lint-ignore-->
    <a href="/tcpstore">TCPStore</a> <!--@lint-ignore-->
</nav>

<section class="content">
  {% block header %}{% endblock %}
  {% block content %}{% endblock %}
</section>
    z
{% extends "base.html" %}
{% block header %}
  <h1>{% block title %}Index{% endblock %}</h1>
{% endblock %}
{% block content %}
Hi
{% endblock %}
    a  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}{{title}}{% endblock %}</h1>
{% endblock %}
{% block content %}
    {% for i, (addr, resp) in enumerate(zip(addrs, resps)) %}
        <h2>Rank {{ i }}: {{ addr }}</h2>
        {% if resp.status_code != 200 %}
            <p>Failed to fetch: status={{ resp.status_code }}</p>
            <pre>{{ resp.text }}</pre>
        {% else %}
            <pre>{{ resp.text }}</pre>
        {% endif %}
    {% endfor %}
{% endblock %}
    a  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}{{ title }}{% endblock %}</h1>
{% endblock %}
{% block content %}
    {% for i, (addr, resp) in enumerate(zip(addrs, resps)) %}
        <h2>Rank {{ i }}: {{ addr }}</h2>
        {% if resp.status_code != 200 %}
            <p>Failed to fetch: status={{ resp.status_code }}</p>
            <pre>{{ resp.text }}</pre>
        {% else %}
            <pre>{{ format_json(resp.text) }}</pre>
        {% endif %}
    {% endfor %}
{% endblock %}
    a  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}torch.profiler{% endblock %}</h1>
{% endblock %}

{% block content %}
    <form action="" method="get">
        <label for="duration">Duration (seconds):</label>
        <input type="number" id="duration" name="duration" value="{{ duration }}" min="1" max="60">
        <input type="submit" value="Submit">
    </form>

    <script>
    function stringToArrayBuffer(str) {
        const encoder = new TextEncoder();
        return encoder.encode(str).buffer;
    }
    async function openPerfetto(data) {
        const ui = window.open('https://ui.perfetto.dev/#!/');
        if (!ui) { alert('Popup blocked. Allow popups for this page and click again.'); return; }

        // Perfetto readiness handshake: PING until we receive PONG
        await new Promise((resolve, reject) => {
        const onMsg = (e) => {
            if (e.source === ui && e.data === 'PONG') {
            window.removeEventListener('message', onMsg);
            clearInterval(pinger);
            resolve();
            }
        };
        window.addEventListener('message', onMsg);
        const pinger = setInterval(() => { try { ui.postMessage('PING', '*'); } catch (_e) {} }, 250);
        setTimeout(() => { clearInterval(pinger); window.removeEventListener('message', onMsg); reject(); }, 20000);
        }).catch(() => { alert('Perfetto UI did not respond. Try again.'); return; });

        ui.postMessage({
        perfetto: {
            buffer: stringToArrayBuffer(JSON.stringify(data)),
            title: "torch profiler",
            fileName: "trace.json",
        }
        }, '*');
    }
    </script>

    {% for i, (addr, resp) in enumerate(zip(addrs, resps)) %}
        <h2>Rank {{ i }}: {{ addr }}</h2>
        {% if resp.status_code != 200 %}
            <p>Failed to fetch: status={{ resp.status_code }}</p>
            <pre>{{ resp.text }}</pre>
        {% else %}
            <script>
            function run{{ i }}() {
                var data = {{ resp.text | safe }};
                openPerfetto(data);
            }
            </script>

            <button onclick="run{{ i }}()">View {{ i }}</button>
        {% endif %}
    {% endfor %}
{% endblock %}
    a  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}TCPStore Keys{% endblock %}</h1>
{% endblock %}
{% block content %}
    <pre>
    {% for k, v in zip(keys, values) -%}
{{ k }}: {{ v | truncate(100) }}
    {% endfor %}
    </pre>
{% endblock %}
    ag  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}{{ title }}{% endblock %}</h1>
{% endblock %}
{% block content %}
    <h2>Groups</h2>
    {{ groups | safe }}
    <h2>Memberships</h2>
    {{ memberships | safe }}
    <h2>Collectives</h2>
    {{ collectives | safe }}
    <h2>NCCL Calls</h2>
    {{ ncclcalls | safe }}
{% endblock %}
    aQ  
{% extends "base.html" %}
{% block header %}
    <h1>{% block title %}py-spy Stack Traces{% endblock %}</h1>
{% endblock %}
{% block content %}
    <form action="" method="get">
        <input type="checkbox" id="native" name="native" value="1"/>
        <label for="native">Native</label>
        <input type="checkbox" id="subprocesses" name="subprocesses" value="1"/>
        <label for="subprocesses">Subprocesses</label>
        <input type="submit" value="Submit">
    </form>

    {% for i, (addr, resp) in enumerate(zip(addrs, resps)) %}
        <h2>Rank {{ i }}: {{ addr }}</h2>
        {% if resp.status_code != 200 %}
            <p>Failed to fetch: status={{ resp.status_code }}</p>
            <pre>{{ resp.text }}</pre>
        {% else %}
            <pre>{{ resp.text }}</pre>
        {% endif %}
    {% endfor %}
{% endblock %}
    )z	base.html
index.htmlraw_resp.htmljson_resp.htmlprofile.htmltcpstore.htmlfr_trace.htmlpyspy_dump.htmlc                   Z    \ rS rSr% \R
                  r\R                  \S'   Sr	\
\S'   Srg)_IPv6HTTPServeriK  address_familyi   request_queue_sizer*   N)r+   r,   r-   r.   socketAF_INET6rq   AddressFamilyr0   rr   r/   r2   r*   r$   r!   rp   rp   K  s!    +1??NF((:""r$   rp   c            	           \ rS rSr% S\S'   S rS rS\4S jrS\	\\
\   4   4S jrS\4S	 jrS
\4S\S\S\S\4S jjrSrg
)HTTPRequestHandleriP  FrontendServerfrontendc                 P    [         R                  SU R                  S   X-  5        g )Nz%s %sr   )r   infoclient_address)r    formatrS   s      r!   log_messageHTTPRequestHandler.log_messageS  s$    "M	
r$   c                 :    U R                   R                  U 5        g r&   )ry   _handle_requestr   s    r!   do_GETHTTPRequestHandler.do_GETZ  s    %%d+r$   r4   c                 @    [        U R                  5      R                  $ r&   )r	   pathr   s    r!   get_pathHTTPRequestHandler.get_path]  s    		"'''r$   c                 4    [        U R                  5       5      $ r&   )r   get_raw_queryr   s    r!   	get_queryHTTPRequestHandler.get_query`  s    **,--r$   c                 @    [        U R                  5      R                  $ r&   )r	   r   queryr   s    r!   r    HTTPRequestHandler.get_raw_queryc  s    		"(((r$   Nnamedefaulttypec                 J    U R                  5       nX;  a  U$ U" XA   S   5      $ )Nr   )r   )r    r   r   r   r   s        r!   get_query_arg HTTPRequestHandler.get_query_argf  s,      NEKN##r$   r*   )r+   r,   r-   r.   r0   r~   r   r1   r   dictrO   r   r   objectr   r   r2   r*   r$   r!   rw   rw   P  st    
,(# (.4T#Y/ .)s ) ,0c$$"($7;$	$ $r$   rw   c                   4   \ rS rSrS\4S jrSS jrSS jrS\SS4S	 jr	S
\
S\S\4S jrS\S\4S jrS\S\4S jrS\S\4S jrS\\
   S\\   S\4S jrS\S\4S jrS\S\4S jrS\S\4S jrS\S\4S jrS\S\4S jrS\S\4S jrS\S\4S jrSrg)rx   io  portc                 ^   [        [        5      n[        USS9U l        U R                  R                  R                  [        [        [        S9  U R                  U R                  U R                  U R                  U R                  U R                  U R                  U R                   U R"                  U R$                  S.
U l        [)        S[*        4SU 05      nSU4n[-        XC5      U l        [0        R2                  " U R4                  SSS	S
9U l        U R6                  R9                  5         g )NT)loaderenable_async)ziprg   	enumerate)
/z/stacksz/pyspy_dumpz	/fr_tracez/fr_trace_jsonz/fr_trace_ncclz/fr_trace_nccl_jsonz/profilez/wait_countersz	/tcpstorerw   ry    r*   z distributed.debug.FrontendServer)targetrS   daemonr   )r
   	templatesr   
_jinja_envglobalsupdater   rg   r   _handle_index_handle_stacks_handle_pyspy_dump_handle_fr_trace_handle_fr_trace_json_handle_fr_trace_nccl_handle_fr_trace_nccl_json_handle_profiler_handle_wait_counters_handle_tcpstore_routesr   rw   rp   _server	threadingThread_serve_threadstart)r    r   r   RequestHandlerClassserver_addresss        r!   __init__FrontendServer.__init__p  s   I&%V$G&&# 	' 	
 ##**22.."88"88#'#B#B--"88..
 # !
 d&~K '';;3	
 	r$   r4   Nc                      U R                   R                  5         g ! [         a    [        R	                  S5         g f = f)Nz got exception in frontend server)r   serve_forever	Exceptionr   	exceptionr   s    r!   r   FrontendServer._serve  s8    	ALL&&( 	A?@	As    ??c                 8    U R                   R                  5         g r&   )r   joinr   s    r!   r   FrontendServer.join  s    r$   reqc                    UR                  5       nX R                  ;  a  UR                  SSU 35        g U R                  U   n U" U5      nUR                  S5        UR                  SS5        UR                  5         UR                  R                  U5        g ! [        [        4 a>  n[
        R                  SU5        UR                  SS[        U5       35         S nAg S nAff = f)	Ni  zHandler not found: z-Exception in frontend server when handling %si  zException: r   zContent-typez	text/html)r   r   
send_errorr   
SystemExitr   r   reprsend_responsesend_headerend_headerswfilewrite)r    r   r   handlerr:   es         r!   r   FrontendServer._handle_request  s    ||~||#NN3"5dV <=,,t$		3<D 	#4		 :& 	? NN3+d1gY 78	s   B C+-4C&&C+templatekwargsc                 t    U R                   R                  U5      R                  " S0 UD6R                  5       $ )Nr*   )r   get_templaterenderencode)r    r   r   s      r!   _render_templateFrontendServer._render_template  s.    ++H5<<FvFMMOOr$   c                 $    U R                  S5      $ )Nrh   )r   )r    r   s     r!   r   FrontendServer._handle_index  s    $$\22r$   c                 >    [        S5      u  p#U R                  SSX#S9$ )Ndump_tracebackri   Stackstitler^   rA   r`   r   r    r   r^   rA   s       r!   r   FrontendServer._handle_stacks  s.     !12$$85 % 
 	
r$   c                 \    [        SUR                  5       5      u  p#U R                  SUUS9$ )N
pyspy_dumprn   r^   rA   )r`   r   r   r   s       r!   r   !FrontendServer._handle_pyspy_dump  s:     s/@/@/BC$$ % 
 	
r$   r^   rA   c                    [        5       nUR                  / S9nSUl        SUl        0 n[	        U5       H?  u  pgUR                  5         UX   S.UR                  5       EnSU;  a  / US'   XSU S3'   MA     [        [        UR                  5       5      5      S   n	[        XTU	5      n
U R                  SS	[        U
R                  [        R                  S
S9[        U
R                   ["        R                  S
S9[        U
R$                  [&        R                  S
S9[        U
R(                  [*        R                  S
S9S9$ )N)rS   T)rU   	host_nameentriesrU   z.jsonversionrm   FlightRecorderhtml)headerstablefmt)r   groupsmembershipscollectives	ncclcalls)r   
parse_argsallow_incomplete_ranksverboser   r"   r'   nextitervaluesr   r   r   r   r   _fieldsr   r   r   r   r   r   )r    r^   rA   configrS   detailsrU   r:   dumpr   dbs              r!   _render_fr_traceFrontendServer._render_fr_trace  s8     b )&*##E*JD!!#"[ ))+D
 $"$Y*.d4&&' + tGNN,-.y9gW-$$"BIIu}}vN 
(:(:V !
(:(:V r||X5E5EPVW % 
 	
r$   c                 R    [        S5      u  p#U R                  U[        U5      5      $ )Nfr_trace_jsonr`   r   rO   r   s       r!   r   FrontendServer._handle_fr_trace  s%     1$$UDK88r$   c                 @    [        S5      u  p#U R                  SSUUS9$ )Nr   rj   r   r   r   r   s       r!   r   $FrontendServer._handle_fr_trace_json  s2     1$$"	 % 
 	
r$   c                 T    [        SS5      u  p#U R                  U[        U5      5      $ )Ndump_nccl_trace_jsononlyactive=truer  r   s       r!   r   $FrontendServer._handle_fr_trace_nccl  s)     !79JK$$UDK88r$   c                 B    [        SS5      u  p#U R                  SSUUS9$ )Nr  r  rj   zFlightRecorder NCCLr   r   r   s       r!   r   )FrontendServer._handle_fr_trace_nccl_json  s6     !79JK$$'	 % 
 	
r$   c                 n    UR                  SS[        S9n[        SSU 35      u  p4U R                  SX4S9$ )Ndurationg      ?)r   r   torch_profilez	duration=rk   r   )r   floatr`   r   )r    r   r  r^   rA   s        r!   r   FrontendServer._handle_profiler  sD    $$Z5$I IhZ2HI$$^5$NNr$   c                 >    [        S5      u  p#U R                  SSX#S9$ )Nwait_counter_valuesrj   zWait Countersr   r   r   s       r!   r   $FrontendServer._handle_wait_counters  s.     !67$$O5 % 
 	
r$   c                     [        SS9nUR                  5       nUR                  5         UR                  U5       Vs/ s H  n[	        U5      PM     nnU R                  SX5S9$ s  snf )Nr   )prefixrl   )r]   r   )r   	list_keyssortrX   r   r   )r    r   r[   r]   vr   s         r!   r   FrontendServer._handle_tcpstore  s_    r* 		#(??4#89#8a$q'#89$$_4$OO :s   A$)r   r   r   r   )r4   N)r+   r,   r-   r.   r/   r   r   r   rw   r   r1   r   bytesr   r   r   r   rO   r   r   r   r   r   r   r   r   r   r2   r*   r$   r!   rx   rx   o  s=   (S (TA#5 $ .P P P5 P3!3 3 3
"4 
 

&8 
U 
"
d3i "
X "
5 "
H9$6 95 9

); 
 
9); 9 9

.@ 
U 
O$6 O5 O
); 
 
P$6 P5 Pr$   rx   r   c                     [         R                  [        R                  5        [	        U S9n[         R                  SUR                  R                  5        UR                  5         g )N)r   z"Frontend server started on port %d)	r   setLevelloggingINFOrx   r{   r   server_portr   )r   servers     r!   mainr   %  s>    
OOGLL!&F
KK4fnn6P6PQ
KKMr$   )r   )4rK   r'   r  rs   r   collections.abcr   concurrent.futuresr   dataclassesr   http.serverr   r   urllib.parser   r	   jinja2r
   r   r   torch.distributed.debug._storer   r   4torch.distributed.flight_recorder.components.builderr   ;torch.distributed.flight_recorder.components.config_managerr   2torch.distributed.flight_recorder.components.typesr   r   r   r   	getLoggerr+   r   Loggerr0   r   rO   r1   rB   rQ   tupler`   rg   r   rp   rw   rx   r/   r   r*   r$   r!   <module>r.     sW        $ 1 ! C + *  J I Q  !**84 4 	% 	% 	%DI (8*<  %S	 %hx&8 %" 3 d3i(AS6S0T (c (GP""?@ cj	Z#) #
$/ $>sP sPls t r$   