
    bCiZ!                         S SK r S SKJrJr  S SKJr  S SKJr  S SKrSSK	J
r
  SSKJr  \
R                  " S5      rS	\\R                   \\\4   4S
 jr " S S\5      r\ " S S5      5       r\ " S S5      5       rg)    N)	dataclassfield)Enum)Optional   )logging)tracedContinuousBatchingLoggerreturnc                  `   [         R                  R                  5       (       a  [         R                  " S5      n [         R                  R	                  5         [         R                  R                  5         [         R                  R                  U 5      R                  n[         R                  R                  U 5      n[         R                  R                  U 5      nO[         R                  R                  R                  5       (       a  [         R                  R                  R                  5       (       aX  [         R                  " S5      n [         R                  R                  5       nU[         R                  R                  5       -
  nSnO[         R                  " S5      n S nSnSnXX#4$ )Ncudampsr   cpu)torchr   is_availabledeviceempty_cachesynchronizeget_device_propertiestotal_memorymemory_reservedmemory_allocatedbackendsr   is_builtdriver_allocated_memoryrecommended_max_memory)r   r   reserved_memoryallocated_memorys       n/home/james-whalen/.local/lib/python3.13/site-packages/transformers/generation/continuous_batching/requests.pyget_device_and_memory_breakdownr       s   zz  f%

 

 zz77?LL**44V< ::66v>				(	(	*	*u~~/A/A/J/J/L/Le$yy88:'%))*J*J*LLe$BB    c                   4    \ rS rSrSrSrSrSrSrSr	Sr
S	rS
rg)RequestStatus5   z5Status of a generation request through its lifecycle.pending
prefillingprefilling_splitsplit_pending_remainderdecodingfinishedfailed N)__name__
__module____qualname____firstlineno____doc__PENDING
PREFILLINGPREFILLING_SPLITSPLIT_PENDING_REMAINDERDECODINGFINISHEDFAILED__static_attributes__r,   r!   r   r#   r#   5   s*    ?GJ)7HHFr!   r#   c                       \ rS rSr% Sr\\S'   \" \S9r	\\
   \S'   \" \S9r\\
   \S'   \" \S9r\\   \S'   Sr\\   \S	'   \R"                  r\\S
'   \" \R&                  S9r\\S'   Srg)GenerationOutputA   a  Tracks the output of a generation request.

Attributes:
    request_id (str): The ID of the generation request.
    prompt_ids (list[int]): The IDs of the prompt tokens.
    generated_tokens (list[int]): The generated tokens.
    logprobs (list[float]): The log probabilities of the generated tokens.
    error (Optional[str]): Any error message associated with the request. When None, the request was successful.
    status (RequestStatus): The status of the request.
    created_time (float): The time the request was created.

request_iddefault_factory
prompt_idsgenerated_tokenslogprobsNerrorstatuscreated_timer,   )r-   r.   r/   r0   r1   str__annotations__r   listr@   intrA   rB   floatrC   r   r#   r2   rD   timerE   r9   r,   r!   r   r;   r;   A   sy    
 O!$7JS	7"'"=d3i=!$7Hd5k7E8C=)11FM1		:L%:r!   r;   c                      \ rS rSr% Sr\\S'   Sr\\	\
      \S'   Sr\\	\
      \S'   \" \	S9r\	\
   \S'   \" \	S9r\	\
   \S	'   S
r\
\S'   S
r\
\S'   \R$                  r\\S'   Sr\
\S'   Sr\
\S'   \" \R,                  S9r\\S'   Sr\\   \S'   Sr\\\4   \S'   \S\4S j5       r\R<                  S\4S j5       rS rS\
4S jr S\
4S jr!\"S\
S\#4S j5       r$S r%S  r&S!r'g)"RequestStateX   a=  Tracks the state of a generation request through its lifecycle.

Attributes:
    request_id (str): The ID of the generation request.
    full_prompt_ids (list[int] | None): The tokens IDs of the full prompt.
    prompt_ids (list[int] | None): The tokens IDs currently being processed.
    remaining_prompt_ids (list[int]): The tokens IDs remaining to be processed (for split requests).
    static_outputs (list[int]): The generated tokens.
    allocated_blocks (int): The number of blocks allocated to the request.
    position_offset (int): The current position in the sequence for position_ids.
    status (RequestStatus): The status of the request: can be one of PENDING, PREFILLING, PREFILLING_SPLIT,
                            SPLIT_PENDING_REMAINDER, DECODING, FINISHED, FAILED
    max_new_tokens (int): The maximum number of new tokens to generate.
    eos_token_id (int): The ID of the end-of-sequence token.
    created_time (float): The time the request was created.
    error (Optional[str]): Any error message associated with the request. When None, has had no error yet.
r=   Nfull_prompt_idsr@   r>   remaining_prompt_idsstatic_outputsr   allocated_blocksposition_offset_status   max_new_tokenseos_token_idrE   rC   )rW   rW   lifespanr   c                     U R                   $ )N)rT   selfs    r   rD   RequestState.status{   s    ||r!   valuec                     U R                   [        R                  :X  a  [        R                  " 5       S4U l        OMU[        R
                  :X  a9  U R                  S   [        R                  " 5       4U l        U R                  5         Xl         g )NrW   r   )rT   r#   r2   rK   rY   r7   log_end_of_request)r\   r^   s     r   rD   r]      s`    <<=000!YY["-DMm,,,!]]1-tyy{;DM##%r!   c                 "   [        U R                  5      nU R                  5       nU R                  S   U R                  -
  nU R                  S   U R                  -
  n[
        R                  SU R                   SU< SU< SU< SU< 3
5        g )Nr      zRequest z finished: prefill_len = z decode_len = z start_time = z end_time = )lenrO   generated_lenrY   rE   loggerinfor=   )r\   prefill_len
decode_len
start_timeend_times        r   r`   RequestState.log_end_of_request   s    $../'')
]]1%(9(99
==#d&7&77t''A;2B/J?RaT^Sbbodlcpq	
r!   c                     U R                   $ )zCGet the current length of the sequence (prompt + generated tokens).)rS   r[   s    r   current_lenRequestState.current_len   s    ###r!   c                 ,    [        U R                  5      $ )z*Get the number of tokens generated so far.)rc   rQ   r[   s    r   rd   RequestState.generated_len   s    4&&''r!   token_idc                 `   U R                   [        R                  :w  a  gXR                  :H  =(       a    U R                  S:g  nU R	                  5       U R
                  :  nU(       a  U(       a  U R                  R                  U/5        U(       d  U(       a  [        R                  U l         gg)zUpdate the request with a newly generated token and check for completion.

Args:
    token_id: The token ID to add to the output sequence

Returns:
    bool: True if the request is now complete, False otherwise
FrW   T)	rD   r#   r6   rX   rd   rV   rQ   extendr7   )r\   rq   is_eos
is_max_lens       r   update_with_tokenRequestState.update_with_token   s     ;;-000...J43D3D3J'')T-@-@@
 6&&z2Z'00DKr!   c           
      n   SU R                    3SU R                   3SU R                  5        3S[        U R                  5       3S[        U R
                  5       3SU R                   3S[        U R                  5       3SU R                   3S	U R                   3/	nS
SR                  U5      -   S-   $ )Nzrequest_id=zstatus=zout_tokens=zquery_length=zremaining_tokens=z
kv_length=zfull_prompt_length=zallocated_blocks=zgenerated_tokens=zRequestState(
	z,
	z
))r=   rT   rd   rc   r@   rP   rS   rO   rR   rQ   join)r\   msgs     r   __repr__RequestState.__repr__   s    $//*+dll^$$,,./0C012D$=$= >?@--./!#d&:&:";!<= 5 567 3 345

 #W\\#%66>>r!   c           	          [        U R                  U R                  U R                  U R                  / U R
                  S9$ )z7Convert the request state to a GenerationOutput object.)r=   r@   rD   rA   rB   rC   )r;   r=   rO   rD   rQ   rC   r[   s    r   to_generation_output!RequestState.to_generation_output   s9    ++;;!00**
 	
r!   )rT   rY   rD   )(r-   r.   r/   r0   r1   rF   rG   rO   r   rH   rI   r@   r   rP   rQ   rR   rS   r#   r2   rT   rV   rX   rK   rE   rJ   rC   rY   tuplepropertyrD   setterr`   rm   rd   r	   boolrv   r{   r~   r9   r,   r!   r   rM   rM   X   sI   & O+/OXd3i(/&*Jc#*&+D&A$s)A %d ;NDI;cOS*22G]2NCL#		:L%:E8C=$,HeE5L!,   ]]M  
$S $(s (
 # $  4?	
r!   rM   )rK   dataclassesr   r   enumr   typingr   r   utils.loggingr   utils.metricsr	   	getLoggerre   r   r   rI   r    r#   r;   rM   r,   r!   r   <module>r      s     (    $ # 
		5	6Cu||S#s/J)K C,	D 	 ; ; ;, s
 s
 s
r!   