
    V|h,                        S r SSKrSSKrSSKJrJr   " S S5      r " S S\5      r " S	 S
\5      r	\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      r\R                  " S5      rS rS r " S S5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      r " S S\5      rS  rg)!u  
A partial parser for WebVTT segments. Interprets enough of the WebVTT stream
to be able to assemble a single stand-alone subtitle file, suitably adjusting
timestamps on the way, while everything else is passed through unmodified.

Regular expressions based on the W3C WebVTT specification
<https://www.w3.org/TR/webvtt1/>. The X-TIMESTAMP-MAP extension is described
in RFC 8216 §3.5 <https://tools.ietf.org/html/rfc8216#section-3.5>.
    N   )int_or_nonetimetuple_from_msecc                   6    \ rS rSrSrS rS rS rS rS r	Sr
g	)
_MatchParser   z
An object that maintains the current parsing position and allows
conveniently advancing it as syntax elements are successfully parsed.
c                     Xl         SU l        g Nr   _data_pos)selfstrings     G/home/james-whalen/.local/lib/python3.13/site-packages/yt_dlp/webvtt.py__init___MatchParser.__init__   s    
	    c                 8   [        U[        R                  5      (       a&  UR                  U R                  U R
                  5      $ [        U[        5      (       a6  U R                  R                  XR
                  5      (       a  [        U5      $ g [        U5      eN)

isinstancerePatternmatchr   r   str
startswithlen
ValueErrorr   rs     r   r   _MatchParser.match   sh    a$$774::tyy11azz$$Q		221vmr   c                 8   Uc  SnO~[        U[        R                  5      (       a  [        UR	                  S5      5      nOD[        U[
        5      (       a  [        U5      nO#[        U[        5      (       a  UnO[        U5      eU =R                  U-  sl        U$ r
   )	r   r   Matchr   groupr   intr   r   )r   byamts      r   advance_MatchParser.advance$   st    :CBHH%%bhhqk"CC  b'CC  CR. 		S		r   c                 B    U R                  U R                  U5      5      $ r   )r'   r   r   s     r   consume_MatchParser.consume2   s    ||DJJqM**r   c                     [        U 5      $ r   )_MatchChildParserr   s    r   child_MatchParser.child5   s     &&r   r   N)__name__
__module____qualname____firstlineno____doc__r   r   r'   r*   r/   __static_attributes__ r   r   r   r      s     
+'r   r   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )r-   9   z
A child parser state, which advances through the same data as
its parent, but has an independent position. This is useful when
advancing through syntax elements we might later want to backtrack
from.
c                 f   > [         TU ]  UR                  5        Xl        UR                  U l        g r   )superr   r   _MatchChildParser__parentr   )r   parent	__class__s     r   r   _MatchChildParser.__init__A   s$    &KK	r   c                 P    U R                   U R                  l         U R                  $ )zG
Advance the parent state to the current position of this child state.
)r   r<   r.   s    r   commit_MatchChildParser.commitF   s     "YY}}r   )__parentr   )	r1   r2   r3   r4   r5   r   rA   r6   __classcell__r>   s   @r   r-   r-   9   s     
 r   r-   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )
ParseErrorN   c                    > UR                   UR                  UR                  S-    n[        TU ]  SUR                   SU< S35        g )Nd   zParse error at position z (near ))r   r   r;   r   )r   parserdatar>   s      r   r   ParseError.__init__O   sA    ||FKKc(9:3FKK=xqQRr   r7   )r1   r2   r3   r4   r   r6   rD   rE   s   @r   rG   rG   N   s    S Sr   rG   zL(?x)
    (?:([0-9]{1,}):)?
    ([0-9]{2}):
    ([0-9]{2})\.
    ([0-9]{3})?
z\Zz(?:\r\n|[\r\n]|$)z(?:\r\n|[\r\n])+z[ \t]*c                 \    S[        S [        U R                  5       S5       5       5      -  $ )z
Convert a parsed WebVTT timestamp (a re.Match obtained from _REGEX_TS)
into an MPEG PES timestamp: a tick counter at 90 kHz resolution.
Z   c              3   T   #    U  H  u  p[        U=(       d    S 5      U-  v   M      g7f)r   N)r$   ).0partmults      r   	<genexpr>_parse_ts.<locals>.<genexpr>i   s$      _0]*$DIA0]s   &()i6 i`  i  r   )sumzipgroupstss    r   	_parse_tsr\   d   s8    
  _03BIIKA\0]_ _ _ _r   c                 <    S[        [        U S-   S-  5      5      -  $ )zb
Convert an MPEG PES timestamp into a WebVTT timestamp.
This will lose sub-millisecond precision.
z%02u:%02u:%02u.%03u-   rP   )r   r$   rZ   s    r   
_format_tsr_   m   s"    
 !#6sBG?7K#LLLr   c                   4    \ rS rSrSrS r\S 5       rS rSr	g)Blocku   z
An abstract WebVTT block.
c                 N    UR                  5        H  u  p#[        XU5        M     g r   )itemssetattr)r   kwargskeyvals       r   r   Block.__init__z   s    HCDs# 'r   c                     UR                  U R                  5      nU(       d  g UR                  U5        U " UR                  S5      S9$ )Nr   )raw)r   _REGEXr'   r#   )clsrL   ms      r   parseBlock.parse~   s9    LL$qqwwqz""r   c                 :    UR                  U R                  5        g r   )writerk   r   streams     r   
write_intoBlock.write_into   s    TXXr   r7   N)
r1   r2   r3   r4   r5   r   classmethodro   ru   r6   r7   r   r   ra   ra   u   s%    $ # #r   ra   c                       \ rS rSrSrSrg)HeaderBlock   za
A WebVTT block that may only appear in the header part of the file,
i.e. before any cue blocks.
r7   N)r1   r2   r3   r4   r5   r6   r7   r   r   ry   ry      s     	r   ry   c                      \ rS rSr\R
                  " S5      r\R
                  " S5      r\R
                  " S5      r\R
                  " S5      r	\R
                  " S5      r
\R
                  " S5      r\S 5       r\S	 5       rS
 rSrg)Magic   z,\ufeff?WEBVTT([ \t][^\r\n]*)?(?:\r\n|[\r\n])zX-TIMESTAMP-MAP=zLOCAL:zMPEGTS:([0-9]+)z[ \t]*,[ \t]*z6(?:(?!-->)[^\r\n])+:(?:(?!-->)[^\r\n])+(?:\r\n|[\r\n])c                 F   UR                  5       n UR                  U R                  5      nU(       a=  UR                  [        5      nUc  [	        U5      e[        U5      nUc  [	        U5      eOVUR                  U R                  5      nU(       a)  [        UR                  S5      5      nUc  [	        U5      eO[	        U5      eUR                  U R                  5      (       a  M  UR                  [        5      (       a  O[	        U5      eUR                  5         WW4$ )Nr   )r/   r*   _REGEX_TSMAP_LOCAL	_REGEX_TSrG   r\   _REGEX_TSMAP_MPEGTSr   r#   _REGEX_TSMAP_SEP	_REGEX_NLrA   )rm   rL   rn   localmpegtss        r   __parse_tsmapMagic.__parse_tsmap   s    s556ANN9-9$V,,!!=$V,, ! NN3#:#:;(4F~(00 & %V,,~~c2233~~i((V$$f}r   c                    UR                  5       nUR                  U R                  5      nU(       d  [        U5      eUR	                  S5      nSu  pEnUR                  [
        5      (       dw  UR                  U R                  5      (       a  U R                  U5      u  pEMO  UR                  U R                  5      nU(       a  XbR	                  S5      -  nM  [        U5      eUR                  5         U " X5XFS9$ )Nr   )NN r   )extrar   r   meta)
r/   r*   rl   rG   r#   r   _REGEX_TSMAP_Magic__parse_tsmap_REGEX_METArA   )rm   rL   rn   r   r   r   r   s          r   ro   Magic.parse   s    NN3::&V$$
,t..++~~c..// # 1 1& 9s/A
"V$$UFFr   c                    UR                  S5        U R                  b  UR                  U R                  5        UR                  S5        U R                  (       d  U R                  (       a  UR                  S5        UR                  [	        U R                  b  U R                  OS5      5        UR                  S5        UR                  [        U R                  b  U R                  OS5      5        UR                  S5        U R                  (       a  UR                  U R                  5        UR                  S5        g )NWEBVTT
zX-TIMESTAMP-MAP=LOCAL:r   z,MPEGTS:)rr   r   r   r   r_   r   r   rs   s     r   ru   Magic.write_into   s    X::!LL$T::LL12LL$**2HDJJaPQLL$LLDKK,CT[[KLLL99LL#Tr   r7   N)r1   r2   r3   r4   r   compilerl   r   r   r   r   r   rw   r   ro   ru   r6   r7   r   r   r|   r|      s    ZZGHF ::12LI.**%78zz"23 **VWK : G G*r   r|   c                   8    \ rS rSr\R
                  " S5      rSrg)
StyleBlock   zs(?x)
        STYLE[\ \t]*(?:\r\n|[\r\n])
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    r7   Nr1   r2   r3   r4   r   r   rl   r6   r7   r   r   r   r          ZZ  	Fr   r   c                   8    \ rS rSr\R
                  " S5      rSrg)RegionBlock   ze(?x)
        REGION[\ \t]*
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    r7   Nr   r7   r   r   r   r      r   r   r   c                   8    \ rS rSr\R
                  " S5      rSrg)CommentBlock   zo(?x)
        NOTE(?:\r\n|[\ \t\r\n])
        ((?:(?!-->)[^\r\n])+(?:\r\n|[\r\n]))*
        (?:\r\n|[\r\n])
    r7   Nr   r7   r   r   r   r      r   r   r   c                       \ rS rSrSr\R                  " S5      r\R                  " S5      r\R                  " S5      r	\R                  " S5      r
\S 5       rS r\S	 5       rS
 r\S 5       rS rSrg)CueBlocki  z.
A cue block. The payload is not interpreted.
z$((?:(?!-->)[^\r\n])+)(?:\r\n|[\r\n])z[ \t]+-->[ \t]+z[ \t]+((?:(?!-->)[^\r\n])+)z[^\r\n]+(?:\r\n|[\r\n])?c                 4   UR                  5       nS nUR                  U R                  5      nU(       a  UR                  S5      nUR                  [        5      nU(       d  g UR                  U R
                  5      (       d  g UR                  [        5      nU(       d  g UR                  U R                  5      nUR                  [        5        UR                  [        5      (       d  g [        U5      n[        U5      nUb  UR                  S5      OS n	[        R                  " 5       n
 UR                  U R                  5      nU(       d  O"U
R                  UR                  S5      5        ME  UR                  5         U " UXxU	U
R                  5       S9$ )Nr   r   )idstartendsettingstext)r/   r*   	_REGEX_IDr#   r   _REGEX_ARROW_REGEX_SETTINGS_REGEX_OPTIONAL_WHITESPACEr   r\   ioStringIO_REGEX_PAYLOADrr   rA   getvalue)rm   rL   id_rn   m0m1m2r   r   r   r   s              r   ro   CueBlock.parse  s6   NN3==)''!*C^^I&~~c..//^^I&^^C//012~~i(("m"$.288A;d{{}s112AJJqwwqz"	  	8
 	
r   c                    U R                   b,  UR                  U R                   5        UR                  S5        UR                  [        U R                  5      5        UR                  S5        UR                  [        U R                  5      5        U R
                  b,  UR                  S5        UR                  U R
                  5        UR                  S5        UR                  U R                  5        UR                  S5        g )Nr   z -->  )r   rr   r_   r   r   r   r   rs   s     r   ru   CueBlock.write_into7  s    77LL!LLZ

+,WZ)*==$LLLL'TTYYTr   c                 v    U R                   U R                  U R                  U R                  U R                  S.$ )Nr   r   r   r   r   r   r.   s    r   as_jsonCueBlock.as_jsonE  s1     ''ZZ88II
 	
r   c                 4    U R                   UR                   :H  $ r   )r   r   others     r   __eq__CueBlock.__eq__O  s    ||u}},,r   c                 4    U " US   US   US   US   US   S9$ )Nr   r   r   r   r   r   r7   )rm   jsons     r   	from_jsonCueBlock.from_jsonR  s3    Dzw-Uf*%
 	
r   c                    U R                   UR                   :w  a  gU R                  UR                  :w  a  gU R                  U R                  s=:*  =(       a&    UR                  s=:H  =(       a    UR                  :*  $ s  $ )NF)r   r   r   r   r   s     r   hingesCueBlock.hinges\  sY    99

"==ENN*zzTXXAAAA		AAAAr   r7   N)r1   r2   r3   r4   r5   r   r   r   r   r   r   rw   ro   ru   propertyr   r   r   r   r6   r7   r   r   r   r     s     

BCI::01Ljj!?@OZZ ;<N%
 %
N 
 
- 
 
Br   r   c              #     #    [        U R                  5       5      n[        R                  U5      v   UR	                  [
        5      (       d  UR                  [        5      (       a  M6  [        R                  U5      nU(       a  Uv   MX  [        R                  U5      nU(       a  Uv   Mz  [        R                  U5      nU(       a  Uv   M   UR	                  [
        5      (       dk  UR                  [        5      (       a  M6  [        R                  U5      nU(       a  Uv   MX  [        R                  U5      nU(       a  Uv   Mz  [        U5      eg7f)z
A generator that yields (partially) parsed WebVTT blocks when given
a bytes object containing the raw contents of a WebVTT file.
N)r   decoder|   ro   r   
_REGEX_EOFr*   _REGEX_BLANKr   r   r   r   rG   )frag_contentrL   blocks      r   parse_fragmentr   d  s     ,--/0F
++f
ll:&&>>,''!!&)K  (K""6*Kll:&&>>,''""6*Kv&K   's   EE)r5   r   r   utilsr   r   r   r-   	ExceptionrG   r   r   r   r   r   r   r\   r_   ra   ry   r|   r   r   r   r   r   r7   r   r   <module>r      s    
 	 3%' %'P *S S JJ  	 ZZ
JJ+,	zz-.ZZ	2 _M *	% 	XK Xv	 		+ 		5 	\Bu \B~*!r   