
    ^h,                        S SK JrJrJr  S SKJr  S SKJrJ	r	  S SK
Jr  SSKJr  SSKJr  SSKJrJr  SS	KJrJrJr  SS
KJrJr  SSKJr  SSKJr  SSKJr  \" \5      r\S:  a  \rO\	r " S S\5      rg)    )absolute_importdivisionunicode_literals)unichr)dequeOrderedDict)version_info   )spaceCharacters)entities)asciiLettersasciiUpper2Lower)digits	hexDigitsEOF)
tokenTypestagTokenTypes)replacementCharacters)HTMLInputStream)Trie)      c                     ^  \ rS rSrSrSMU 4S jjrS rS rSNS jrS r	S r
S	 rS
 rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS rS r S r!S  r"S! r#S" r$S# r%S$ r&S% r'S& r(S' r)S( r*S) r+S* r,S+ r-S, r.S- r/S. r0S/ r1S0 r2S1 r3S2 r4S3 r5S4 r6S5 r7S6 r8S7 r9S8 r:S9 r;S: r<S; r=S< r>S= r?S> r@S? rAS@ rBSA rCSB rDSC rESD rFSE rGSF rHSG rISH rJSI rKSJ rLSK rMSLrNU =rO$ )OHTMLTokenizer   zThis class takes care of tokenizing HTML.

* self.currentToken
  Holds the token that is currently being processed.

* self.state
  Holds a reference to the method to be invoked... XXX

* self.stream
  Points to HTMLInputStream object.
c                    > [        U40 UD6U l        X l        SU l        / U l        U R
                  U l        SU l        S U l        [        [        U ]/  5         g NF)r   streamparser
escapeFlaglastFourChars	dataStatestateescapecurrentTokensuperr   __init__)selfr   r   kwargs	__class__s       \/home/james-whalen/.local/lib/python3.13/site-packages/bleach/_vendor/html5lib/_tokenizer.pyr'   HTMLTokenizer.__init__(   sS    %f77  ^^
 !mT+-    c              #     #    [        / 5      U l        U R                  5       (       a  U R                  R                  (       aN  [
        S   U R                  R                  R                  S5      S.v   U R                  R                  (       a  MN  U R                  (       a/  U R                  R                  5       v   U R                  (       a  M/  U R                  5       (       a  M  gg7f)zThis is where the magic happens.

We do our usually processing through the states and when we have a token
to return we yield the token which pauses processing until the next token
is requested.

ParseErrorr   typedataN)r   
tokenQueuer#   r   errorsr   poppopleftr(   s    r+   __iter__HTMLTokenizer.__iter__7   s       ) jjll++$$),7ASASAWAWXYAZ[[ ++$$$//oo--// /// jjlls   BC*>C*C*(C*c                 *   [         nSnU(       a  [        nSn/ nU R                  R                  5       nXR;   aD  U[        La;  UR                  U5        U R                  R                  5       nXR;   a  U[        La  M;  [        SR                  U5      U5      nU[        ;   a2  [        U   nU R                  R                  [        S   SSU0S.5        OSUs=::  a  S	::  d  O  US
:  a+  SnU R                  R                  [        S   SSU0S.5        OSUs=::  a  S::  d@  O  SUs=::  a  S::  d1  O  SUs=::  a  S::  d"  O  SUs=::  a  S::  d  O  U[        / SQ5      ;   a(  U R                  R                  [        S   SSU0S.5         [        U5      nUS:w  a@  U R                  R                  [        S   SS.5        U R                  R                  U5        U$ ! [         a+    US-
  n[        SUS-	  -  5      [        SUS-  -  5      -   n N|f = f)zThis function returns either U+FFFD or the character based on the
decimal or hexadecimal representation. It also discards ";" if present.
If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.

       r/   z$illegal-codepoint-for-numeric-entity	charAsIntr1   r2   datavarsi   i      �r
                  i  i  )#   i  i  i i i i i i i i i i i i i i i i i	 i	 i
 i
 i i i i i i i i i i i rA   i   i   i  ;z numeric-entity-without-semicolonr0   )r   r   r   charr   appendintjoinr   r3   r   	frozensetchr
ValueErrorunget)	r(   isHexallowedradix	charStackcr>   rJ   vs	            r+   consumeNumberEntity!HTMLTokenizer.consumeNumberEntityG   s    GE	 KKlq|Q  "A lq|
 	*E2	 --(3DOO""J|,D$J1<i0H$J K ,f,8#DOO""J|,D$J1<i0H$J K
 9..9..9..9..Y (E F F &&
<0H(N5@)4L(N OK 9~ 8OO""J|,D$F$H IKKa   K'6Q"W-.Vq5y5I1JJKs   
G 2HHc                    SnU R                   R                  5       /nUS   [        ;   d  US   [        SS4;   d  Ub(  XS   :X  a   U R                   R	                  US   5        GOUS   S:X  Ga  SnUR                  U R                   R                  5       5        US   S;   a+  SnUR                  U R                   R                  5       5        U(       a  US   [        ;   d  U(       d>  US   [        ;   a1  U R                   R	                  US   5        U R                  U5      nGOPU R                  R                  [        S	   S
S.5        U R                   R	                  UR                  5       5        SSR                  U5      -   nGOUS   [        Laa  [        R                  SR                  U5      5      (       d  O7UR                  U R                   R                  5       5        US   [        La  Ma   [        R                  SR                  US S 5      5      n[!        U5      nUb  US   S:w  a%  U R                  R                  [        S	   SS.5        US   S:w  af  U(       a_  UW   [$        ;   d  XG   [        ;   d  XG   S:X  a>  U R                   R	                  UR                  5       5        SSR                  U5      -   nO[&        U   nU R                   R	                  UR                  5       5        USR                  UWS  5      -  nObU R                  R                  [        S	   SS.5        U R                   R	                  UR                  5       5        SSR                  U5      -   nU(       a  U R(                  S   S   S==   U-  ss'   g U[        ;   a  SnOSnU R                  R                  [        U   US.5        g ! ["         a    S n GNf = f)N&r   <#F)xXTr/   zexpected-numeric-entityr0   r=   rI   znamed-entity-without-semicolon=zexpected-named-entityr2   r
   SpaceCharacters
Characters)r   rJ   r   r   rQ   rK   r   r   rX   r3   r   r5   rM   entitiesTriehas_keys_with_prefixlongest_prefixlenKeyErrorr   r   r%   )	r(   allowedCharfromAttributeoutputrU   hex
entityNameentityLength	tokenTypes	            r+   consumeEntityHTMLTokenizer.consumeEntity   sO   [[%%'(	aLO+y|S#/N([aL-HKKil+q\S CT[[--/0}
*  !1!1!34 	"2IbMV$;!!)B-011#6 &&
<0H0I(K L!!)--/2rwwy11 R=+#889KLL  !1!1!34 R=+")883B9PQ
": %b>S(OO**J|4L,L,N OrNc)m|,<,6,3KK%%immo6 2779#55F%j1FKK%%immo6bggi&>??F&&
<0H(?(A B!!)--/2rwwy11f%b)!,6,(-	(	OO""Jy,A6#RS=  "!
"s   :2O O"!O"c                 $    U R                  USS9  g)zIThis method replaces the need for "entityInAttributeValueState".
        T)ri   rj   N)rp   )r(   ri   s     r+   processEntityInAttribute&HTMLTokenizer.processEntityInAttribute   s     	{$Gr-   c                 F   U R                   nUS   [        ;   a  US   R                  [        5      US'   US   [        S   :X  aC  US   n[        U5      n[        U5      [        U5      :  a  UR                  USSS2   5        X1S'   US   [        S   :X  a^  US   (       a%  U R                  R                  [        S   S	S
.5        US   (       a%  U R                  R                  [        S   SS
.5        U R                  R                  U5        U R                  U l        g)zThis method is a generic handler for emitting the tags. It also sets
the state to "data" because that's what's needed after a token has been
emitted.
r1   nameStartTagr2   Nr^   EndTagr/   zattributes-in-end-tagr0   selfClosingzself-closing-flag-on-end-tag)r%   r   	translater   r   attributeMaprg   updater3   rK   r"   r#   )r(   tokenrawr2   s       r+   emitCurrentTokenHTMLTokenizer.emitCurrentToken   s   
 !!&M]*!&M334DEE&MV}
: 66Fm#C(s8c$i'KKDbD	* $fV}
8 44=OO**J|4L4K,M N'OO**J|4L4R,T Uu%^^
r-   c                 r   U R                   R                  5       nUS:X  a  U R                  U l        g
US:X  a  U R                  U l        g
US:X  aK  U R
                  R                  [        S   SS.5        U R
                  R                  [        S   SS.5        g
U[        L a  gU[        ;   aF  U R
                  R                  [        S	   XR                   R                  [        S
5      -   S.5        g
U R                   R                  S5      nU R
                  R                  [        S   X-   S.5        g
)Nr[   r\    r/   invalid-codepointr0   rc   Frb   Tr[   r\   r   )r   rJ   entityDataStater#   tagOpenStater3   rK   r   r   r   
charsUntilr(   r2   charss      r+   r"   HTMLTokenizer.dataState   s,   {{!3;--DJ0 / S[**DJ, + XOO""J|,D,?$A BOO""J|,D,4$6 7$ ! S[_$ OO""J7H,I$(;;+A+A/SW+X$X$Z [  KK**+?@EOO""J|,D$(L$2 3r-   c                 F    U R                  5         U R                  U l        gNT)rp   r"   r#   r7   s    r+   r   HTMLTokenizer.entityDataState  s    ^^
r-   c                 t   U R                   R                  5       nUS:X  a  U R                  U l        gUS:X  a  U R                  U l        gU[
        :X  a  gUS:X  aK  U R                  R                  [        S   SS.5        U R                  R                  [        S   S	S.5        gU[        ;   aF  U R                  R                  [        S
   XR                   R                  [        S5      -   S.5        gU R                   R                  S5      nU R                  R                  [        S   X-   S.5        g)Nr[   r\   Fr   r/   r   r0   rc   rB   rb   Tr   )r   rJ   characterReferenceInRcdatar#   rcdataLessThanSignStater   r3   rK   r   r   r   r   s      r+   rcdataStateHTMLTokenizer.rcdataState"  s,   {{!3;88DJ0 / S[55DJ, + S[XOO""J|,D,?$A BOO""J|,D,4$6 7  _$ OO""J7H,I$(;;+A+A/SW+X$X$Z [  KK**+?@EOO""J|,D$(L$2 3r-   c                 F    U R                  5         U R                  U l        gr   )rp   r   r#   r7   s    r+   r   (HTMLTokenizer.characterReferenceInRcdata?  s    %%
r-   c                    U R                   R                  5       nUS:X  a  U R                  U l        g
US:X  aK  U R                  R                  [        S   SS.5        U R                  R                  [        S   SS.5        g
U[        :X  a  gU R                   R                  S	5      nU R                  R                  [        S   X-   S.5        g
Nr\   r   r/   r   r0   rc   rB   F)r\   r   T)	r   rJ   rawtextLessThanSignStater#   r3   rK   r   r   r   r   s      r+   rawtextStateHTMLTokenizer.rawtextStateD  s    {{!3;66DJ  XOO""J|,D,?$A BOO""J|,D,4$6 7  S[KK**?;EOO""J|,D$(L$2 3r-   c                    U R                   R                  5       nUS:X  a  U R                  U l        g
US:X  aK  U R                  R                  [        S   SS.5        U R                  R                  [        S   SS.5        g
U[        :X  a  gU R                   R                  S	5      nU R                  R                  [        S   X-   S.5        g
r   )	r   rJ   scriptDataLessThanSignStater#   r3   rK   r   r   r   r   s      r+   scriptDataStateHTMLTokenizer.scriptDataStateV  s    {{!3;99DJ  XOO""J|,D,?$A BOO""J|,D,4$6 7  S[KK**?;EOO""J|,D$(L$2 3r-   c                 p   U R                   R                  5       nU[        :X  a  gUS:X  aK  U R                  R	                  [
        S   SS.5        U R                  R	                  [
        S   SS.5        gU R                  R	                  [
        S   XR                   R                  S5      -   S.5        g)	NFr   r/   r   r0   rc   rB   T)r   rJ   r   r3   rK   r   r   r(   r2   s     r+   plaintextStateHTMLTokenizer.plaintextStateh  s    {{!3;XOO""J|,D,?$A BOO""J|,D,4$6 7
  OO""J|,D$(;;+A+A(+K$K$M Nr-   c                 X   U R                   R                  5       nUS:X  a  U R                  U l        gUS:X  a  U R                  U l        gU[
        ;   a&  [        S   U/ SSS.U l        U R                  U l        gUS:X  a\  U R                  R                  [        S   SS	.5        U R                  R                  [        S
   SS	.5        U R                  U l        gUS:X  aR  U R                  R                  [        S   SS	.5        U R                   R                  U5        U R                  U l        gU R                  R                  [        S   SS	.5        U R                  R                  [        S
   SS	.5        U R                   R                  U5        U R                  U l        g)N!/rw   F)r1   rv   r2   ry   selfClosingAcknowledged>r/   z'expected-tag-name-but-got-right-bracketr0   rc   z<>?z'expected-tag-name-but-got-question-markzexpected-tag-namer\   T)r   rJ   markupDeclarationOpenStater#   closeTagOpenStater   r   r%   tagNameStater3   rK   r"   rQ   bogusCommentStater   s     r+   r   HTMLTokenizer.tagOpenStatew  s   {{!3;88DJ< ; S[//DJ8 7 \!)3J)?)-r05<A!CD **DJ, + S[ OO""J|,D$M$O POO""J|,Dd#STDJ  S[ OO""J|,D$M$O PKKd#//DJ  OO""J|,D$7$9 :OO""J|,Dc#RSKKd#DJr-   c                    U R                   R                  5       nU[        ;   a%  [        S   U/ SS.U l        U R
                  U l        gUS:X  a7  U R                  R                  [        S   SS.5        U R                  U l        gU[        L a\  U R                  R                  [        S   SS.5        U R                  R                  [        S	   S
S.5        U R                  U l        gU R                  R                  [        S   SSU0S.5        U R                   R                  U5        U R                  U l        g)Nrx   Fr1   rv   r2   ry   r   r/   z*expected-closing-tag-but-got-right-bracketr0   z expected-closing-tag-but-got-eofrc   </z!expected-closing-tag-but-got-charr2   r?   T)r   rJ   r   r   r%   r   r#   r3   rK   r"   r   rQ   r   r   s     r+   r   HTMLTokenizer.closeTagOpenState  s,   {{!<)3H)=t)+E!CD**DJ" ! S[OO""J|,D$P$R SDJ  S[OO""J|,D$F$H IOO""J|,Dd#STDJ  OO""J|,D$G17$@ A KKd#//DJr-   c                    U R                   R                  5       nU[        ;   a  U R                  U l        g
US:X  a  U R                  5         g
U[        L a7  U R                  R                  [        S   SS.5        U R                  U l        g
US:X  a  U R                  U l        g
US:X  a=  U R                  R                  [        S   SS.5        U R                  S==   S	-  ss'   g
U R                  S==   U-  ss'   g
)Nr   r/   zeof-in-tag-namer0   r   r   r   rv   rB   T)r   rJ   r   beforeAttributeNameStater#   r   r   r3   rK   r   r"   selfClosingStartTagStater%   r   s     r+   r   HTMLTokenizer.tagNameState  s   {{!?"66DJ" ! S[!!#  S[OO""J|,D$5$7 8DJ  S[66DJ  XOO""J|,D,?$A Bf%1%
  f%-% r-   c                    U R                   R                  5       nUS:X  a  SU l        U R                  U l        gU R
                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        gNr   r=   rc   r\   r0   T)
r   rJ   temporaryBufferrcdataEndTagOpenStater#   r3   rK   r   rQ   r   r   s     r+   r   %HTMLTokenizer.rcdataLessThanSignState  su    {{!3;#%D 33DJ
  OO""J|,Dc#RSKKd#))DJr-   c                 <   U R                   R                  5       nU[        ;   a'  U =R                  U-  sl        U R                  U l        gU R                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        gNrc   r   r0   T)r   rJ   r   r   rcdataEndTagNameStater#   r3   rK   r   rQ   r   r   s     r+   r   #HTMLTokenizer.rcdataEndTagOpenState  s    {{!<  D( 33DJ
  OO""J|,Dd#STKKd#))DJr-   c                 Z   U R                   =(       a8    U R                   S   R                  5       U R                  R                  5       :H  nU R                  R	                  5       nU[
        ;   a6  U(       a/  [        S   U R                  / SS.U l         U R                  U l        g
US:X  a6  U(       a/  [        S   U R                  / SS.U l         U R                  U l        g
US:X  aF  U(       a?  [        S   U R                  / SS.U l         U R                  5         U R                  U l        g
U[        ;   a  U =R                  U-  sl        g
U R                  R                  [        S   SU R                  -   S	.5        U R                  R                  U5        U R                   U l        g
Nrv   rx   Fr   r   r   rc   r   r0   T)r%   lowerr   r   rJ   r   r   r   r#   r   r   r"   r   r3   rK   rQ   r   r(   appropriater2   s      r+   r   #HTMLTokenizer.rcdataEndTagNameState  s   ''mD,=,=f,E,K,K,MQUQeQeQkQkQm,m{{!?"{)3H)=)-)=)=)+E!CD 66DJ& % S[[)3H)=)-)=)=)+E!CD 66DJ  S[[)3H)=)-)=)=)+E!CD !!#DJ  \!  D(  	 OO""J|,D,043G3G,G$I JKKd#))DJr-   c                    U R                   R                  5       nUS:X  a  SU l        U R                  U l        gU R
                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        gr   )
r   rJ   r   rawtextEndTagOpenStater#   r3   rK   r   rQ   r   r   s     r+   r   &HTMLTokenizer.rawtextLessThanSignState  su    {{!3;#%D 44DJ
  OO""J|,Dc#RSKKd#**DJr-   c                 <   U R                   R                  5       nU[        ;   a'  U =R                  U-  sl        U R                  U l        gU R                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        gr   )r   rJ   r   r   rawtextEndTagNameStater#   r3   rK   r   rQ   r   r   s     r+   r   $HTMLTokenizer.rawtextEndTagOpenState  s    {{!<  D( 44DJ
  OO""J|,Dd#STKKd#**DJr-   c                 Z   U R                   =(       a8    U R                   S   R                  5       U R                  R                  5       :H  nU R                  R	                  5       nU[
        ;   a6  U(       a/  [        S   U R                  / SS.U l         U R                  U l        g
US:X  a6  U(       a/  [        S   U R                  / SS.U l         U R                  U l        g
US:X  aF  U(       a?  [        S   U R                  / SS.U l         U R                  5         U R                  U l        g
U[        ;   a  U =R                  U-  sl        g
U R                  R                  [        S   SU R                  -   S	.5        U R                  R                  U5        U R                   U l        g
r   )r%   r   r   r   rJ   r   r   r   r#   r   r   r"   r   r3   rK   rQ   r   r   s      r+   r   $HTMLTokenizer.rawtextEndTagNameState  s   ''mD,=,=f,E,K,K,MQUQeQeQkQkQm,m{{!?"{)3H)=)-)=)=)+E!CD 66DJ& % S[[)3H)=)-)=)=)+E!CD 66DJ  S[[)3H)=)-)=)=)+E!CD !!#DJ  \!  D(  	 OO""J|,D,043G3G,G$I JKKd#**DJr-   c                    U R                   R                  5       nUS:X  a  SU l        U R                  U l        gUS:X  a7  U R
                  R                  [        S   SS.5        U R                  U l        gU R
                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        g)	Nr   r=   r   rc   z<!r0   r\   T)r   rJ   r   scriptDataEndTagOpenStater#   r3   rK   r   scriptDataEscapeStartStaterQ   r   r   s     r+   r   )HTMLTokenizer.scriptDataLessThanSignState,  s    {{!3;#%D 77DJ  S[OO""J|,Dd#ST88DJ
  OO""J|,Dc#RSKKd#--DJr-   c                 <   U R                   R                  5       nU[        ;   a'  U =R                  U-  sl        U R                  U l        gU R                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        gr   )r   rJ   r   r   scriptDataEndTagNameStater#   r3   rK   r   rQ   r   r   s     r+   r   'HTMLTokenizer.scriptDataEndTagOpenState:  s    {{!<  D( 77DJ
  OO""J|,Dd#STKKd#--DJr-   c                 Z   U R                   =(       a8    U R                   S   R                  5       U R                  R                  5       :H  nU R                  R	                  5       nU[
        ;   a6  U(       a/  [        S   U R                  / SS.U l         U R                  U l        g
US:X  a6  U(       a/  [        S   U R                  / SS.U l         U R                  U l        g
US:X  aF  U(       a?  [        S   U R                  / SS.U l         U R                  5         U R                  U l        g
U[        ;   a  U =R                  U-  sl        g
U R                  R                  [        S   SU R                  -   S	.5        U R                  R                  U5        U R                   U l        g
r   )r%   r   r   r   rJ   r   r   r   r#   r   r   r"   r   r3   rK   rQ   r   r   s      r+   r   'HTMLTokenizer.scriptDataEndTagNameStateE  s   ''mD,=,=f,E,K,K,MQUQeQeQkQkQm,m{{!?"{)3H)=)-)=)=)+E!CD 66DJ& % S[[)3H)=)-)=)=)+E!CD 66DJ  S[[)3H)=)-)=)=)+E!CD !!#DJ  \!  D(  	 OO""J|,D,043G3G,G$I JKKd#--DJr-   c                 
   U R                   R                  5       nUS:X  a7  U R                  R                  [        S   SS.5        U R
                  U l        gU R                   R                  U5        U R                  U l        gN-rc   r0   T)	r   rJ   r3   rK   r   scriptDataEscapeStartDashStater#   rQ   r   r   s     r+   r   (HTMLTokenizer.scriptDataEscapeStartStatea  m    {{!3;OO""J|,Dc#RS<<DJ  KKd#--DJr-   c                 
   U R                   R                  5       nUS:X  a7  U R                  R                  [        S   SS.5        U R
                  U l        gU R                   R                  U5        U R                  U l        gr   )	r   rJ   r3   rK   r   scriptDataEscapedDashDashStater#   rQ   r   r   s     r+   r   ,HTMLTokenizer.scriptDataEscapeStartDashStatek  r   r-   c                 @   U R                   R                  5       nUS:X  a7  U R                  R                  [        S   SS.5        U R
                  U l        g
US:X  a  U R                  U l        g
US:X  aK  U R                  R                  [        S   SS.5        U R                  R                  [        S   SS.5        g
U[        :X  a  U R                  U l        g
U R                   R                  S	5      nU R                  R                  [        S   X-   S.5        g
)Nr   rc   r0   r\   r   r/   r   rB   )r\   r   r   T)r   rJ   r3   rK   r   scriptDataEscapedDashStater#   "scriptDataEscapedLessThanSignStater   r"   r   r   s      r+   scriptDataEscapedState$HTMLTokenizer.scriptDataEscapedStateu  s   {{!3;OO""J|,Dc#RS88DJ  S[@@DJ  XOO""J|,D,?$A BOO""J|,D,4$6 7  S[DJ
  KK**+?@EOO""J|,D$(L$2 3r-   c                 J   U R                   R                  5       nUS:X  a7  U R                  R                  [        S   SS.5        U R
                  U l        g	US:X  a  U R                  U l        g	US:X  a\  U R                  R                  [        S   SS.5        U R                  R                  [        S   SS.5        U R                  U l        g	U[        :X  a  U R                  U l        g	U R                  R                  [        S   US.5        U R                  U l        g	)
Nr   rc   r0   r\   r   r/   r   rB   T)r   rJ   r3   rK   r   r   r#   r   r   r   r"   r   s     r+   r   (HTMLTokenizer.scriptDataEscapedDashState  s   {{!3;OO""J|,Dc#RS<<DJ  S[@@DJ  XOO""J|,D,?$A BOO""J|,D,4$6 744DJ  S[DJ  OO""J|,Dd#ST44DJr-   c                    U R                   R                  5       nUS:X  a&  U R                  R                  [        S   SS.5        g
US:X  a  U R
                  U l        g
US:X  a7  U R                  R                  [        S   SS.5        U R                  U l        g
US:X  a\  U R                  R                  [        S   SS.5        U R                  R                  [        S   S	S.5        U R                  U l        g
U[        :X  a  U R                  U l        g
U R                  R                  [        S   US.5        U R                  U l        g
)Nr   rc   r0   r\   r   r   r/   r   rB   T)r   rJ   r3   rK   r   r   r#   r   r   r   r"   r   s     r+   r   ,HTMLTokenizer.scriptDataEscapedDashDashState  s5   {{!3;OO""J|,Dc#RS" ! S[@@DJ  S[OO""J|,Dc#RS--DJ  XOO""J|,D,?$A BOO""J|,D,4$6 744DJ  S[DJ  OO""J|,Dd#ST44DJr-   c                    U R                   R                  5       nUS:X  a  SU l        U R                  U l        gU[
        ;   a@  U R                  R                  [        S   SU-   S.5        Xl        U R                  U l        gU R                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        gr   )r   rJ   r    scriptDataEscapedEndTagOpenStater#   r   r3   rK   r    scriptDataDoubleEscapeStartStaterQ   r   r   s     r+   r   0HTMLTokenizer.scriptDataEscapedLessThanSignState  s    {{!3;#%D >>DJ  \!OO""J|,DcTXj#YZ#' >>DJ
  OO""J|,Dc#RSKKd#44DJr-   c                    U R                   R                  5       nU[        ;   a  Xl        U R                  U l        gU R                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        gr   )r   rJ   r   r    scriptDataEscapedEndTagNameStater#   r3   rK   r   rQ   r   r   s     r+   r   .HTMLTokenizer.scriptDataEscapedEndTagOpenState  st    {{!<#' >>DJ
  OO""J|,Dd#STKKd#44DJr-   c                 Z   U R                   =(       a8    U R                   S   R                  5       U R                  R                  5       :H  nU R                  R	                  5       nU[
        ;   a6  U(       a/  [        S   U R                  / SS.U l         U R                  U l        g
US:X  a6  U(       a/  [        S   U R                  / SS.U l         U R                  U l        g
US:X  aF  U(       a?  [        S   U R                  / SS.U l         U R                  5         U R                  U l        g
U[        ;   a  U =R                  U-  sl        g
U R                  R                  [        S   SU R                  -   S	.5        U R                  R                  U5        U R                   U l        g
r   )r%   r   r   r   rJ   r   r   r   r#   r   r   r"   r   r3   rK   rQ   r   r   s      r+   r   .HTMLTokenizer.scriptDataEscapedEndTagNameState  s   ''mD,=,=f,E,K,K,MQUQeQeQkQkQm,m{{!?"{)3H)=)-)=)=)+E!CD 66DJ& % S[[)3H)=)-)=)=)+E!CD 66DJ  S[[)3H)=)-)=)=)+E!CD !!#DJ  \!  D(  	 OO""J|,D,043G3G,G$I JKKd#44DJr-   c                    U R                   R                  5       nU[        [        S5      -  ;   ah  U R                  R                  [        S   US.5        U R                  R                  5       S:X  a  U R                  U l
        gU R                  U l
         gU[        ;   a;  U R                  R                  [        S   US.5        U =R                  U-  sl        gU R                   R                  U5        U R                  U l
        gN)r   r   rc   r0   scriptT)r   rJ   r   rN   r3   rK   r   r   r   scriptDataDoubleEscapedStater#   r   r   rQ   r   s     r+   r   .HTMLTokenizer.scriptDataDoubleEscapeStartState  s    {{!Oi
&;;<OO""J|,Dd#ST##))+x7!>>
  "88
  \!OO""J|,Dd#ST  D(   KKd#44DJr-   c                    U R                   R                  5       nUS:X  a7  U R                  R                  [        S   SS.5        U R
                  U l        g
US:X  a7  U R                  R                  [        S   SS.5        U R                  U l        g
US:X  aK  U R                  R                  [        S   SS.5        U R                  R                  [        S   SS.5        g
U[        :X  a7  U R                  R                  [        S   S	S.5        U R                  U l        g
U R                  R                  [        S   US.5        g
Nr   rc   r0   r\   r   r/   r   rB   eof-in-script-in-scriptT)
r   rJ   r3   rK   r    scriptDataDoubleEscapedDashStater#   (scriptDataDoubleEscapedLessThanSignStater   r"   r   s     r+   r   *HTMLTokenizer.scriptDataDoubleEscapedState  s/   {{!3;OO""J|,Dc#RS>>DJ  S[OO""J|,Dc#RSFFDJ  XOO""J|,D,?$A BOO""J|,D,4$6 7  S[OO""J|,D$=$? @DJ  OO""J|,Dd#STr-   c                    U R                   R                  5       nUS:X  a7  U R                  R                  [        S   SS.5        U R
                  U l        g
US:X  a7  U R                  R                  [        S   SS.5        U R                  U l        g
US:X  a\  U R                  R                  [        S   SS.5        U R                  R                  [        S   SS.5        U R                  U l        g
U[        :X  a7  U R                  R                  [        S   S	S.5        U R                  U l        g
U R                  R                  [        S   US.5        U R                  U l        g
r   )r   rJ   r3   rK   r   $scriptDataDoubleEscapedDashDashStater#   r   r   r   r"   r   s     r+   r   .HTMLTokenizer.scriptDataDoubleEscapedDashState  sI   {{!3;OO""J|,Dc#RSBBDJ" ! S[OO""J|,Dc#RSFFDJ  XOO""J|,D,?$A BOO""J|,D,4$6 7::DJ  S[OO""J|,D$=$? @DJ  OO""J|,Dd#ST::DJr-   c                 6   U R                   R                  5       nUS:X  a&  U R                  R                  [        S   SS.5        gUS:X  a7  U R                  R                  [        S   SS.5        U R
                  U l        gUS:X  a7  U R                  R                  [        S   SS.5        U R                  U l        gUS:X  a\  U R                  R                  [        S   SS.5        U R                  R                  [        S   S	S.5        U R                  U l        gU[        :X  a7  U R                  R                  [        S   S
S.5        U R                  U l        gU R                  R                  [        S   US.5        U R                  U l        g)Nr   rc   r0   r\   r   r   r/   r   rB   r   T)r   rJ   r3   rK   r   r   r#   r   r   r   r"   r   s     r+   r   2HTMLTokenizer.scriptDataDoubleEscapedDashDashState%  ss   {{!3;OO""J|,Dc#RS( ' S[OO""J|,Dc#RSFFDJ" ! S[OO""J|,Dc#RS--DJ  XOO""J|,D,?$A BOO""J|,D,4$6 7::DJ  S[OO""J|,D$=$? @DJ  OO""J|,Dd#ST::DJr-   c                    U R                   R                  5       nUS:X  a>  U R                  R                  [        S   SS.5        SU l        U R                  U l        gU R                   R                  U5        U R                  U l        g)Nr   rc   r0   r=   T)
r   rJ   r3   rK   r   r   scriptDataDoubleEscapeEndStater#   rQ   r   r   s     r+   r   6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignState>  su    {{!3;OO""J|,Dc#RS#%D <<DJ  KKd#::DJr-   c                    U R                   R                  5       nU[        [        S5      -  ;   ah  U R                  R                  [        S   US.5        U R                  R                  5       S:X  a  U R                  U l
        gU R                  U l
         gU[        ;   a;  U R                  R                  [        S   US.5        U =R                  U-  sl        gU R                   R                  U5        U R                  U l
        gr   )r   rJ   r   rN   r3   rK   r   r   r   r   r#   r   r   rQ   r   s     r+   r  ,HTMLTokenizer.scriptDataDoubleEscapeEndStateI  s    {{!Oi
&;;<OO""J|,Dd#ST##))+x7!88
  ">>
  \!OO""J|,Dd#ST  D(   KKd#::DJr-   c                    U R                   R                  5       nU[        ;   a!  U R                   R                  [        S5        gU[        ;   a2  U R
                  S   R                  US/5        U R                  U l        gUS:X  a  U R                  5         gUS:X  a  U R                  U l        gUS;   aW  U R                  R                  [        S   SS	.5        U R
                  S   R                  US/5        U R                  U l        gUS
:X  aW  U R                  R                  [        S   SS	.5        U R
                  S   R                  SS/5        U R                  U l        gU[        L a7  U R                  R                  [        S   SS	.5        U R                  U l        gU R
                  S   R                  US/5        U R                  U l        g)NTr2   r=   r   r   )'"ra   r\   r/   #invalid-character-in-attribute-namer0   r   r   rB   z#expected-attribute-name-but-got-eof)r   rJ   r   r   r   r%   rK   attributeNameStater#   r   r   r3   r   r   r"   r   s     r+   r   &HTMLTokenizer.beforeAttributeNameStateY  s   {{!?"KK""?D92 1 \!f%,,dBZ800DJ, + S[!!#( ' S[66DJ$ # ))OO""J|,D$I$K Lf%,,dBZ800DJ  XOO""J|,D,?$A Bf%,,h^<00DJ  S[OO""J|,D$I$K LDJ  f%,,dBZ800DJr-   c                    U R                   R                  5       nSnSnUS:X  a  U R                  U l        GOU[        ;   aB  U R
                  S   S   S==   UU R                   R                  [        S5      -   -  ss'   SnGO4US:X  a  SnGO*U[        ;   a  U R                  U l        GOUS:X  a  U R                  U l        OUS	:X  aE  U R                  R                  [        S
   SS.5        U R
                  S   S   S==   S-  ss'   SnOUS;   aE  U R                  R                  [        S
   SS.5        U R
                  S   S   S==   U-  ss'   SnO_U[        L a7  U R                  R                  [        S
   SS.5        U R                  U l        OU R
                  S   S   S==   U-  ss'   SnU(       a  U R
                  S   S   S   R                  [         5      U R
                  S   S   S'   U R
                  S   S S  HE  u  pEU R
                  S   S   S   U:X  d  M   U R                  R                  [        S
   SS.5          O   U(       a  U R#                  5         g)NTFra   r2   r^   r   r   r   r   r/   r   r0   rB   r  r  r\   r  zeof-in-attribute-namezduplicate-attribute)r   rJ   beforeAttributeValueStater#   r   r%   r   r   afterAttributeNameStater   r3   rK   r   r   r"   rz   r   r   )r(   r2   leavingThisState	emitTokenrv   _s         r+   r	   HTMLTokenizer.attributeNameStatew  sj   {{!	3;77DJ\!f%b)!,&&|T:1; ;,$S[ I_$55DJS[66DJXOO""J|,D,?$A Bf%b)!,8,$_$OO""J|,D$I$K L f%b)!,4,$S[OO""J|,D,C$E FDJf%b)!,4,$
 !!&)"-a0::;KL f%b)!,,,V4Sb9$$V,R03t;OO**J|4L,A,C D	 : %%'r-   c                    U R                   R                  5       nU[        ;   a!  U R                   R                  [        S5        gUS:X  a  U R                  U l        gUS:X  a  U R                  5         gU[        ;   a2  U R                  S   R                  US/5        U R                  U l        gUS:X  a  U R                  U l        gUS:X  aW  U R                  R                  [        S   S	S
.5        U R                  S   R                  SS/5        U R                  U l        gUS;   aW  U R                  R                  [        S   SS
.5        U R                  S   R                  US/5        U R                  U l        gU[        L a7  U R                  R                  [        S   SS
.5        U R                  U l        gU R                  S   R                  US/5        U R                  U l        g)NTra   r   r2   r=   r   r   r/   r   r0   rB   r  z&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)r   rJ   r   r   r  r#   r   r   r%   rK   r	  r   r3   r   r   r"   r   s     r+   r  %HTMLTokenizer.afterAttributeNameState  s   {{!?"KK""?D96 5 S[77DJ2 1 S[!!#. - \!f%,,dBZ800DJ( ' S[66DJ$ # XOO""J|,D,?$A Bf%,,h^<00DJ  _$OO""J|,D$L$N Of%,,dBZ800DJ  S[OO""J|,D$E$G HDJ  f%,,dBZ800DJr-   c                    U R                   R                  5       nU[        ;   a!  U R                   R                  [        S5        gUS:X  a  U R                  U l        gUS:X  a-  U R                  U l        U R                   R                  U5        gUS:X  a  U R                  U l        gUS:X  a6  U R                  R                  [        S   SS.5        U R                  5         gUS	:X  aT  U R                  R                  [        S   S
S.5        U R                  S   S   S==   S-  ss'   U R                  U l        gUS;   aT  U R                  R                  [        S   SS.5        U R                  S   S   S==   U-  ss'   U R                  U l        gU[        L a7  U R                  R                  [        S   SS.5        U R                  U l        gU R                  S   S   S==   U-  ss'   U R                  U l        g)NTr  r[   r  r   r/   z.expected-attribute-value-but-got-right-bracketr0   r   r   r2   r^   r
   rB   )ra   r\   `z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eof)r   rJ   r   r   attributeValueDoubleQuotedStater#   attributeValueUnQuotedStaterQ   attributeValueSingleQuotedStater3   rK   r   r   r%   r   r"   r   s     r+   r  'HTMLTokenizer.beforeAttributeValueState  s   {{!?"KK""?D9: 9 T\==DJ6 5 S[99DJKKd#0 / S[==DJ, + S[OO""J|,D$T$V W!!#$ # XOO""J|,D,?$A Bf%b)!,8,99DJ  _$OO""J|,D$H$J Kf%b)!,4,99DJ  S[OO""J|,D$J$L MDJ  f%b)!,4,99DJr-   c                    U R                   R                  5       nUS:X  a  U R                  U l        gUS:X  a  U R	                  S5        gUS:X  aC  U R
                  R                  [        S   SS.5        U R                  S   S   S	==   S
-  ss'   gU[        L a7  U R
                  R                  [        S   SS.5        U R                  U l        gU R                  S   S   S	==   UU R                   R                  S5      -   -  ss'   g)Nr  r[   r   r/   r   r0   r2   r^   r
   rB   z#eof-in-attribute-value-double-quote)r  r[   r   Tr   rJ   afterAttributeValueStater#   rs   r3   rK   r   r%   r   r"   r   r   s     r+   r  -HTMLTokenizer.attributeValueDoubleQuotedState  s   {{!4<66DJ  S[))#.  XOO""J|,D,?$A Bf%b)!,8,  S[OO""J|,D$I$K LDJ  f%b)!,&&'<=1> >,r-   c                    U R                   R                  5       nUS:X  a  U R                  U l        gUS:X  a  U R	                  S5        gUS:X  aC  U R
                  R                  [        S   SS.5        U R                  S   S   S	==   S
-  ss'   gU[        L a7  U R
                  R                  [        S   SS.5        U R                  U l        gU R                  S   S   S	==   UU R                   R                  S5      -   -  ss'   g)Nr  r[   r   r/   r   r0   r2   r^   r
   rB   z#eof-in-attribute-value-single-quote)r  r[   r   Tr  r   s     r+   r  -HTMLTokenizer.attributeValueSingleQuotedState  s   {{!3;66DJ  S[))#.  XOO""J|,D,?$A Bf%b)!,8,  S[OO""J|,D$I$K LDJ  f%b)!,&&';<1= =,r-   c           	         U R                   R                  5       nU[        ;   a  U R                  U l        gUS:X  a  U R                  S5        gUS:X  a  U R                  5         gUS;   aC  U R                  R                  [        S   SS.5        U R                  S   S   S	==   U-  ss'   gUS
:X  aC  U R                  R                  [        S   SS.5        U R                  S   S   S	==   S-  ss'   gU[        L a7  U R                  R                  [        S   SS.5        U R                  U l        gU R                  S   S   S	==   XR                   R                  [        S5      [        -  5      -   -  ss'   g)Nr[   r   )r  r  ra   r\   r  r/   z0unexpected-character-in-unquoted-attribute-valuer0   r2   r^   r
   r   r   rB   z eof-in-attribute-value-no-quotes)r[   r   r  r  ra   r\   r  r   T)r   rJ   r   r   r#   rs   r   r3   rK   r   r%   r   r"   r   rN   r   s     r+   r  )HTMLTokenizer.attributeValueUnQuotedState  s   {{!?"66DJ( ' S[))#.$ # S[!!#   ..OO""J|,D$V$X Yf%b)!,4,  XOO""J|,D,?$A Bf%b)!,8,  S[OO""J|,D$F$H IDJ  f%b)!,{{7M7MGH?Z8\ 1\ \,r-   c                 &   U R                   R                  5       nU[        ;   a  U R                  U l        gUS:X  a  U R                  5         gUS:X  a  U R                  U l        gU[        L aR  U R                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        gU R                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        g)Nr   r   r/   z$unexpected-EOF-after-attribute-valuer0   z*unexpected-character-after-attribute-valueT)r   rJ   r   r   r#   r   r   r   r3   rK   r   rQ   r"   r   s     r+   r  &HTMLTokenizer.afterAttributeValueState.  s    {{!?"66DJ  S[!!#  S[66DJ  S[OO""J|,D$J$L MKKd#DJ 	 OO""J|,D$P$R SKKd#66DJr-   c                    U R                   R                  5       nUS:X  a   SU R                  S'   U R                  5         gU[        L aR  U R
                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l
        gU R
                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l
        g)Nr   Try   r/   z#unexpected-EOF-after-solidus-in-tagr0   z)unexpected-character-after-solidus-in-tag)r   rJ   r%   r   r   r3   rK   r   rQ   r"   r#   r   r   s     r+   r   &HTMLTokenizer.selfClosingStartTagStateB  s    {{!3;/3Dm,!!#  S[OO""J|,D$I$K L KKd#DJ 	 OO""J|,D$O$Q RKKd#66DJr-   c                     U R                   R                  S5      nUR                  SS5      nU R                  R	                  [
        S   US.5        U R                   R                  5         U R                  U l        g)Nr   r   rB   Commentr0   T)	r   r   replacer3   rK   r   rJ   r"   r#   r   s     r+   r   HTMLTokenizer.bogusCommentStateT  sg     {{%%c*||Hh/	*D9	;
 	^^
r-   c                    U R                   R                  5       /nUS   S:X  aW  UR                  U R                   R                  5       5        US   S:X  a#  [        S   SS.U l        U R
                  U l        gGO\US   S;   ao  SnS H9  nUR                  U R                   R                  5       5        US   U;  d  M7  S	n  O   U(       a&  [        S
   SS S SS.U l        U R                  U l        gOUS   S:X  a  U R                  b  U R                  R                  R                  (       a  U R                  R                  R                  S   R                  U R                  R                  R                  :w  aZ  SnS H9  nUR                  U R                   R                  5       5        US   U:w  d  M7  S	n  O   U(       a  U R                  U l        gU R                  R                  [        S   SS.5        U(       a2  U R                   R                  UR!                  5       5        U(       a  M2  U R"                  U l        g)Nr^   r   r(  r=   r0   T)dD))oOrV   CtTyYpPeEFDoctype)r1   rv   publicIdsystemIdcorrect[)r1  r-  Ar4  rC  rB  r/   zexpected-dashes-or-doctype)r   rJ   rK   r   r%   commentStartStater#   doctypeStater   treeopenElements	namespacedefaultNamespacecdataSectionStater3   rQ   r5   r   )r(   rU   matchedexpecteds       r+   r   (HTMLTokenizer.markupDeclarationOpenStatec  s    [[%%'(	R=CT[[--/0}#-7	-BB$O!!33
 $ r]j(GA  !1!1!34R=0#GA -7	-B-/15404%6! "..
  ms"kk%kk++kk++B/99T[[=M=M=^=^^G:  !1!1!34R=H,#G	 ;
 !33

<(@ < > 	? KKimmo. i++
r-   c                    U R                   R                  5       nUS:X  a  U R                  U l        gUS:X  a=  U R                  R                  [        S   SS.5        U R                  S==   S-  ss'   gUS:X  a\  U R                  R                  [        S   S	S.5        U R                  R                  U R                  5        U R                  U l        gU[        L a\  U R                  R                  [        S   S
S.5        U R                  R                  U R                  5        U R                  U l        gU R                  S==   U-  ss'   U R                  U l        g)Nr   r   r/   r   r0   r2   rB   r   incorrect-commenteof-in-commentT)r   rJ   commentStartDashStater#   r3   rK   r   r%   r"   r   commentStater   s     r+   rD  HTMLTokenizer.commentStartState  sA   {{!3;33DJ$ # XOO""J|,D,?$A Bf%1%  S[OO""J|,D$7$9 :OO""4#4#45DJ  S[OO""J|,D$4$6 7OO""4#4#45DJ  f%-%**DJr-   c                    U R                   R                  5       nUS:X  a  U R                  U l        gUS:X  a=  U R                  R                  [        S   SS.5        U R                  S==   S-  ss'   gUS:X  a\  U R                  R                  [        S   S	S.5        U R                  R                  U R                  5        U R                  U l        gU[        L a\  U R                  R                  [        S   S
S.5        U R                  R                  U R                  5        U R                  U l        gU R                  S==   SU-   -  ss'   U R                  U l        g)Nr   r   r/   r   r0   r2      -�r   rO  rP  T)r   rJ   commentEndStater#   r3   rK   r   r%   r"   r   rR  r   s     r+   rQ  #HTMLTokenizer.commentStartDashState  sE   {{!3;--DJ$ # XOO""J|,D,?$A Bf%2%  S[OO""J|,D$7$9 :OO""4#4#45DJ  S[OO""J|,D$4$6 7OO""4#4#45DJ  f%t3%**DJr-   c                    U R                   R                  5       nUS:X  a  U R                  U l        g
US:X  a=  U R                  R                  [        S   SS.5        U R                  S==   S-  ss'   g
U[        L a\  U R                  R                  [        S   SS.5        U R                  R                  U R                  5        U R                  U l        g
U R                  S==   UU R                   R                  S	5      -   -  ss'   g
)Nr   r   r/   r   r0   r2   rB   rP  )r   r   T)r   rJ   commentEndDashStater#   r3   rK   r   r%   r   r"   r   r   s     r+   rR  HTMLTokenizer.commentState  s    {{!3;11DJ  XOO""J|,D,?$A Bf%1%  S[OO""J|,D,<$> ?OO""4#4#45DJ  f%&&7*8 8%r-   c                 0   U R                   R                  5       nUS:X  a  U R                  U l        g	US:X  aN  U R                  R                  [        S   SS.5        U R                  S==   S-  ss'   U R                  U l        g	U[        L a\  U R                  R                  [        S   SS.5        U R                  R                  U R                  5        U R                  U l        g	U R                  S==   SU-   -  ss'   U R                  U l        g	)
Nr   r   r/   r   r0   r2   rU  zeof-in-comment-end-dashT)r   rJ   rV  r#   r3   rK   r   r%   rR  r   r"   r   s     r+   rY  !HTMLTokenizer.commentEndDashState  s   {{!3;--DJ  XOO""J|,D,?$A Bf%2%**DJ  S[OO""J|,D$=$? @OO""4#4#45DJ  f%t3%**DJr-   c                    U R                   R                  5       nUS:X  a7  U R                  R                  U R                  5        U R
                  U l        gUS:X  aN  U R                  R                  [        S   SS.5        U R                  S==   S-  ss'   U R                  U l        gUS:X  a7  U R                  R                  [        S   S	S.5        U R                  U l        gUS
:X  a=  U R                  R                  [        S   SS.5        U R                  S==   U-  ss'   gU[        L a\  U R                  R                  [        S   SS.5        U R                  R                  U R                  5        U R
                  U l        gU R                  R                  [        S   SS.5        U R                  S==   SU-   -  ss'   U R                  U l        g)Nr   r   r/   r   r0   r2   u   --�r   z,unexpected-bang-after-double-dash-in-commentr   z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)r   rJ   r3   rK   r%   r"   r#   r   rR  commentEndBangStater   r   s     r+   rV  HTMLTokenizer.commentEndState  s   {{!3;OO""4#4#45DJ2 1 XOO""J|,D,?$A Bf%3%**DJ( ' S[OO""J|,D$R$T U11DJ   S[OO""J|,D$R$T Uf%-%  S[OO""J|,D$@$B COO""4#4#45DJ 	 OO""J|,D$@$B Cf%4%**DJr-   c                    U R                   R                  5       nUS:X  a7  U R                  R                  U R                  5        U R
                  U l        gUS:X  a)  U R                  S==   S-  ss'   U R                  U l        gUS:X  aN  U R                  R                  [        S   SS.5        U R                  S==   S	-  ss'   U R                  U l        gU[        L a\  U R                  R                  [        S   S
S.5        U R                  R                  U R                  5        U R
                  U l        gU R                  S==   SU-   -  ss'   U R                  U l        g)Nr   r   r2   z--!r   r/   r   r0   u   --!�zeof-in-comment-end-bang-stateT)r   rJ   r3   rK   r%   r"   r#   rY  r   rR  r   r   s     r+   r^  !HTMLTokenizer.commentEndBangState  sI   {{!3;OO""4#4#45DJ" ! S[f%.%11DJ  XOO""J|,D,?$A Bf%4%**DJ  S[OO""J|,D$C$E FOO""4#4#45DJ  f%5%**DJr-   c                    U R                   R                  5       nU[        ;   a  U R                  U l        gU[
        L ak  U R                  R                  [        S   SS.5        SU R                  S'   U R                  R                  U R                  5        U R                  U l        gU R                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        g)Nr/   !expected-doctype-name-but-got-eofr0   FrA  zneed-space-after-doctypeT)r   rJ   r   beforeDoctypeNameStater#   r   r3   rK   r   r%   r"   rQ   r   s     r+   rE  HTMLTokenizer.doctypeState  s    {{!?"44DJ  S[OO""J|,D$G$I J+0Di(OO""4#4#45DJ 	 OO""J|,D$>$@ AKKd#44DJr-   c                    U R                   R                  5       nU[        ;   a   gUS:X  ak  U R                  R	                  [
        S   SS.5        SU R                  S'   U R                  R	                  U R                  5        U R                  U l        gUS:X  aF  U R                  R	                  [
        S   SS.5        S	U R                  S
'   U R                  U l        gU[        L ak  U R                  R	                  [
        S   SS.5        SU R                  S'   U R                  R	                  U R                  5        U R                  U l        gXR                  S
'   U R                  U l        g)Nr   r/   z+expected-doctype-name-but-got-right-bracketr0   FrA  r   r   rB   rv   rc  T)r   rJ   r   r3   rK   r   r%   r"   r#   doctypeNameStater   r   s     r+   rd  $HTMLTokenizer.beforeDoctypeNameState*  s[   {{!?"* ) S[OO""J|,D$Q$S T+0Di(OO""4#4#45DJ  XOO""J|,D,?$A B(0Df%..DJ  S[OO""J|,D$G$I J+0Di(OO""4#4#45DJ  )-f%..DJr-   c                    U R                   R                  5       nU[        ;   aA  U R                  S   R	                  [
        5      U R                  S'   U R                  U l        gUS:X  af  U R                  S   R	                  [
        5      U R                  S'   U R                  R                  U R                  5        U R                  U l        gUS:X  aN  U R                  R                  [        S   SS.5        U R                  S==   S-  ss'   U R                  U l        gU[        L a  U R                  R                  [        S   SS.5        S	U R                  S
'   U R                  S   R	                  [
        5      U R                  S'   U R                  R                  U R                  5        U R                  U l        gU R                  S==   U-  ss'   g)Nrv   r   r   r/   r   r0   rB   zeof-in-doctype-nameFrA  T)r   rJ   r   r%   rz   r   afterDoctypeNameStater#   r3   rK   r"   r   rg  r   r   s     r+   rg  HTMLTokenizer.doctypeNameStateD  s   {{!?"(,(9(9&(A(K(KL\(]Df%33DJ& % S[(,(9(9&(A(K(KL\(]Df%OO""4#4#45DJ  XOO""J|,D,?$A Bf%1%..DJ  S[OO""J|,D$9$; <+0Di((,(9(9&(A(K(KL\(]Df%OO""4#4#45DJ  f%-%r-   c                    U R                   R                  5       nU[        ;   a   gUS:X  a7  U R                  R	                  U R
                  5        U R                  U l        gU[        L a  SU R
                  S'   U R                   R                  U5        U R                  R	                  [        S   SS.5        U R                  R	                  U R
                  5        U R                  U l        gUS;   aH  SnS	 H&  nU R                   R                  5       nX;  d  M$  Sn  O   U(       a  U R                  U l        gOMUS
;   aG  SnS H&  nU R                   R                  5       nX;  d  M$  Sn  O   U(       a  U R                  U l        gU R                   R                  U5        U R                  R	                  [        S   SSU0S.5        SU R
                  S'   U R                  U l        g)Nr   FrA  r/   eof-in-doctyper0   r8  T))uU)bB)lL)iIr0  sS)r5  rv  r2  r;  )mMz*expected-space-or-right-bracket-in-doctyper2   r?   )r   rJ   r   r3   rK   r%   r"   r#   r   rQ   r   afterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)r(   r2   rK  rL  s       r+   rj  #HTMLTokenizer.afterDoctypeNameState]  s   {{!?"\ [ S[OO""4#4#45DJV U S[+0Di(KKd#OO""J|,D$4$6 7OO""4#4#45DJH E z!!9H;;++-D+"'!9 !%!D!DDJ  #!9H;;++-D+"'!9 !%!D!DDJ KKd#OO""J|,D$P%+TN$4 5 ,1Di(//DJr-   c                 `   U R                   R                  5       nU[        ;   a  U R                  U l        gUS;   aR  U R
                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        gU[        L ak  U R
                  R                  [        S   SS.5        SU R                  S'   U R
                  R                  U R                  5        U R                  U l        gU R                   R                  U5        U R                  U l        g	N)r  r  r/   unexpected-char-in-doctyper0   rm  FrA  T)r   rJ   r   "beforeDoctypePublicIdentifierStater#   r3   rK   r   rQ   r   r%   r"   r   s     r+   r{  ,HTMLTokenizer.afterDoctypePublicKeywordState     {{!?"@@DJ  ZOO""J|,D$@$B CKKd#@@DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ  KKd#@@DJr-   c                 @   U R                   R                  5       nU[        ;   a   gUS:X  a!  SU R                  S'   U R                  U l        gUS:X  a!  SU R                  S'   U R                  U l        gUS:X  ak  U R                  R                  [        S   SS.5        S	U R                  S
'   U R                  R                  U R                  5        U R                  U l        gU[        L ak  U R                  R                  [        S   SS.5        S	U R                  S
'   U R                  R                  U R                  5        U R                  U l        gU R                  R                  [        S   SS.5        S	U R                  S
'   U R                  U l        g)Nr  r=   r?  r  r   r/   unexpected-end-of-doctyper0   FrA  rm  r  T)r   rJ   r   r%   (doctypePublicIdentifierDoubleQuotedStater#   (doctypePublicIdentifierSingleQuotedStater3   rK   r   r"   r   r}  r   s     r+   r  0HTMLTokenizer.beforeDoctypePublicIdentifierState  s   {{!?"0 / T\,.Dj)FFDJ* ) S[,.Dj)FFDJ$ # S[OO""J|,D$?$A B+0Di(OO""4#4#45DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ 	 OO""J|,D$@$B C+0Di(//DJr-   c                    U R                   R                  5       nUS:X  a  U R                  U l        gUS:X  a=  U R                  R                  [        S   SS.5        U R                  S==   S-  ss'   gUS:X  ak  U R                  R                  [        S   S	S.5        S
U R                  S'   U R                  R                  U R                  5        U R                  U l        gU[        L ak  U R                  R                  [        S   SS.5        S
U R                  S'   U R                  R                  U R                  5        U R                  U l        gU R                  S==   U-  ss'   g)Nr  r   r/   r   r0   r?  rB   r   r  FrA  rm  T
r   rJ   !afterDoctypePublicIdentifierStater#   r3   rK   r   r%   r"   r   r   s     r+   r  6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedState  V   {{!4<??DJ& % XOO""J|,D,?$A Bj)X5)  S[OO""J|,D$?$A B+0Di(OO""4#4#45DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ  j)T1)r-   c                    U R                   R                  5       nUS:X  a  U R                  U l        gUS:X  a=  U R                  R                  [        S   SS.5        U R                  S==   S-  ss'   gUS:X  ak  U R                  R                  [        S   S	S.5        S
U R                  S'   U R                  R                  U R                  5        U R                  U l        gU[        L ak  U R                  R                  [        S   SS.5        S
U R                  S'   U R                  R                  U R                  5        U R                  U l        gU R                  S==   U-  ss'   g)Nr  r   r/   r   r0   r?  rB   r   r  FrA  rm  Tr  r   s     r+   r  6HTMLTokenizer.doctypePublicIdentifierSingleQuotedState  V   {{!3;??DJ& % XOO""J|,D,?$A Bj)X5)  S[OO""J|,D$?$A B+0Di(OO""4#4#45DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ  j)T1)r-   c                    U R                   R                  5       nU[        ;   a  U R                  U l        gUS:X  a7  U R
                  R                  U R                  5        U R                  U l        gUS:X  aF  U R
                  R                  [        S   SS.5        SU R                  S'   U R                  U l        gUS:X  aF  U R
                  R                  [        S   SS.5        SU R                  S'   U R                  U l        gU[        L ak  U R
                  R                  [        S   S	S.5        S
U R                  S'   U R
                  R                  U R                  5        U R                  U l        gU R
                  R                  [        S   SS.5        S
U R                  S'   U R                  U l        g)Nr   r  r/   r  r0   r=   r@  r  rm  FrA  T)r   rJ   r   -betweenDoctypePublicAndSystemIdentifiersStater#   r3   rK   r%   r"   r   (doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStater   r}  r   s     r+   r  /HTMLTokenizer.afterDoctypePublicIdentifierState  s   {{!?"KKDJ2 1 S[OO""4#4#45DJ, + S[OO""J|,D$@$B C,.Dj)FFDJ" ! S[OO""J|,D$@$B C,.Dj)FFDJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ 	 OO""J|,D$@$B C+0Di(//DJr-   c                    U R                   R                  5       nU[        ;   a   gUS:X  a7  U R                  R	                  U R
                  5        U R                  U l        gUS:X  a!  SU R
                  S'   U R                  U l        gUS:X  a!  SU R
                  S'   U R                  U l        gU[        :X  ak  U R                  R	                  [        S   SS.5        S	U R
                  S
'   U R                  R	                  U R
                  5        U R                  U l        gU R                  R	                  [        S   SS.5        S	U R
                  S
'   U R                  U l        g)Nr   r  r=   r@  r  r/   rm  r0   FrA  r  T)r   rJ   r   r3   rK   r%   r"   r#   r  r  r   r   r}  r   s     r+   r  ;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersState  sR   {{!?"* ) S[OO""4#4#45DJ$ # S[,.Dj)FFDJ  S[,.Dj)FFDJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ 	 OO""J|,D$@$B C+0Di(//DJr-   c                 `   U R                   R                  5       nU[        ;   a  U R                  U l        gUS;   aR  U R
                  R                  [        S   SS.5        U R                   R                  U5        U R                  U l        gU[        L ak  U R
                  R                  [        S   SS.5        SU R                  S'   U R
                  R                  U R                  5        U R                  U l        gU R                   R                  U5        U R                  U l        gr  )r   rJ   r   "beforeDoctypeSystemIdentifierStater#   r3   rK   r   rQ   r   r%   r"   r   s     r+   r|  ,HTMLTokenizer.afterDoctypeSystemKeywordState)  r  r-   c                 @   U R                   R                  5       nU[        ;   a   gUS:X  a!  SU R                  S'   U R                  U l        gUS:X  a!  SU R                  S'   U R                  U l        gUS:X  ak  U R                  R                  [        S   SS.5        S	U R                  S
'   U R                  R                  U R                  5        U R                  U l        gU[        L ak  U R                  R                  [        S   SS.5        S	U R                  S
'   U R                  R                  U R                  5        U R                  U l        gU R                  R                  [        S   SS.5        S	U R                  S
'   U R                  U l        g)Nr  r=   r@  r  r   r/   r  r0   FrA  rm  T)r   rJ   r   r%   r  r#   r  r3   rK   r   r"   r   r}  r   s     r+   r  0HTMLTokenizer.beforeDoctypeSystemIdentifierState=  s   {{!?"0 / T\,.Dj)FFDJ* ) S[,.Dj)FFDJ$ # S[OO""J|,D$@$B C+0Di(OO""4#4#45DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ 	 OO""J|,D$@$B C+0Di(//DJr-   c                    U R                   R                  5       nUS:X  a  U R                  U l        gUS:X  a=  U R                  R                  [        S   SS.5        U R                  S==   S-  ss'   gUS:X  ak  U R                  R                  [        S   S	S.5        S
U R                  S'   U R                  R                  U R                  5        U R                  U l        gU[        L ak  U R                  R                  [        S   SS.5        S
U R                  S'   U R                  R                  U R                  5        U R                  U l        gU R                  S==   U-  ss'   g)Nr  r   r/   r   r0   r@  rB   r   r  FrA  rm  T
r   rJ   !afterDoctypeSystemIdentifierStater#   r3   rK   r   r%   r"   r   r   s     r+   r  6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStateZ  r  r-   c                    U R                   R                  5       nUS:X  a  U R                  U l        gUS:X  a=  U R                  R                  [        S   SS.5        U R                  S==   S-  ss'   gUS:X  ak  U R                  R                  [        S   S	S.5        S
U R                  S'   U R                  R                  U R                  5        U R                  U l        gU[        L ak  U R                  R                  [        S   SS.5        S
U R                  S'   U R                  R                  U R                  5        U R                  U l        gU R                  S==   U-  ss'   g)Nr  r   r/   r   r0   r@  rB   r   r  FrA  rm  Tr  r   s     r+   r  6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStater  r  r-   c                    U R                   R                  5       nU[        ;   a   gUS:X  a7  U R                  R	                  U R
                  5        U R                  U l        gU[        L ak  U R                  R	                  [        S   SS.5        SU R
                  S'   U R                  R	                  U R
                  5        U R                  U l        gU R                  R	                  [        S   SS.5        U R                  U l        g)	Nr   r/   rm  r0   FrA  r  T)r   rJ   r   r3   rK   r%   r"   r#   r   r   r}  r   s     r+   r  /HTMLTokenizer.afterDoctypeSystemIdentifierState  s    {{!?"  S[OO""4#4#45DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ
  OO""J|,D$@$B C//DJr-   c                 j   U R                   R                  5       nUS:X  a7  U R                  R                  U R                  5        U R
                  U l        gU[        L aR  U R                   R                  U5        U R                  R                  U R                  5        U R
                  U l        g g)Nr   T)	r   rJ   r3   rK   r%   r"   r#   r   rQ   r   s     r+   r}  HTMLTokenizer.bogusDoctypeState  s    {{!3;OO""4#4#45DJ  S[KKd#OO""4#4#45DJ  r-   c                    / n UR                  U R                  R                  S5      5        UR                  U R                  R                  S5      5        U R                  R                  5       nU[        :X  a  O3US:X  d   eUS   SS  S:X  a  US   S S US'   OUR                  U5        M  SR                  U5      nUR                  S5      nUS	:  aI  [        U5       H(  nU R                  R                  [        S
   SS.5        M*     UR                  SS5      nU(       a%  U R                  R                  [        S   US.5        U R                  U l        g)NT]r   r^   z]]r=   r   r   r/   r   r0   rB   rc   )rK   r   r   rJ   r   rM   countranger3   r   r)  r"   r#   )r(   r2   rJ   	nullCountr  s        r+   rJ  HTMLTokenizer.cdataSectionState  s;   KK..s34KK..s34;;##%Ds{s{"{8BC=D(#Bx}DHKK%  wwt}JJx(	q=9%&&
<0H0C(E F & <<(3DOO""J|,D,0$2 3^^
r-   )	r%   r$   r    r!   r   r#   r   r   r3   )Nr   )P__name__
__module____qualname____firstlineno____doc__r'   r8   rX   rp   rs   r   r"   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r	  r  r  r  r  r  r  r   r   r   rD  rQ  rR  rY  rV  r^  rE  rd  rg  rj  r{  r  r  r  r  r  r|  r  r  r  r  r}  rJ  __static_attributes____classcell__)r*   s   @r+   r   r      s   
.0 FPNT`H
$8:
:
$$!F0,		8		8	8((,	8 *.2	 <4l@ D&&2($+Z..$&>."421f(:00<4(:00& r-   r   N) 
__future__r   r   r   bleach.six_shimr   rO   collectionsr   r   sysr	   	constantsr   r   r   r   r   r   r   r   r   r   _inputstreamr   _trier   rd   dictr{   objectr    r-   r+   <module>r     sX    B B ) *  &  5 - - 0 , ) H~6LLlF lr-   