
    01i42                       S SK Jr  S SKrS SKrS SKJr  S SKJrJrJ	r	  S SK
JrJrJrJrJrJrJrJrJr  \(       a  S SKrO S SKr\(       d   e S SKJrJr  S SK
JrJr  S SKJrJrJr   S SKrS	rS SK J!r!J"r"J#r#J$r$J%r%J&r&    S       SS jjr'Sr(SS jr)SS jr*SS jr+/ SQr, " S S\5      r-g! \ a    S SKr Nnf = f! \ a    SrS
r N]f = f)    )annotationsN)
HTMLParser)StringIO
TextIOBaseTextIOWrapper)	IOTYPE_CHECKINGAnyDictListOptionalTextIOTupleUnion)normpathsep)r	   cast)urljoinurlsplit
urlunsplitTF)BytesIOWrapperInputSourcePythonInputSourceStringInputSourceURLInputSourcecreate_input_sourcec                
   [        U [        5      (       a  U R                  S4$ [        U [        5      (       Gap  SnU R	                  5       nSn[        U[
        5      (       ai  [        [        [        [        [        4   UR                  5      n[        U[        5      (       a  UnO%[        U[        5      (       a  UR                  5       n[        (       a  Ub  [        R                  " U5      nXs4$ [        U[
        5      (       a7  U R!                  5       n[        R                  " UR#                  5       5      nXs4$ [        R                  " UR#                  5       5      n Xs4$ Ub  [$        R                  " U5      nXs4$ [$        R&                  " U R!                  5       5      nXs4$ [)        U SS9n  U R*                  n	U	SL=(       a    U	R1                  5       S;   n
U
(       a
  [3        XS9nOSn U R	                  5       n U R!                  5       nUc  Uc  [5        S[7        U 5       35      e Uc  SOU R9                  5       nSnUb^  [        U[
        5      (       aI  UR                  n[        U[        5      (       a  UnO%[        U[        5      (       a  UR                  5       n U
(       a{  Ubx  Ub  UnO@Ub  UR#                  5       nO,[:        (       a  Uc   eUc  Sn[=        XLS9R#                  5       nUR?                  U5        URA                  5       URC                  5       p7GO[        (       a  SnUb  [        R                  " U5      nOUb  [        U[
        5      (       d  Uc(  Ub%  [        R                  " UR#                  5       5      nO[:        (       a  Uc   e[        R                  " UR#                  5       5      nO~SnUb?  [$        R                  " U5      Ub   URE                  5         Ub   URE                  5         $ $ Ub  UnO[:        (       a  Uc   eUc  Sn[=        XLS9n[$        R&                  " U5      nXs4Ub   URE                  5         Ub   URE                  5         $ $ ! [,        [.        4 a    Sn	 GNf = f! [,        [.        4 a    Sn GNf = f! [,        [.        4 a    Sn GNf = f! [,        [.        4 a    Sn GNf = f! [,         a     Nf = f! [,         a     $ f = f! [,         a     Nf = f! [,         a     $ f = f! Ub"   URE                  5         O! [,         a     Of = fUb"   URE                  5         f ! [,         a     f f = ff = f)	a  Extract JSON from a source document.

The source document can be JSON or HTML with embedded JSON script elements (type attribute = "application/ld+json").
To process as HTML `source.content_type` must be set to "text/html" or "application/xhtml+xml".

Args:
    source: the input source document (JSON or HTML)
    fragment_id: if source is an HTML document then extract only the script element with matching id attribute, defaults to None
    extract_all_scripts: if source is an HTML document then extract all script elements (unless fragment_id is provided), defaults to False (extract only the first script element)

Returns:
    Tuple with the extracted JSON document and value of the HTML base element
Nzjson-ld)format)z	text/htmlzapplication/xhtml+xml)fragment_idextract_all_scriptszLSource does not have a character stream or a byte stream and cannot be used zutf-8)encoding)#
isinstancer   datar   getByteStreamr   r   r   strr   r   wrappedgetvalue_HAS_ORJSONorjsonloadsgetCharacterStreamreadjsonloadr   content_typeAttributeErrorLookupErrorlowerHTMLJSONParser
ValueErrortypegetEncodingr	   r   feedget_jsonget_baseclose)sourcer   r    	html_baseb_streamoriginal_stringwrapped_inner	json_dictc_streamr/   is_htmlhtml_docparser
b_encodingunderlying_stringhtml_string
use_streams                   [/home/james-whalen/.local/lib/python3.13/site-packages/rdflib/plugins/shared/jsonld/util.pysource_to_jsonrI   +   sH   * &+,,{{D  &+,,	 ''))-h// sHj'@!A8CSCSTM---"/M844"/"8"8":;*"LL9	 ## Hn55!446"LL9	 ## #LL9	 ##	 * JJ7	 ## !IIf&?&?&AB	## !	:F** $& <+=+=+? D ,G 3A#4
 ''),,. H,Z[_`f[gZhi
 	
,4,<D&BTBTBV
 (,
8^ D D ((mS)) -x00 - 6 6 87~1 ,#4%&mmo =#///%!(J+HJOOQ,#1#:#:#<n>U>U>Wy[I ,"LL):;	%*X~*N*N#&"LL9	 =#///"LL9	I ,zz"34      # #%
 =#///%!(J*8I
		*-I#     q K(  K(  K(  K( 
v " 
 "  " 
 "   !  !   s   5Q6 3R R( 2S +D;T *S>S*<T S:$T
6RRR%$R%(R>=R>SS
S'&S'*
S76S7:
TT

TTU&T0/U&0
T=:U&<T==U&UU&
U"U&!U""U&)#/:c                t    [          H+  nU R                  U5      nUS:  d  M  U S US-    XS-   S  4s  $    U S 4$ )N   )VOCAB_DELIMSrfind)iridelimats      rH   	split_irirU      sJ    YYu7xa=#1fh-//  9    c                ,   SU;   a  U$ [        U 5      n[        U5      nUR                  (       a  U$ UR                  S;   a  UR                  R                  SS5      nS[	        U5      S:  a  US   OS-   n[        XSR                  5      nUR                  (       a  SUR                   3OSnUR                   SUS    U U 3nO[        [        X5      5      n	[        U	S	   5      n
[        S:w  a$  SR                  U
R                  [        5      5      n
U	S	   R                  S5      (       a  U
R                  S5      (       d  U
S-  n
[        U	SS	 U
4-   U	S
S -   5      nUR                  S5      (       a  UR                  S5      (       d  US-  nU$ )a  
```python
>>> norm_url('http://example.org/', '/one')
'http://example.org/one'
>>> norm_url('http://example.org/', '/one#')
'http://example.org/one#'
>>> norm_url('http://example.org/one', 'two')
'http://example.org/two'
>>> norm_url('http://example.org/one/', 'two')
'http://example.org/one/two'
>>> norm_url('http://example.org/', 'http://example.net/one')
'http://example.net/one'
>>> norm_url('http://example.org/', 'http://example.org//one')
'http://example.org//one'

```
z://)urnzurn-xrK   rO    rJ   rL   r         N)r   schemepathsplitlenr   fragmentr   r   joinendswithr   )baseurlparsed_base
parsed_urlbase_path_parts	base_pathjoined_pathr`   resultpartsr]   s              rH   norm_urlrl      sy   $ |
 4.K#J
--%**00a8_1E1I?1-rR	i90:0C0CQz**+,&&'q);(<[M(T+,a!#:88DJJsO,D8S!!$--*<*<CKDE!AJ$059<=
||C!5!5#MrV   c                   U R                   S:w  ak   U R                  nU HW  nSU;   d  M  UR                  S5      UR                  S5      pCUS:  d  M4  US:  d  M<  [	        U R
                  X#S-   U 5      s  $    gg! [         a     gf = f)a4  
Please note that JSON-LD documents served with the `application/ld+json` media type
MUST have all context information, including references to external contexts,
within the body of the document. Contexts linked via a
http://www.w3.org/ns/json-ld#context HTTP Link Header MUST be
ignored for such documents.
application/ld+jsonNz+ rel="http://www.w3.org/ns/json-ld#context"<>rN   rO   )r/   linksr0   indexr   rd   )r;   rq   linkijs        rH   context_from_urlinputsourcerv     s     33	LLE D<Dzz#

31r6a"f"6::tEA??  4  		s   A= =
B
	B
)r-   rI   rU   rl   rv   r)   r(   c                  V   ^  \ rS rSr  S   S	U 4S jjjrS rS rS
S jrS rSr	U =r
$ )r3   i#  c                   > [         TU ]  5         Xl        / U l        SU l        SU l        S U l        X l        SU l        g )NFr   )	super__init__r   r-   contains_jsonfragment_id_does_not_matchrc   r    script_count)selfr   r    	__class__s      rH   rz   HTMLJSONParser.__init__$  sB    
 	& "	"*/'	#6 rV   c                   SU l         SU l        US:X  aT  U HM  u  p4US:X  a  US:X  a	  SU l         M  US:X  d  M"  U R                  (       d  M5  X@R                  :w  d  MF  SU l        MO     g US:X  a  U H  u  p4US:X  d  M  X@l        M     g g )	NFscriptr5   rn   Tidrc   href)r{   r|   r   rc   )r~   tagattrsattrvalues        rH   handle_starttagHTMLJSONParser.handle_starttag2  s    "*/' (?$6>e/D&D)-D&T\d&6&6&65DTDT;T6:D3	  % F]$6> %I  % rV   c                   U R                   SL a  U R                  SL a  U R                  (       d  U R                  S:  a  g UR	                  5       S:X  a  g [
        (       a  [        R                  " U5      nO[        R                  " U5      n[        U[        5      (       a  U R                  R                  U5        OU R                  R                  U5        U =R                  S-  sl        g g g )NTFr   rY   rO   )r{   r|   r    r}   stripr(   r)   r*   r-   r"   listextendappend)r~   r#   parseds      rH   handle_dataHTMLJSONParser.handle_dataD  s     %$*I*IU*R++0A0AA0Ezz|r! {  d+D) &$''		  (		  ("1 +S%rV   c                    U R                   $ N)r-   r~   s    rH   r8   HTMLJSONParser.get_jsonb      yyrV   c                    U R                   $ r   )rc   r   s    rH   r9   HTMLJSONParser.get_basee  r   rV   )rc   r{   r    r   r|   r-   r}   NF)r   Optional[str]r    Optional[bool])returnz
List[Dict])__name__
__module____qualname____firstlineno__rz   r   r   r8   r9   __static_attributes____classcell__)r   s   @rH   r3   r3   #  s@     &*.3" , &$#< rV   r3   r   )r;   zMOptional[Union[IO[bytes], TextIO, InputSource, str, bytes, pathlib.PurePath]]r   r   r    r   r   z#Tuple[Union[Dict, List[Dict]], Any])rR   r%   r   zTuple[str, Optional[str]])rc   r%   rd   r%   r   r%   )r;   r   r   r   ).
__future__r   r-   pathlibhtml.parserr   ior   r   r   typingr   r	   r
   r   r   r   r   r   r   ImportError
simplejson	posixpathr   r   r   urllib.parser   r   r   r)   r(   rdflib.parserr   r   r   r   r   r   rI   rP   rU   rl   rv   __all__r3    rV   rH   <module>r      s    "   " 2 2 U U U"t $ & 6 6K  "&*/XX 	X
 (X )Xv ,`@.CZ Ce  "!"  FKs$   B2 +C 2
B?>B?
CC