
    i#S                     d   S r SSKrSSKrSSKrSSKrSSKrSSKrSSKrSSKrSSK	J
r  SSKJr  SSKJr  SSKJrJr  SSKJr  SSKJr  SS	KJr  SS
KJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'  SSK(r(SSK)r)SSK*J+r+  \(       a  SSK,J-r-   " S S5      r.\." 5       r/\' " S S\#5      5       r0S\&\0\\1/\24   4   SS4S jr3SYS\1S\\1/\!4   4S jjr4S\$S\14S jr5S\$S\64S jr7\S\S\S\4S j5       r8\ " S S5      5       r9   SZS\S\!\9   S \6S!\:S"\:S\4S# jjr;   SZS$\S\!\9   S \6S!\:S"\:S\4S% jjr<S&\S'\S\4S( jr=S)\1S*\6S\14S+ jr>S,\&\ \4   S-\6S\ 4S. jr?S/\1S0\1S\14S1 jr@ S[S2\ S3\AS4\1S5\"\6   S\ 4
S6 jjrBS)\1S\64S7 jrCS\1S\\1/\!\1   4   4S8 jrDS\14S9 jrES:\S\4S; jrFS<rGS=S>S?S@SA.rHSBSCSDSESFSGSHSI.rI S\S2\!\1   SJ\AS\\1\14   4SK jjrJS)\1SL\1S\14SM jrKS]S)\1SL\"\1   SN\1SS4SO jjrLS\14SP jrMSQ\S\\SS4   4SR jrNSQ\S\\S4   4SS jrO    S^ST\"\P   SU\"\&\1\4      SV\"\1   SW\AS\4
SX jjrQg)_zGeneral utils functions.    N)Error)contextmanager)	dataclass)partialwraps)BytesIO)islice)Path)TYPE_CHECKINGAnyAsyncGeneratorCallableDict	GeneratorIterableListOptionalProtocolSetTypeUnionruntime_checkable)urlparsePunktSentenceTokenizerc                       \ rS rSr% SrSr\\\      \	S'   Sr
\S   \	S'   Sr\\   \	S'   SS	 jrSS
 jr\S\\   4S j5       r\SS j5       rSrg)GlobalsHelper+   z?Helper to retrieve globals with asynchronous NLTK data loading.N
_stopwordsr   _punkt_tokenizer_nltk_data_dirreturnc                 V   SSK Jn  S[        R                  ;   a+  [	        [        [        R                  S   5      5      U l        O\[        [        R                  R                  [        R                  R                  [        5      5      5      n[	        US-  5      U l        [        R                  R                  U R                  5      (       d  [        R                  " U R                  SS9  U R                  U;  a  UR                  U R                  5        U R                  5         g)zInitialize NLTK data download.r   path	NLTK_DATAz_static/nltk_cacheT)exist_okN)	nltk.datar%   osenvironstrr
   r!   dirnameabspath__file__existsmakedirsappend_download_nltk_data)self	nltk_pathr%   s      P/home/james-whalen/.local/lib/python3.13/site-packages/llama_index/core/utils.pywait_for_nltk_check!GlobalsHelper.wait_for_nltk_check2   s    / "**$"%d2::k+B&C"DD(ABCD"%d-A&A"BD ww~~d1122KK++d; i/T001 	  "    c                 <   SSK Jn  SSKJn    U" SU R                  /S9   U" S	U R                  /S9  g! [
         a    U" SU R                  SS9   N2f = f! [
         a    U" S
U R                  SS9   gf = f! [         a  n[        SU 35         SnAgSnAff = f)z.Download NLTK data packages in the background.r   )download)findzcorpora/stopwords)paths	stopwordsT)download_dirquietztokenizers/punkt_tab	punkt_tabzNLTK download error: N)nltkr:   r(   r;   r!   LookupError	Exceptionprint)r3   r:   	nltk_findes       r5   r2   !GlobalsHelper._download_nltk_dataH   s    !/	/T-d6I6I5JK
T09L9L8MN  T43F3FdST  T43F3FdST  	/)!-..	/sJ   5 A AA9 AA9 A63A9 5A66A9 9
BBBc                     U R                   c>  U R                  5         SSKJn  SSKJn  UR                  S5      U l         U" 5       U l        U R                   $ )z+Get stopwords, ensuring data is downloaded.r   r=   r   english)r   r6   nltk.corpusr=   nltk.tokenizer   wordsr    r3   r=   r   s      r5   r=   GlobalsHelper.stopwords]   sE     ??"$$&-<'ooi8DO$:$<D!r8   c                     U R                   c>  U R                  5         SSKJn  SSKJn  U" 5       U l         UR                  S5      U l        U R                   $ )z1Get punkt tokenizer, ensuring data is downloaded.r   rI   r   rJ   )r    r6   rK   r=   rL   r   rM   r   rN   s      r5   punkt_tokenizerGlobalsHelper.punkt_tokenizerl   sI       ($$&-<$:$<D!'ooi8DO$$$r8   )r!   r    r   )r"   N)r"   r   )__name__
__module____qualname____firstlineno____doc__r   r   r   r+   __annotations__r    r!   r6   r2   propertyr=   rQ   __static_attributes__ r8   r5   r   r   +   st    I&*Jc#*;?h78?$(NHSM(#,/* 49   % %r8   r   c            	       4    \ rS rSrS\S\S\S\\   4S jrSrg)		Tokenizer   textargskwargsr"   c                     g Nr[   )r3   r_   r`   ra   s       r5   encodeTokenizer.encode   s    r8   r[   N)	rS   rT   rU   rV   r+   r   r   rd   rZ   r[   r8   r5   r]   r]      s    L3LsLcLd3iLr8   r]   	tokenizerr"   c                     SS K n[        U [        5      (       a  U R                  UR                  l        g XR                  l        g )Nr   )llama_index.core
isinstancer]   rd   coreglobal_tokenizer)rf   llama_indexs     r5   set_global_tokenizerrm      s2    )Y'',5,<,<),5)r8   
model_namec                 t   SS K nUR                  R                  c  Sn SS KnSnS[
        R                  ;  aq  Sn[
        R                  R                  [
        R                  R                  [
        R                  R                  [        5      5      S5      [
        R                  S'   UR                  U 5      n[        UR                  SS9n[        U5        U(       a  [
        R                  S	 UR                  R                  c   eUR                  R                  $ ! [         a    [	        U5      ef = f)	Nr   z?`tiktoken` package not found, please run `pip install tiktoken`FTIKTOKEN_CACHE_DIRTz_static/tiktoken_cacheall)allowed_special)rh   rj   rk   tiktokenImportErrorr)   r*   r%   joinr,   r-   r.   encoding_for_modelr   rd   rm   )rn   rl   tiktoken_import_errrs   should_revertencrf   s          r5   get_tokenizerrz      s    ((0M 		3
 rzz1 M/1ww|| 9:(0BJJ+,
 ))*5CJJ>	Y'

/0,,888,,,)  	3122	3s   D! !D7dc                 T     [        [        R                  " 5       5      nX;  a   U$ M(  )zGet a new ID.)r+   uuiduuid4r{   new_ids     r5   
get_new_idr      s'    
TZZ\"?M	 r8   c                 b     [         R                  " S[        R                  5      nX;  a   U$ M/  )zGet a new integer ID.r   )randomrandintsysmaxsizer   s     r5   get_new_int_idr      s+    
3;;/?M	 r8   objra   c              +   >  #    U Vs0 s H  o"[        X5      _M     nnUR                  5        H  u  p$[        XU5        M      Sv   UR                  5        H  u  p$[        XU5        M     gs  snf ! UR                  5        H  u  p$[        XU5        M     f = f7f)z
Temporary setter.

Utility class for setting a temporary value for an attribute on a class.
Taken from: https://tinyurl.com/2p89xymh

N)getattritemssetattr)r   ra   kprev_valuesvs        r5   temp_set_attrsr      s      066v!gco%vK6 %%'DACA ( 7  %%'DACA (s'   BA.'BA3 +B3'BBc                   J    \ rS rSr% Sr\\   \S'   Sr\	\
\/\4      \S'   Srg)ErrorToRetry   z
Exception types that should be retried.

Args:
    exception_cls (Type[Exception]): Class of exception.
    check_fn (Optional[Callable[[Any]], bool]]):
        A function that takes an exception instance as input and returns
        whether to retry.

exception_clsNcheck_fnr[   )rS   rT   rU   rV   rW   r   rC   rX   r   r   r   r   boolrZ   r[   r8   r5   r   r      s-    	 	?"04Hhxt,-4r8   r   	lambda_fnerrors_to_retry	max_triesmin_backoff_secsmax_backoff_secsc                    U(       d  [        S5      eU Vs0 s H  nUR                  UR                  _M     nn[        UR	                  5       5      nUnSn	  U " 5       $ s  snf ! U a  n
[
        R                  " 5         U	S-  n	X:  a  e UR                  U
R                  5      nU(       a  U" U
5      (       d  e [        R                  " U5        [        US-  U5      n Sn
A
OSn
A
ff = fM  )a  
Execute lambda function with retries and exponential backoff.

Args:
    lambda_fn (Callable): Function to be called and output we want.
    errors_to_retry (List[ErrorToRetry]): List of errors to retry.
        At least one needs to be provided.
    max_tries (int): Maximum number of tries, including the first. Defaults to 10.
    min_backoff_secs (float): Minimum amount of backoff time between attempts.
        Defaults to 0.5.
    max_backoff_secs (float): Maximum amount of backoff time between attempts.
        Defaults to 60.

0At least one error to retry needs to be providedr         N
ValueErrorr   r   tuplekeys	traceback	print_excget	__class__timesleepmin)r   r   r   r   r   error_to_retryerror_checksexception_class_tuplesbackoff_secstriesrF   r   s               r5    retry_on_exceptions_with_backoffr      s    * KLL .-N 	$$n&=&==-   #<#4#4#67#LE
	C; & 		C!QJE!#''4HJJ|$|a/1ABL		C s   !A A% %C*+A5C%%C*async_fnc                   #    U(       d  [        S5      eU Vs0 s H  nUR                  UR                  _M     nn[        UR	                  5       5      nUnSn	  U " 5       I Sh  vN $ s  snf  N	! U a  n
[
        R                  " 5         U	S-  n	X:  a  e UR                  U
R                  5      nU(       a  U" U
5      (       d  e [        R                  " U5        [        US-  U5      n Sn
A
OSn
A
ff = fM  7f)a   
Execute lambda function with retries and exponential backoff.

Args:
    async_fn (Callable): Async Function to be called and output we want.
    errors_to_retry (List[ErrorToRetry]): List of errors to retry.
        At least one needs to be provided.
    max_tries (int): Maximum number of tries, including the first. Defaults to 10.
    min_backoff_secs (float): Minimum amount of backoff time between attempts.
        Defaults to 0.5.
    max_backoff_secs (float): Maximum amount of backoff time between attempts.
        Defaults to 60.

r   r   Nr   r   r   )r   r   r   r   r   r   r   r   r   r   rF   r   s               r5   !aretry_on_exceptions_with_backoffr     s     * KLL .-N 	$$n&=&==-   #<#4#4#67#LE
	C!## $% 		C!QJE!#''4HJJ|$|a/1ABL		C sL   C;!A* C;
A1 %A/&A1 )C;/A1 1C67A5C1,C;1C66C;
retry_argsretry_kwargsc                  4   ^ ^ S[         S[         4U U4S jjnU$ )zPReturn a decorator that retries with exponential backoff on provided exceptions.funcr"   c                    >^  [        T 5      S[        S[        S[        4U UU4S jj5       n[        T 5      S[        S[        S[        4U UU4S jj5       n[        R                  " T 5      (       a  U$ U$ )N	func_argsfunc_kwargsr"   c                  2   >^ ^ [        UU U4S j/TQ70 TD6$ )Nc                     > T " T0 TD6$ rc   r[   r   r   r   s   r5   <lambda>dget_retry_on_exceptions_with_backoff_decorator.<locals>.decorator.<locals>.wrapper.<locals>.<lambda>M  s    i7;7r8   )r   )r   r   r   r   r   s   ``r5   wrapperRget_retry_on_exceptions_with_backoff_decorator.<locals>.decorator.<locals>.wrapperJ  s#    37:DHT r8   c                  b   >^ ^#    S[         4UU U4S jjn[        U/TQ70 TD6I S h  vN $  N7f)Nr"   c                  0   >#    T " T0 TD6I S h  vN $  N7frc   r[   r   s   r5   foo`get_retry_on_exceptions_with_backoff_decorator.<locals>.decorator.<locals>.awrapper.<locals>.fooR  s     !9<<<<<s   )r   r   )r   r   r   r   r   r   s   `` r5   awrapperSget_retry_on_exceptions_with_backoff_decorator.<locals>.decorator.<locals>.awrapperP  sC     =s = = ; $0   s   #/-/)r   r   asyncioiscoroutinefunction)r   r   r   r   r   s   `  r5   	decoratorAget_retry_on_exceptions_with_backoff_decorator.<locals>.decoratorI  s    	t	 	C 	C 	 	 
	
 
t	s 	3 	3 	 	 
	 #66t<<xI'Ir8   )r   )r   r   r   s   `` r5   .get_retry_on_exceptions_with_backoff_decoratorr   D  s'    
J JX J J$ r8   r_   
max_lengthc                 :    [        U 5      U::  a  U $ U SUS-
   S-   $ )z"Truncate text to a maximum length.N   z...)len)r_   r   s     r5   truncate_textr   ^  s*    
4yJ *q.!E))r8   iterablesizec              #      #    [        U 5      nU(       a2  [        [        X!5      5      n[        U5      S:X  a  gUv   U(       a  M1  gg7f)z`
Iterate over an iterable in batches.

>>> list(iter_batch([1,2,3,4,5], 3))
[[1, 2, 3], [4, 5]]
r   N)iterlistr	   r   )r   r   source_iterbs       r5   
iter_batchr   e  s<      x.K
*+q6Q;	 +s   AAAr,   basenamec                 ^    X S   S:w  a  SOS-  n [         R                  R                  X5      $ )z
Append basename to dirname, avoiding backslashes when running on windows.

os.path.join(dirname, basename) will add a backslash before dirname if
basename does not end with a slash, so we make sure it does.
/ )r)   r%   ru   )r,   r   s     r5   concat_dirsr   t  s,     bkS(sb0G77<<**r8   r   show_progressdesctotalc                 V    U nU(       a   SSK Jn  U" XUS9$ U$ ! [         a     U$ f = f)z<
Optionally get a tqdm iterable. Ensures tqdm.auto is used.
r   )tqdm)r   r   )	tqdm.autor   rt   )r   r   r   r   	_iteratorr   s         r5   get_tqdm_iterabler     sC     I	&66   		s    
((c                 <    [        5       nU" U 5      n[        U5      $ rc   )rz   r   )r_   rf   tokenss      r5   count_tokensr     s    It_Fv;r8   c                 |     SSK Jn  UR	                  U 5      nUR
                  $ ! [         a    [        S5      ef = f)z
Args:
    model_name(str): the model name of the tokenizer.
                    For instance, fxmarty/tiny-llama-fast-tokenizer.

r   )AutoTokenizerzG`transformers` package not found, please run `pip install transformers`)transformersr   rt   r   from_pretrainedtokenize)rn   r   rf   s      r5   get_transformer_tokenizer_fnr     sI    
.
 --j9I  
U
 	

s   % ;c                      S[         R                  ;   a  [        [         R                  S   5      n O[        [        R                  " S5      5      n U R                  SSS9  [        U 5      $ )zg
Locate a platform-appropriate cache directory for llama_index,
and create it if it doesn't yet exist.
LLAMA_INDEX_CACHE_DIRrl   T)parentsr'   )r)   r*   r
   platformdirsuser_cache_dirmkdirr+   r$   s    r5   get_cache_dirr     sU     "**,BJJ678L//>? 	JJtdJ+t9r8   r   c                    ^  [         R                  " T 5      (       d   e[        T 5      S[        S[        S[        4U 4S jj5       nUT l        T $ )z
Decorator for adding sync version of an async function. The sync version
is added as a function attribute to the original function, func.

Args:
    func(Any): the async function for which a sync variant will be built.

r`   kwdsr"   c                  X   > [         R                  " 5       R                  T" U 0 UD65      $ rc   )r   get_event_looprun_until_complete)r`   r   r   s     r5   _wrapper"add_sync_version.<locals>._wrapper  s'    %%'::4;N;NOOr8   )r   r   r   r   sync)r   r   s   ` r5   add_sync_versionr     sZ     &&t,,,,
4[P PS PS P P DIKr8   a   
Context
LLMs are a phenomenal piece of technology for knowledge generation and reasoning.
They are pre-trained on large amounts of publicly available data.
How do we best augment LLMs with our own private data?
We need a comprehensive toolkit to help perform this data augmentation for LLMs.

Proposed Solution
That's where LlamaIndex comes in. LlamaIndex is a "data framework" to help
you build LLM  apps. It provides the following tools:

Offers data connectors to ingest your existing data sources and data formats
(APIs, PDFs, docs, SQL, etc.)
Provides ways to structure your data (indices, graphs) so that this data can be
easily used with LLMs.
Provides an advanced retrieval/query interface over your data:
Feed in any LLM input prompt, get back retrieved context and knowledge-augmented output.
Allows easy integrations with your outer application framework
(e.g. with LangChain, Flask, Docker, ChatGPT, anything else).
LlamaIndex provides tools for both beginner users and advanced users.
Our high-level API allows beginner users to use LlamaIndex to ingest and
query their data in 5 lines of code. Our lower-level APIs allow advanced users to
customize and extend any module (data connectors, indices, retrievers, query engines,
reranking modules), to fit their needs.
z38;2;237;90;200z38;2;90;149;237z38;2;11;159;203z38;2;155;135;227)
llama_pink
llama_bluellama_turquoisellama_lavender313233343536z38;5;200)redgreenyellowbluemagentacyanpinkuse_llama_index_colorsc           
          U(       a  [         nO[        n[        UR                  5       5      n[	        U 5       VVs0 s H  u  pEXSU[        U5      -     _M     snn$ s  snnf )a/  
Get a mapping of items to colors.

Args:
    items (List[str]): List of items to be mapped to colors.
    use_llama_index_colors (bool, optional): Flag to indicate
    whether to use LlamaIndex colors or ANSI colors.
        Defaults to True.

Returns:
    Dict[str, str]: Mapping of items to colors.

)_LLAMA_INDEX_COLORS_ANSI_COLORSr   r   	enumerater   )r   r  color_palettecolorsiitems         r5   get_color_mappingr    sU      +$-$$&'F9B59IJ9IgaDS[))9IJJJs   Acolorc                 N    0 [         E[        EnX;  a  SU  S3$ X!   nSU SU  S3$ )z
Get the colored version of the input text.

Args:
    text (str): Input text.
    color (str): Color to be applied to the text.

Returns:
    str: Colored version of the input text.

z[1;3mz[0mz[1;3;m)r  r  )r_   r  
all_colorss      r5   _get_colored_textr     sF     9'8<8J4&((EugQtfG,,r8   endc                 6    Ub  [        X5      OU n[        X2S9  g)a  
Print the text with the specified color.

Args:
    text (str): Text to be printed.
    color (str, optional): Color to be applied to the text. Supported colors are:
        llama_pink, llama_blue, llama_turquoise, llama_lavender,
        red, green, yellow, blue, magenta, cyan, pink.
    end (str, optional): String appended after the last character of the text.

Returns:
    None

N)r!  )r   rD   )r_   r  r!  text_to_prints       r5   
print_textr$  '  s     7<6G%d2TM	-!r8   c                       W R                   R                  5       nU(       a  gU R                  R
                  R                  5       (       a  gg! [         a!    SSKn U R                   R                  5       n N]f = f)z Infer the input to torch.device.r   Ncudampscpu)r&  is_available	NameErrortorchbackendsr'  )r+  has_cudas     r5   infer_torch_devicer.  :  sf    -::**,
 ~~&&((  -::**,-s   A (A:9A:xc              #      #    U v   g7fz
A function that returns a generator of a single element.

Args:
    x (Any): the element to build yield

Yields:
    Any: the single element

Nr[   r/  s    r5   unit_generatorr3  I  s      Gs   c                   #    U 7v   g7fr1  r[   r2  s    r5   async_unit_generatorr5  W  s      Gs   		raw_bytesr%   url	as_base64c                    U b^   [         R                  " U 5      n [         R                  " U SS9nU(       a  [	        [         R
                  " U5      5      $ [	        U5      $ Ubc  [        U[        5      (       a  [        U5      OUnUR                  5       nU(       a  [	        [         R
                  " U5      5      $ [	        U5      $ UGbh  [        U5      nUR                  S:X  a  UR                  nSU;  a  [        S5      eUR                  SS5      u  pUR                  S5      n
U
(       aG  [         R                  " U	5      nU(       a  [	        [         R
                  " U5      5      $ [	        U5      $ U(       a.  [	        [         R
                  " U	R!                  S5      5      5      $ [	        U	R!                  S5      5      $ S	S
0n["        R$                  " X,SS9nUR'                  5         U(       a)  [	        [         R
                  " UR(                  5      5      $ [	        UR(                  5      $ [        S5      e! [         a    U n GN3f = f! [         a    U n GN0f = f)aX  
Resolve binary data from various sources into a BytesIO object.

Args:
    raw_bytes: Raw bytes data
    path: File path to read bytes from
    url: URL to fetch bytes from
    as_base64: Whether to base64 encode the output bytes

Returns:
    BytesIO object containing the binary data

Raises:
    ValueError: If no valid source is provided

T)validatedata,z0Invalid data URL format: missing comma separatorr   z;base64zutf-8z
User-AgentzOLlamaIndex/0.0 (https://llamaindex.ai; info@llamaindex.ai) llama-index-core/0.0)<   r=  )headerstimeoutz0No valid source provided to resolve binary data!)base64	b64decoderC   BinasciiErrorr   	b64encoderi   r+   r
   
read_bytesr   schemer%   r   splitendswithrd   requestsr   raise_for_statuscontent)r6  r%   r7  r8  decoded_bytesr;  
parsed_url	data_partmetadataurl_datais_base64_encodeddecoded_datar>  responses                 r5   resolve_binaryrS  e  s   , 	&",,Y7M	& #,,YFM
 6++M:;;}%%		'c22tDz 6++D122t}	c]
& #I )# !STT!*a!8H ( 1 1) < %//9"6#3#3L#ABB #<00 "6#3#3HOOG4L#MNN #8??7#;<< k
 <<hG!!#6++H,<,<=>>x''((
G
HHA  	&%M	&  	&%M	&s"   H? I ?III"!I")zgpt-3.5-turbo)
   g      ?g      N@rc   )T)Nr   )NNNF)RrW   r   r@  r)   r   r   r   r   r}   binasciir   rB  
contextlibr   dataclassesr   	functoolsr   r   ior   	itertoolsr	   pathlibr
   typingr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rH  urllib.parser   rL   r   r   globals_helperr]   r+   r   rm   rz   r   intr   r   r   floatr   r   r   r   r   r   r   r   r   r   r   r   SAMPLE_TEXTr  r  r  r   r$  r.  r3  r5  bytesrS  r[   r8   r5   <module>rc     s?      	  
    + % ! $      "   !4N% N%b  M M M6E)XseTk5J*J$K 6PT 6-c -#8M -@# # c c   s y  $ 5 5 5& !"-C-C,'-C -C 	-C
 -C 	-Cf !"-C-C,'-C -C 	-C
 -C 	-C`&)4* * * *x23 3 8 + + + + MQ$(03<DSM"s s S XseT#Y>N5O "s "3 3 (6 $#((	   6:K9K.2K	#s(^K2-C - - -,"S "# "C " "&C c iT48 # .d*C  "&'+	ZIZI
5d#
$ZI 
#ZI 	ZI
 ZIr8   