
    cCi"                         S r SSKrSSKJrJr  SSKJr  SSKJr  SSK	J
r
JrJr  SSKJrJrJr  SS	KJr  S
SKJr  \R*                  " \5      r " S S\
SS9r " S S\5      rS/rg)zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
    N)OptionalUnion   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixinUnpack)
AddedTokenPreTokenizedInput	TextInput)logging   )AutoTokenizerc            
       2    \ rS rSrSSSSSSSSSS.	0 S.rSrg)	InstructBlipProcessorKwargs!   TFr   )	add_special_tokenspaddingstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverbose)text_kwargsimages_kwargs N)__name__
__module____qualname____firstlineno__	_defaults__static_attributes__r       r/home/james-whalen/.local/lib/python3.13/site-packages/transformers/models/instructblip/processing_instructblip.pyr   r   !   s0     #').*/&+%*"

 Ir&   r   F)totalc            
          ^  \ rS rSrSr/ SQrSrSrSrSU 4S jjr	    SS\
\   S\\\\\   \\   4   S	\\   S
\4S jjr\S 5       rU 4S jr\U 4S j5       rSrU =r$ )InstructBlipProcessor2   aX  
Constructs an InstructBLIP processor which wraps a BLIP image processor and a LLaMa/T5 tokenizer into a single
processor.

[`InstructBlipProcessor`] offers all the functionalities of [`BlipImageProcessor`] and [`AutoTokenizer`]. See the
docstring of [`~BlipProcessor.__call__`] and [`~BlipProcessor.decode`] for more information.

Args:
    image_processor (`BlipImageProcessor`):
        An instance of [`BlipImageProcessor`]. The image processor is a required input.
    tokenizer (`AutoTokenizer`):
        An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
    qformer_tokenizer (`AutoTokenizer`):
        An instance of ['PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
    num_query_tokens (`int`, *optional*):"
        Number of tokens used by the Qformer as queries, should be same as in model's config.
)image_processor	tokenizerqformer_tokenizer)BlipImageProcessorBlipImageProcessorFastr   c                    > [        US5      (       d,  [        SSSS9U l        UR                  U R                  /SS9  OUR                  U l        X@l        [
        TU ]  XU5        g )Nimage_tokenz<image>FT)
normalizedspecial)special_tokens)hasattrr   r2   
add_tokensnum_query_tokenssuper__init__)selfr,   r-   r.   r8   kwargs	__class__s         r'   r:   InstructBlipProcessor.__init__J   sa    y-00))tTD  $"2"2!3D I(44D 05FGr&   imagestextr<   returnc                    Uc  Uc  [        S5      eU R                  " [        4SU R                  R                  0UD6nUS   R                  SS5      n0 nUGb^  [        U[        5      (       a  U/nO8[        U[        5      (       d#  [        US   [        5      (       d  [        S5      eU R                  " U40 US   D6n	U	R                  S5      US	'   U	R                  S
5      US'   US   R                  S5      b  US   S==   U R                  -  ss'   U R                  " U40 US   D6n
Ubv  U R                  R                  U R                  -  nSUS   S'   SUS   S'   SUS   S'   U R                  " U40 US   D6nU
 H  nX    Vs/ s H
  oU   U-   PM     snX'   M!     UR                  U
5        Ub'  U R                  " U40 US   D6nUR                  U5        [!        XS9nU$ s  snf )aP  
This method uses [`BlipImageProcessor.__call__`] method to prepare image(s) for the model, and
[`BertTokenizerFast.__call__`] to prepare text for the model.

Please refer to the docstring of the above two methods for more information.
Args:
    images (`ImageInput`):
        The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
        tensor. Both channels-first and channels-last formats are supported.
    text (`TextInput`, `PreTokenizedInput`, `list[TextInput]`, `list[PreTokenizedInput]`):
        The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
        (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
        `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
Nz,You have to specify at least images or text.tokenizer_init_kwargsr   return_tensorsr   zAInvalid input text. Please provide a string, or a list of strings	input_idsqformer_input_idsattention_maskqformer_attention_mask
max_lengthFr   r   
truncationr   )tensor_type)
ValueError_merge_kwargsr   r-   init_kwargspop
isinstancestrlistr.   getr8   r2   contentupdater,   r   )r;   r?   r@   audiovideosr<   output_kwargsrD   encodingqformer_text_encodingtext_encodingimage_tokensimage_text_encodingksampleimage_encodings                   r'   __call__InstructBlipProcessor.__call__T   s%   , >dlKLL**'
"&.."<"<
 
 '}599:JDQ$$$vd++JtAw4L4L !dee$($:$:4$`=Q^C_$`!,A,E,Ek,RH()1F1J1JK[1\H-. ]+//=Im,\:d>S>SS: NN4P=3OPM!#//77$:O:OOEJm,-AB:?m,Y7=Bm,\:&*nn\&b]S`Ea&b#&AVcVf'gVfFA(>(GVf'gM$ 'OOM*!11&[M/<Z[NOON+  E (hs   G-c                 p    U R                   R                  nU R                  R                  nSS/nX-   U-   $ )NrF   rH   )r-   model_input_namesr,   )r;   tokenizer_input_namesimage_processor_input_namesqformer_input_namess       r'   rd   'InstructBlipProcessor.model_input_names   s?     $ @ @&*&:&:&L&L#24LM$BEXXXr&   c                   > [         R                  R                  U5      (       a  [        SU S35      e[         R                  " USS9  [         R                  R                  US5      nU R                  R                  U5        SU R                  ;   nU(       a  U R                  R                  S5        [        TU ]  " U40 UD6nU(       a  U =R                  S/-  sl        U$ )NzProvided path (z#) should be a directory, not a fileT)exist_okr.   )ospathisfilerL   makedirsjoinr.   save_pretrained
attributesremover9   )r;   save_directoryr<   qformer_tokenizer_pathqformer_presentoutputsr=   s         r'   rp   %InstructBlipProcessor.save_pretrained   s    77>>.))~.>>abcc
NT2!#n>Q!R../EF .@OO""#67').CFCOO 344Or&   c                    > [         TU ]  " U40 UD6n[        U[        5      (       a  US   n[        R                  " USS9nXCl        U$ )Nr   r.   )	subfolder)r9   from_pretrainedrP   tupler   r.   )clspretrained_model_name_or_pathr<   	processorr.   r=   s        r'   rz   %InstructBlipProcessor.from_pretrained   sQ    G+,ITVT	 i''!!I)99:Wcvw&7#r&   )r2   r8   )N)NNNN)r    r!   r"   r#   __doc__rq   image_processor_classtokenizer_classqformer_tokenizer_classr:   r   r   r   r   r   rR   r
   r   r   ra   propertyrd   rp   classmethodrz   r%   __classcell__)r=   s   @r'   r*   r*   2   s    $ GJL%O-H (,^bA$A I0$y/4HYCZZ[A 45A 
AF Y Y&  r&   r*   )r   rk   typingr   r   image_processing_utilsr   image_utilsr   processing_utilsr   r	   r
   tokenization_utils_baser   r   r   utilsr   autor   
get_loggerr    loggerr   r*   __all__r   r&   r'   <module>r      sd    
 " 2 % H H O O    
		H	%"2% "IN IX #
#r&   