
    +h8                        S SK rS SKrS SKrS SKrS SKrS SKJrJ	r	  S SK
Jr  S SKJrJr  SSKJrJr  \" \5      r\R(                  R+                  S5      SLr\R(                  R+                  S5      SLr\R(                  R+                  S	5      SLr\(       a  S SKrS S
KJr  O\" S5      e\(       a  S SKJr  S SKJ r J!r!  O\" S5      e\(       a  S SK"J#r#  S SK$J%r%  O\" S5      eSS jr&SS jr'S r(  SS jr) SS jr*S r+g)    N)ImageImageOps)InterpolationMode)	normalizeresize   )
get_logger
load_imageinsightfaceconsisid_eva_clipfacexlib)FaceAnalysiszPinsightface is not available. Please install it using 'pip install insightface'.)create_model_and_transforms)OPENAI_DATASET_MEANOPENAI_DATASET_STDz\consisid_eva_clip is not available. Please install it using 'pip install consisid_eva_clip'.)init_parsing_model)FaceRestoreHelperzJfacexlib is not available. Please install it using 'pip install facexlib'.c                     U R                   SS u  p#[        X#5      U::  a  U $ U[        X#5      -  n[        X$-  5      n[        X4-  5      n[        R                  " XU4[        R
                  S9n U $ )at  
Resize the input image to a specified long edge while maintaining aspect ratio.

Args:
    image (numpy.ndarray): Input image (H x W x C or H x W).
    resize_long_edge (int): The target size for the long edge of the image. Default is 768.

Returns:
    numpy.ndarray: Resized image with the long edge matching `resize_long_edge`, while maintaining the aspect
    ratio.
N   )interpolation)shapemaxintcv2r   INTER_LANCZOS4)imageresize_long_edgehwks        e/home/james-whalen/.local/lib/python3.13/site-packages/diffusers/pipelines/consisid/consisid_utils.pyresize_numpy_image_longr"   '   sj     ;;r?DA
1y$$3q9$AAE
AAE
AJJu!fC4F4FGEL    c                     S n[        U [        5      (       a  U  Vs/ s H  oC" XAU5      PM     sn$ U" XU5      $ s  snf )a/  Numpy array to tensor.

Args:
    imgs (list[ndarray] | ndarray): Input images.
    bgr2rgb (bool): Whether to change bgr to rgb.
    float32 (bool): Whether to change to float32.

Returns:
    list[tensor] | tensor: Tensor images. If returned results only have
        one element, just return tensor.
c                 B   U R                   S   S:X  aM  U(       aF  U R                  S:X  a  U R                  S5      n [        R                  " U [        R
                  5      n [        R                  " U R                  SSS5      5      n U(       a  U R                  5       n U $ )Nr   r   float64float32r      )
r   dtypeastyper   cvtColorCOLOR_BGR2RGBtorch
from_numpy	transposefloat)imgbgr2rgbr'   s      r!   	_totensorimg2tensor.<locals>._totensorK   st    99Q<1yyI%jj+,,sC$5$56Cs}}Q156))+C
r#   )
isinstancelist)imgsr2   r'   r3   r1   s        r!   
img2tensorr8   >   sE     $<@ADS	#0DAATG,, Bs   ;c                     SU SS2SS24   -  SU SS2SS24   -  -   SU SS2SS24   -  -   nUR                  SSSS5      nU$ )	a  
Converts an RGB image to grayscale by applying the standard luminosity formula.

Args:
    img (torch.Tensor): The input image tensor with shape (batch_size, channels, height, width).
                         The image is expected to be in RGB format (3 channels).

Returns:
    torch.Tensor: The grayscale image tensor with shape (batch_size, 3, height, width).
                  The grayscale values are replicated across all three channels.
gA`"?Nr   r(   gbX9?r   gv/?r   )repeat)r1   xs     r!   to_grayr<   Z   s_     	AqsFec!QqS&k11EC1Q3K4GGA	Aq!AHr#   c                 J  ^ U R                  5         [        R                  " U[        R                  5      nUR	                  U5      n[        U5      S:  a  [        US S9S   nUS   nUS   nOSnSnU R                  U5        U R                  SS	9  Uc  U R                  S   nU R                  5         [        U R                  5      S:X  a  [        S
5      eU R                  S   nUc&  [        R                  S5        UR                  U5      n[         R"                  " U5      R%                  Xg5      nUR&                  S:X  a  UR)                  S5      nU
(       a  [+        USS9R)                  S5      S-  nUR%                  U5      nU R-                  [/        U/ SQ/ SQ5      5      S   mTR1                  SSS9m/ SQn[3        U4S jU 5       5      R5                  5       n[         R6                  " U5      n[         R8                  " UU[;        U5      5      n[         R8                  " UUU5      nOV[        R                  " U	[        R                  5      n[+        USS9R)                  S5      S-  nUR%                  U5      nU=nn[=        UUR>                  [@        RB                  5      n[/        UX45      nU" UR%                  U5      SSSS9u  nn[         RD                  " USSS5      n[         RF                  " UU5      n[         RH                  " UU/SS9nUUUU4$ )a  
Process face embeddings from an image, extracting relevant features such as face embeddings, landmarks, and parsed
face features using a series of face detection and alignment tools.

Args:
    face_helper_1: Face helper object (first helper) for alignment and landmark detection.
    clip_vision_model: Pre-trained CLIP vision model used for feature extraction.
    face_helper_2: Face helper object (second helper) for embedding extraction.
    eva_transform_mean: Mean values for image normalization before passing to EVA model.
    eva_transform_std: Standard deviation values for image normalization before passing to EVA model.
    app: Application instance used for face detection.
    device: Device (CPU or GPU) where the computations will be performed.
    weight_dtype: Data type of the weights for precision (e.g., `torch.float32`).
    image: Input image in RGB format with pixel values in the range [0, 255].
    original_id_image: (Optional) Original image for feature extraction if `is_align_face` is False.
    is_align_face: Boolean flag indicating whether face alignment should be performed.

Returns:
    Tuple:
        - id_cond: Concatenated tensor of Ante face embedding and CLIP vision embedding
        - id_vit_hidden: Hidden state of the CLIP vision model, a list of tensors.
        - return_face_features_image_2: Processed face features image after normalization and parsing.
        - face_kps: Keypoints of the face detected in the image.
r   c                 H    U S   S   U S   S   -
  U S   S   U S   S   -
  -  $ )Nbboxr   r   r   r(    )r;   s    r!   <lambda>)process_face_embeddings.<locals>.<lambda>   s=    QvYq\AfIaL5PUVW]U^_`UadefldmnodpUp4qr#   )key	embeddingkpsNT)only_center_facezfacexlib align face failzMFailed to detect face using insightface. Extracting embedding with align facer(   )r2   g     o@)g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?)dimkeepdim)r               	         c              3   .   >#    U  H
  nTU:H  v   M     g 7f)Nr@   ).0iparsing_outs     r!   	<genexpr>*process_face_embeddings.<locals>.<genexpr>   s     48a!8s   F)return_all_featuresreturn_hiddenshuffler   )rH   )%	clean_allr   r+   COLOR_RGB2BGRgetlensorted
read_imageget_face_landmarks_5all_landmarks_5align_warp_facecropped_facesRuntimeErrorloggerwarningget_featr-   r.   tondim	unsqueezer8   
face_parser   argmaxsumbool	ones_likewherer<   r   
image_sizer   BICUBICnormdivcat)face_helper_1clip_vision_modelface_helper_2eva_transform_meaneva_transform_stdappdeviceweight_dtyper   original_id_imageis_align_face	image_bgr	face_infoid_ante_embeddingface_kps
align_faceinputbg_labelbgwhite_imagereturn_face_features_imagereturn_face_features_image_2original_image_bgrface_features_imageid_cond_vitid_vit_hiddenid_cond_vit_normid_condrT   s                               @r!   process_face_embeddingsr   k   s   L UC$5$56I	"I
9~9*qr
	 &k2U#  Y'&&&= 003!!#
=&&'1,566,,Q/J  fg)22:>(():;>>vT"-77: :t4>>qAEI #..y@UWl/mnopq!((Q(=/484499;ooe,%*[[['%.%Q"',{{2{E'J$ \\*;S=N=NO-t<FFqIEQ DII"%A !"$5$@$@BSB[B[ $$79K_!2|,%W[ej"K zz+q!T:))K)9:Kii	K(bG
 	$	 r#   c
                    [        U[        5      (       a-  [        R                  " [	        US9R                  S5      5      n
OM[        R                  " [        R                  " [        R                  " U5      5      R                  S5      5      n
[        U
S5      n
U
n[        U UUUUUUUU
UU	5      u  ppUR                  5       R                  5       nUR                  5       nUR                  SSS5      nUR!                  5       S-  nUR#                  [        R$                  5      n[        R                  " [        R                  " U5      5      n
XX4$ )a0  
Process face embeddings from an input image for inference, including alignment, feature extraction, and embedding
concatenation.

Args:
    face_helper_1: Face helper object (first helper) for alignment and landmark detection.
    clip_vision_model: Pre-trained CLIP vision model used for feature extraction.
    face_helper_2: Face helper object (second helper) for embedding extraction.
    eva_transform_mean: Mean values for image normalization before passing to EVA model.
    eva_transform_std: Standard deviation values for image normalization before passing to EVA model.
    app: Application instance used for face detection.
    device: Device (CPU or GPU) where the computations will be performed.
    weight_dtype: Data type of the weights for precision (e.g., `torch.float32`).
    img_file_path: Path to the input image file (string) or a numpy array representing an image.
    is_align_face: Boolean flag indicating whether face alignment should be performed (default: True).

Returns:
    Tuple:
        - id_cond: Concatenated tensor of Ante face embedding and CLIP vision embedding.
        - id_vit_hidden: Hidden state of the CLIP vision model, a list of tensors.
        - image: Processed face image after feature extraction and alignment.
        - face_kps: Keypoints of the face detected in the image.
)r   RGBi   r(   r   r      )r5   strnparrayr
   convertr   exif_transposer   	fromarrayr"   r   cpudetachsqueezepermutenumpyr*   uint8)rv   rw   rx   ry   rz   r{   r|   r}   img_file_pathr   r   r~   r   r   align_crop_face_imager   tensors                    r!   process_face_embeddings_inferr      s#   J -%%-8@@GH001OPXXY^_` $E40E ?V?;G1 #&&(//1F^^F^^Aq!$F\\^c!F]]288$F##EOOF$;<E522r#   c                    [        SSSSSU[        R                  R                  U S5      S9nSUl        [        S	U[        R                  R                  U S5      S
9Ul        [        R                  R                  U  S3S/S9nUR                  SS9  [        S[        R                  R                  U SS5      SS9u  n  nUR                  n[        US[        5      n[        US[        5      n	[        U[         ["        45      (       d  U4S-  n[        U	[         ["        45      (       d  U	4S-  n	UnU	n	[%        S[        R                  R                  U S5      S/S9n
U
R                  SSS9  UR&                  R)                  5         UR                  R)                  5         UR)                  5         UR&                  R+                  U5        UR                  R+                  U5        UR+                  XS9  X4XzX4$ )a\  
Prepare all face models for the facial recognition task.

Parameters:
- model_path: Path to the directory containing model files.
- device: The device (e.g., 'cuda', 'xpu', 'cpu') where models will be loaded.
- dtype: Data type (e.g., torch.float32) for model inference.

Returns:
- face_helper_1: First face restoration helper.
- face_helper_2: Second face restoration helper.
- face_clip_model: CLIP model for face extraction.
- eva_transform_mean: Mean value for image normalization.
- eva_transform_std: Standard deviation value for image normalization.
- face_main_model: Main face analysis model.
r(   i   )r(   r(   retinaface_resnet50pngface_encoder)upscale_factor	face_size
crop_ratio	det_modelsave_extr|   model_rootpathNbisenet)
model_namer|   r   z./face_encoder/models/antelopev2/glintr100.onnxCUDAExecutionProvider)	providersr   )ctx_idzEVA02-CLIP-L-14-336zEVA02_CLIP_L_336_psz14_s6B.ptT)force_custom_clip
image_mean	image_stdr   
antelopev2)namerootr   )  r   )r   det_size)r)   )r   ospathjoinrk   r   r   	model_zoo	get_modelpreparer   visualgetattrr   r   r5   r6   tupler   face_detevalrh   )
model_pathr|   r)   rv   rx   model_face_clip_modelry   rz   face_main_models              r!   prepare_face_modelsr   #  s   $ &'ww||J?M  $M1VBGGLLUc<d M  ))33,DERiQj 4 M # .
Z1PQKE1a
 llO ,@ST>PQ(4-8802Q6'$77.014+) #Z HUlTmO 1z: !!!#f%'v+K]ppr#   )i   )TT)NT)T),importlib.util	importlibr   r   r   r   r-   PILr   r   torchvision.transformsr   !torchvision.transforms.functionalr   r   utilsr	   r
   __name__re   util	find_spec_insightface_available_consisid_eva_clip_available_facexlib_availabler   insightface.appr   ImportErrorr   r   consisid_eva_clip.constantsr   r   facexlib.parsingr   &facexlib.utils.face_restoration_helperr   r"   r8   r<   r   r   r   r@   r#   r!   <module>r      s     	 
    4 ? + 
H	"11-@L (~~778KLTXX nn..z:$F ,
h
ii=SS
t
uu3H
b
cc.-86 mt E3PBqr#   