
    11i                         S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	J
r
Jr  S SKJr  S SKJr   " S S5      rg)    N)
TableGroupColumn)Tree)grapheme_patternc                       \ rS rSrSrSrSrSSSSS	.\S
SS./\S.S./0r\SS\	4S jj5       r
S\	4S jrS\R                  \	SS4   4S jr\SSS jj5       r\SS\SS 4S jj5       r\SSS jj5       rS rSrg)Profile   z?
An Orthography Profile as specified by Moran and Cysouw 2018.
GraphemeNULLtables	Tutf-8)	delimiterheaderencodingstring)namedatatyperequired)columns
primaryKey)dialecttableSchemaNreturnc                     [         R                   " U R                  5      n[        U=(       d    S5      US   S   S'   U$ )N r   r   url)copyMDstr)clsfnamemds      J/home/james-whalen/.local/lib/python3.13/site-packages/segments/profile.pydefault_metadataProfile.default_metadata,   s4    YYsvv!$U[b!18Q	    specsc           
         [         R                  " 5       U l        [        5       U l        UR                  SS5      U l        UR                  SS5      U l        X l        [        R                  " [        5      n[        U5       GH9  u  pEU R                  U;  a  [        S5      eU R                  (       ah  UR                  5        VVs0 s HK  u  pg[         R"                  " U R                  U5      Uc  SO [         R"                  " U R                  U5      _MM     nnnUR                  U R                  5      nU(       d  [        S5      eU R                  R%                  UR'                  5       5      U l        XR                  ;  a  XPR                  U'   GM  UR)                  SR+                  US-   U5      5        GM<     [-        [/        U R                  R'                  5       5      5      U l        gs  snnf )ac  

Parameters
----------
specs : list of dict
    A list of grapheme specifications.
kw :
    The following keyword arguments are recognized:
    - fname: Path of the profile or profile metadata.
    - form: Unicode normalization to apply to the data in the profile before use.
    - remaining keyword arguments are assigned as dict to `Profile.metadata`.
r"   Nformzinvalid grapheme specificationzGrapheme must not be emptyz+line {0}:duplicate grapheme in profile: {1}   )collectionsOrderedDict	graphemessetcolumn_labelspopr"   r*   metadatalogging	getLogger__name__	enumerateGRAPHEME_COL
ValueErroritemsunicodedata	normalizeunionkeyswarningformatr   listtree)	selfr(   kwlogispeckvgraphemes	            r$   __init__Profile.__init__2   s    %002 UVVGT*
FF64(	) 'GA  , !ABByy !%

. !-  ))$))Q7 !	{/D/DTYYPQ/RS ,  .
 xx 1 12H !=>>!%!3!3!9!9$))+!FD ~~-+/x(AHHQPXY[) (, dnn11345	#.s   AG3c           	   #   L  #    U R                   R                  5        Hw  u  pU R                  U0nUR                  U R                   Vs0 s H  oDS _M     sn5        UR                  UR                  5        VVs0 s H  u  pEXE_M	     snn5        Uv   My     g s  snf s  snnf 7fN)r.   r9   r7   updater0   )rB   rI   rF   resrG   rH   s         r$   	iteritemsProfile.iteritems^   s     "nn224NH$$h/CJJ););<);A4);<=JJ667I	 5<6s   AB$	B
%B$:BB$c                     [         R                  " U5      nSn[        UR                  5      S:w  a  [        S5      eUR                  nUR                  [        R                  " U5      US9  [        R                  " 5          [        R                   " S5        U " UR                  S   R#                  US9 VVVs/ s HI  nUR%                  5        VVs0 s H(  u  pxXwU R&                  :w  a  XR(                  :X  a  SOU_M*     snnPMK     snnn0 UD6n	SSS5        U	$ ! [        R                  R                   a+    [         R
                  " U R                  U5      5      nUn GNMf = fs  snnf s  snnnf ! , (       d  f       W	$ = f)z[
Read an orthography profile from a metadata file or a default tab-separated profile file.
N   z2profile description must contain exactly one table)r"   r*   ignorer   r"   )r   	from_filejsondecoderJSONDecodeError	fromvaluer%   lenr   r8   common_propsrN   pathlibPathwarningscatch_warningssimplefilter	iterdictsr9   r7   r   )
r!   r"   r*   tgopfnamer2   drG   rH   rO   s
             r$   rV   Profile.from_filee   sZ   
	%%e,BG ryy>QQRR??gll51=$$&!!(+ 99Q<111@B@a  !wwy*(tq C$4$44hhdQN(*@B 	C ' 
 ||++ 	%%c&:&:5&ABBG	* B '& 
sB   D 9E8<E1/E+E1	E8AE('E(+E11E88
Ftextc           
          [         R                  " [        R                  " U5      5      nUR	                  5        VVs/ s H-  u  pE[         R
                  " U R                  U4SU4X$4/5      PM/     nnnU " U6 $ s  snnf )z
Create a Profile instance from the Unicode graphemes found in `text`.

Parameters
----------
text
mapping

Returns
-------
A Profile instance.

	frequency)r,   Counterr   findallmost_commonr-   r7   )r!   rg   mappingr.   rI   ri   r(   s          r$   	from_textProfile.from_text}   s      ''(8(@(@(FG	 (1'<'<'>@
 (?#	 ##!!8,i(#%% & (? 	 @ E{@s   4A:c                     [         R                  " U5      R                  SS9 nUR                  5       nU R	                  SR                  U5      US9sS S S 5        $ ! , (       d  f       g = f)Nr   )r    rm   )r]   r^   open	readlinesrn   join)r!   r"   rm   fpliness        r$   from_textfileProfile.from_textfile   sM    \\% %%w%72LLNE==%'=B 877s   /A
A+c                    [         R                  " U R                  5       5      nU R                   Hf  nX R                  :w  d  M  UR
                  S   R                  R                  R                  [        R                  " X R                  S.5      5        Mh     UR
                  S   R                  U R                  5       SS9R                  S5      R                  5       $ )zM
A Profile is represented as tab-separated lines of grapheme specifications.
r   )r   nullNrU   utf8)r   rZ   r%   r0   r7   r   r   r   appendr   r   writerP   decodestrip)rB   rc   cols      r$   __str__Profile.__str__   s     !!$"7"7"9:%%C'''		!((0077$$c99%EFH &
 yy|!!$.."2$!?FFvNTTVVr'   )r0   r"   r*   r.   r2   rA   rM   )r   r   rr   )r5   
__module____qualname____firstlineno____doc__r7   r   r   classmethoddictr%   rJ   typing	GeneratorrP   rV   r    rn   rx   r   __static_attributes__ r'   r$   r   r      s     LD "&" ' %1(0(,  #/	 

B, T  
*6t *6X6++D$,<=   . S 	  . C C

Wr'   r   )r   r   r3   r]   r_   r,   r:   json.decoderrW   csvwr   r   segments.treer   segments.utilr   r   r   r'   r$   <module>r      s4            #  *UW UWr'   