
    rh+;                       S r SSKJr  / SQrSSKrSSKrSSKrSSKrSSKrSSK	r	SSK
r
\R                  " S5      r\R                  " S5      rSS jrSSS jjrSSS	 jjrSSS
 jjrSSS jjrSSS jjr  SS jrSS jrSS jrS S jr\R0                   " S S5      5       r  S!       S"S jjr\S:X  a  SSKr\R:                  " 5         gg)#z 
Tools for working with strings
    )annotations)whitespaceEqualgetNumFromStrhyphenToCamelCasecamelCaseToHyphenspaceCamelCasegetMd5	formatStrstripAccentsnormalizeFilenameremovePunctuationparenthesesMatchParenthesesMatchNz\s+z
+c                    [         R                  SU 5      n [         R                  SU5      n[        R                  SU 5      n [        R                  SU5      nX:X  a  gg)z
returns True if a and b are equal except for whitespace differences

>>> a = '    hello \n there '
>>> b = 'hello there'
>>> c = ' bye there '
>>> common.whitespaceEqual(a, b)
True
>>> common.whitespaceEqual(a, c)
False
 TF)
WHITESPACEsubLINEFEED)abs     T/home/james-whalen/.local/lib/python3.13/site-packages/music21/common/stringTools.pyr   r   -   sO     	r1Ar1ARARAv    c                    / n/ nU  H,  nXA;   a  UR                  U5        M  UR                  U5        M.     SR                  U5      SR                  U5      4$ )a   
Given a string, extract any numbers.
Return two strings, the numbers (as strings) and the remaining characters.

>>> common.getNumFromStr('23a')
('23', 'a')
>>> common.getNumFromStr('23a954Hello')
('23954', 'aHello')
>>> common.getNumFromStr('')
('', '')
r   )appendjoin)usrStrnumbersfoundremainchars        r   r   r   D   sR     EF?LLMM$	  775>2776?**r   c                    Sn[        U R                  U5      5       H!  u  p4US:X  a  UnM  X$R                  5       -  nM#     U$ )a  
Given a hyphen-connected-string, change it to
a camelCaseConnectedString.

The replacement can be specified to be something besides a hyphen.

>>> common.hyphenToCamelCase('movement-name')
'movementName'

>>> common.hyphenToCamelCase('movement_name', replacement='_')
'movementName'

Safe to call on a string lacking the replacement character:

>>> common.hyphenToCamelCase('voice')
'voice'

And on "words" beginning with numbers:

>>> common.hyphenToCamelCase('music-21')
'music21'
r   r   )	enumeratesplit
capitalize)r   replacementpostiwords        r   r   r   [   sE    . DV\\+676DOO%%D	 8
 Kr   c                   [        U5      S:w  a  [        S5      eUR                  5       U:w  a  [        S5      e[        R                  " SSU-   S-   U 5      n[        R                  " SSU-   S-   U5      R                  5       $ )a  
Given a camel-cased string, or a mixture of numbers and characters,
create a space separated string.

The replacement can be specified to be something besides a hyphen, but only
a single character and not (for internal reasons) an uppercase character.

code from https://stackoverflow.com/questions/1175208/elegant-python-function-to-convert-camelcase-to-snake-case

>>> common.camelCaseToHyphen('movementName')
'movement-name'

First letter can be uppercase as well:

>>> common.camelCaseToHyphen('MovementName')
'movement-name'

>>> common.camelCaseToHyphen('movementNameName')
'movement-name-name'

>>> common.camelCaseToHyphen('fileName', replacement='_')
'file_name'

Some things you cannot do:

>>> common.camelCaseToHyphen('fileName', replacement='NotFound')
Traceback (most recent call last):
ValueError: Replacement must be a single character.

>>> common.camelCaseToHyphen('fileName', replacement='A')
Traceback (most recent call last):
ValueError: Replacement cannot be an uppercase character.
   z'Replacement must be a single character.z-Replacement cannot be an uppercase character.z(.)([A-Z][a-z]+)z\1z\2z([a-z0-9])([A-Z]))len
ValueErrorlowerrer   )r   r%   s1s      r   r   r   {   s    F ;1BCCk)HII	"EK$7%$?	HB66%u{':U'BBGMMOOr   c                <   SnSnSnSnSn/ nUc  Sn	OUn	U  H  n
X;   a  SnOSnU(       a  U(       d
  U(       d  SnOSnU(       d  U(       d
  U(       a  SnOSnU(       aF  U
R                  5       (       d  U(       d  U(       a  UR                  S5        UR                  U
5        OUR                  U
5        U(       a  SnM  SnM     SR                  U5      nU	 H%  nSR                  U5      nUR                  X5      nM'     U(       a  UR                  SS5      nU$ )a+  
Given a camel-cased string, or a mixture of numbers and characters,
create a space separated string.

If replaceUnderscore is True (default) then underscores also become spaces (but without the _)

>>> common.spaceCamelCase('thisIsATest')
'this Is A Test'
>>> common.spaceCamelCase('ThisIsATest')
'This Is A Test'
>>> common.spaceCamelCase('movement3')
'movement 3'
>>> common.spaceCamelCase('opus41no1')
'opus 41 no 1'
>>> common.spaceCamelCase('opus23402no219235')
'opus 23402 no 219235'
>>> common.spaceCamelCase('opus23402no219235').title()
'Opus 23402 No 219235'

There is a small list called fixMeList that can fix mistakes.

>>> common.spaceCamelCase('PMFC22')
'PMFC 22'

>>> common.spaceCamelCase('hello_myke')
'hello myke'
>>> common.spaceCamelCase('hello_myke', replaceUnderscore=False)
'hello_myke'
z0123456789.F)PMFCT r   _)isupperr   r   replace)r   replaceUnderscore	fixMeListr   firstNum	firstCharisNumber	lastIsNumr&   	fixupListr    postStrfixMefixMeSpaceds                 r   r   r      s   < GHIHID 		?HHHYHH 	iII||~~YC KKKKII7 8 ggdmGhhuo//+5  //#s+Nr   c                n   U c=  [        [        R                  " 5       5      [        [        R                  " 5       5      -   n [        R                  " 5       n UR                  U 5        UR                  5       $ ! [         a2    UR                  U R                  S5      5         UR                  5       $ f = f)z
Return an md5 hash from a string.  If no value is given then
the current time plus a random number is encoded.

>>> common.getMd5('test')
'098f6bcd4621d373cade4e832627b4f6'
UTF-8)	strtimerandomhashlibmd5update	TypeErrorencode	hexdigest)valuems     r   r	   r	      s     }DIIK 3v}}#77A(	 ;;=  (	g&';;=(s   A8 8*B43B4c                   U /UQn [        [        U 5      5       HT  nX   n[        U[        5      (       a  UR	                  S5      X'   [        U[
        5      (       a  MF   [        U5      X'   MV     SR                  U 5      S-   $ ! [         a.     UR	                  S5      X'    M  ! [         a	    SX'     M  f = ff = f)z
DEPRECATED: do not use.  May be removed at any time.

Format one or more data elements into string suitable for printing
straight to stderr or other outputs

>>> a = common.formatStr('test', '1', 2, 3)
>>> print(a)
test 1 2 3
<BLANKLINE>
zutf-8r   r2   
)
ranger+   
isinstancebytesdecoderB   reprrH   AttributeErrorr   )msgrest_of_messagekeywordsr'   xs        r   r
   r
     s     
!
!C3s8_FaXXg&CF!S!! a  88C=4    XXg.CF%  CF  s*   !B
B>B''B:5B>9B::B>c                X   [         R                  " SU 5      R                  SS5      R                  SS5      R                  SS5      R                  SS5      R                  S	S5      nS
R                  U Vs/ s H"  n[         R                  " U5      (       a  M   UPM$     sn5      $ s  snf )u  
removes accents from unicode strings.

>>> s = 'trés vite'
>>> 'é' in s
True
>>> common.stripAccents(s)
'tres vite'

Also handles the German Eszett and smart quotes

>>> common.stripAccents('Muß')
'Muss'
>>> common.stripAccents('Süss, “êtré”')
'Suss, "etre"'

Note -- is is still possible to have non-Ascii characters after this,
like in this Japanese expression for music:

>>> common.stripAccents('音楽')
'音楽'
NFKD   ßssu   “"u   ”u   ‘'u   ’r   )unicodedata	normalizer5   r   	combining)inputString	nfkd_formcs      r   r   r   )  s    0 	fk2	t									  77yIy!0E0Ea0HAyIJJIs   7B'B'c                    Sn[        U 5      nUS:  a!  U S   S:X  a  [        XS-
  S 5      nU SUS-
   n [        U 5      n U R                  SS5      R	                  S5      n [
        R                  " S	S
U 5      R                  5       n Ub  X-  n U $ )u~  
take a name that might contain unicode characters, punctuation,
or spaces and
normalize it so that it is POSIX compliant (except for the limit
on length).

Takes in a string or unicode string and returns a string (unicode in Py3)
without any accented characters.

>>> common.normalizeFilename('03-Niccolò all’lessandra.not really.xml')
'03-Niccolo_all_lessandra_not_really.xml'
N   .   asciiignorerA   z[^\w-]r3   )r+   rB   r   rI   rR   r.   r   strip)name	extensionlenNames      r   r   r   K  s     I$iG{tBx3q[\*+	LWq[!D;;w)009D66)S$'--/DKr   c                r    [         R                  SS[        R                  5      nU R	                  U5      nU$ )zo
Remove all punctuation from a string.

>>> common.removePunctuation('This, is! my (face).')
'This is my face'
r   )rB   	maketransstringpunctuation	translate)srq   outs      r   r   r   g  s/     b"f&8&89I
++i
 CJr   c                  >    \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	rg
)r   ir  intstartendrB   textlist[ParenthesesMatch]nested N)__name__
__module____qualname____firstlineno____annotations____static_attributes__r~   r   r   r   r   r  s    J	H
I""r   r   c           	     (   U(       a  U(       d  [        S5      e[        SSS/ 5      nU/nSnSnU[        U 5      :  Ga  U(       dO  XU[        U5      -    U:X  a;  [        U[        U5      -   SS/ 5      nUR                  U5        U[        U5      -  nMf  U(       d  XU[        U5      -    U:X  a}  [        U5      S::  a  [        SU< SU< S	U S
35      eUR	                  5       nXgl        XR                  U Ul        US   R                  R                  U5        U[        U5      -  nM  X   S:X  a  U(       + nOSnUS-  nU[        U 5      :  a  GM  [        U5      S:  a#  [        SU< S	US   R                  S-
   S35      eUR                  $ )a  
Utility tool to return a list of parentheses matches for a string using a dataclass
called `ParenthesesMatch` which has indices of the `start` and `end`
of the match, and the `text` of the match, and a set of `nested`
ParenthesesMatch objects (which may have their own nested objects).

>>> st = r'Bologne wrote (a (whole) (lot) \(of\)) sym\(ph(on)ies\) concertantes.'
>>> common.stringTools.parenthesesMatch(st)
[ParenthesesMatch(start=15, end=37, text='a (whole) (lot) \\(of\\)',
                  nested=[ParenthesesMatch(start=18, end=23, text='whole', nested=[]),
                          ParenthesesMatch(start=26, end=29, text='lot', nested=[])]),
 ParenthesesMatch(start=47, end=49, text='on', nested=[])]

Other brackets can be used:

>>> st = r'[Whammy bars] and [oboes] do [not [mix] very] [well.]'
>>> common.stringTools.parenthesesMatch(st, open='[', close=']')
[ParenthesesMatch(start=1, end=12, text='Whammy bars', nested=[]),
 ParenthesesMatch(start=19, end=24, text='oboes', nested=[]),
 ParenthesesMatch(start=30, end=44, text='not [mix] very',
                  nested=[ParenthesesMatch(start=35, end=38, text='mix', nested=[])]),
 ParenthesesMatch(start=47, end=52, text='well.', nested=[])]

The `open` and `close` parameters can be multiple characters:

>>> st = r'Did you eat <<beans>> today <<Pythagoreas<<?>>>>'
>>> common.stringTools.parenthesesMatch(st, open='<<', close='>>')
[ParenthesesMatch(start=14, end=19, text='beans', nested=[]),
 ParenthesesMatch(start=30, end=46, text='Pythagoreas<<?>>',
                  nested=[ParenthesesMatch(start=43, end=44, text='?', nested=[])])]

They cannot, however, be empty:

>>> common.stringTools.parenthesesMatch(st, open='', close='')
Traceback (most recent call last):
ValueError: Neither open nor close can be empty.

Unmatched opening or closing parentheses will raise a ValueError:

>>> common.stringTools.parenthesesMatch('My (parentheses (sometimes (continue',)
Traceback (most recent call last):
ValueError:  Opening '(' at index 3 was never closed

>>> common.stringTools.parenthesesMatch('This is a <bad> example>', open='<', close='>')
Traceback (most recent call last):
ValueError: Closing '>' without '<' at index 23.

Note that using multiple characters like a prefix can have unintended consequences:

>>> st = r'[Pitch("C4"), [Pitch("D5"), Pitch("E6")], Pitch("Pity("Z9")")]'
>>> common.stringTools.parenthesesMatch(st, open='Pitch("', close='")')
Traceback (most recent call last):
ValueError: Closing '")' without 'Pitch("' at index 59.

So to do something like this, you might need to get creative:

>>> out = common.stringTools.parenthesesMatch(st, open='("', close='")')
>>> out
[ParenthesesMatch(start=8, end=10, text='C4', nested=[]),
 ParenthesesMatch(start=22, end=24, text='D5', nested=[]),
 ParenthesesMatch(start=35, end=37, text='E6', nested=[]),
 ParenthesesMatch(start=49, end=59, text='Pity("Z9")',
                  nested=[ParenthesesMatch(start=55, end=57, text='Z9', nested=[])])]
>>> extractedPitches = []
>>> for match in out:
...     if st[match.start - 7:match.start] == 'Pitch("':
...          extractedPitches.append(match.text)
>>> extractedPitches
['C4', 'D5', 'E6', 'Pity("Z9")']

* New in v9.3.
z$Neither open nor close can be empty.r   Fr   r*   zClosing z	 without z
 at index rh   \zOpening z was never closed)	r,   r   r+   r   poprz   ry   r{   r}   )ru   openclose	mainMatchstacklastCharWasBackslashr'   curPMs           r   r   r   y  s   Z u?@@ RR0I%.KE 	A
c!f*$CI&$.$QT]BB?ELLTNA&a#e*n%.5zQ 8E9IdXZPQsRS!TUUIIKEI;;q)EJ"I##E*UOA44<';#; #( 	Q- c!f*0 5zA~8D8:eAhnnq6H5IIZ[\\r   __main__)r   rB   r   rB   returnbool)
0123456789)r   rB   r   rB   r   ztuple[str, str])-)r   rB   r%   rB   r   rB   )TN)r   rB   r   rB   )N)r   rB   )rb   rB   r   rB   )rm   rB   r   rB   )ru   rB   r   rB   )())ru   rB   r   rB   r   rB   r   r|   )__doc__
__future__r   __all__dataclassesrE   rD   r.   rC   rr   r_   compiler   r   r   r   r   r   r   r	   r
   r   r   r   	dataclassr   r   r   music21mainTestr~   r   r   <module>r      s    #    	    ZZ
::e.+.@(PVNb(   <KD8	 # # # q
q
q q 	qj z r   