1. .* vs .*? :
import re
input='helloworld, 123abc,test'
pattern1=r'.*(\d+)'
pattern2=r'.*?(\d+)'
result1=re.match(pattern1, input)
result2=re.match(pattern2, input)
print(result1.group(1))
=> 3 # greedy .* consumes as much as possible, leaving only the last digit "3" for \d+
print(result2.group(1))
=> 123 # non-greedy .*? consumes as little as possible, so \d+ matches all of "123". This form is generally preferred.
2. match, search, findall
pattern='test'
input2=' a test, b test '
m=re.match(pattern, input2)
print(m)
=>None
m=re.search(pattern, input2)
print(m)
=><_sre.SRE_Match object; span=(3, 7), match='test'>
print(m.group())
=>test
m=re.findall(pattern, input2)
print(m)
=>['test', 'test']
3. replace by sub()
html = ''' <ul id="list" class="list-group">
<li data-view="5"><a href="null">test1</a></li>
<li data-view="6"><a href="null">test2</a></li>
</ul>
'''
html = re.sub('<a.*?>|</a>', '', html)
print(html)
=> <ul id="list" class="list-group">
<li data-view="5">test1</li>
<li data-view="6">test2</li>
</ul>
pattern = '<li.*?>(.*?)</li>'
results = re.findall(pattern, html, re.S) # re.S: '.' also matches newlines
print(results)=> ["test1","test2"]
pattern = '<li.*>(.*)</li>'
results = re.findall(pattern, html, re.S) # re.S: '.' also matches newlines
print(results)=> ["test2"]
4. get match group by name
pattern = '<li.*?>(?P<text>.*?)</li>'
results = re.search(pattern, html, re.S) # re.S: '.' also matches newlines
print(results.group(1))=>test1
print(results.group("text"))=>test1
5. Lookahead assertions: (?=...) and (?!...)
input1='hello world!'
input2='hello goodbye!'
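pattern1='hello (?=world)' # (?=world): positive lookahead, matches only if "world" follows
pattern2='hello (?!world)' # (?!world): negative lookahead, matches only if "world" does not follow
r1=re.match(pattern1, input1)
print(r1.group())=>hello # the match is 'hello ' (with trailing space); the lookahead text is not consumed
r2=re.match(pattern2, input1)
print(r2)=>None # no match, so r2 is None (calling r2.group() would raise AttributeError)
r3=re.match(pattern2, input2)
print(r3.group())=>hello # the match is 'hello ' (with trailing space)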
6. Lookbehind assertions: (?<=...) and (?<!...)
没有评论:
发表评论