2022年10月27日星期四

2022年10月19日星期三

linux uname distribution check

# uname -a

Linux 6cfa069b60a5 4.14.291-218.527.amzn2.x86_64 #1 SMP Fri Aug 26 09:54:31 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux

 

# cat /etc/os-release

NAME="Ubuntu"

VERSION="20.04.2 LTS (Focal Fossa)"

ID=ubuntu

ID_LIKE=debian

PRETTY_NAME="Ubuntu 20.04.2 LTS"

VERSION_ID="20.04"

HOME_URL="https://www.ubuntu.com/"

SUPPORT_URL="https://help.ubuntu.com/"

BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"

PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"

VERSION_CODENAME=focal

UBUNTU_CODENAME=focal

2022年10月14日星期五

shell sed insert. append replace

追加: i (insert), a(append) 注意:'中の処理は一つの処理とする'

sed -e '/pattern/ a内容'  #pattern行の後ろに、aで内容をappend

sed -e '/pattern/ i内容'  #pattern行の直前に(新しい行として)、iで内容をinsert


範囲限定の削除

sed -e '/fromPattern/,/toPattern/ d'  #fromPattern行(含む)~toPattern行(含む)まで、dで削除する

範囲限定の置換

sed -e '/fromPattern/,/toPattern/ s/pattern/replacement/'  #fromPattern行(含む)~toPattern行(含む)まで、sでpatternにmatchする文字列をreplacementで置換する。


2022年10月4日星期二

python3 regex .* vs .*? : match more or less, match, search, findall, sub

1.  .* vs  .*? :

import re

input='helloworld, 123abc,test'
pattern1=r'.*(\d+)'
pattern2=r'.*?(\d+)'

result1=re.match(pattern1, input)
result2=re.match(pattern2, input)

print(result1.group(1)) 
=> 3    # .* match more, \d+ match 3 only

print(result2.group(1)) 
=> 123 # .*? match less, \d+ match 123.  Generally used.

2. match, search, findall
pattern='test'
input2=' a test, b test '
m=re.match(pattern, input2)
print(m)
=>None
m=re.search(pattern, input2)
print(m)
=><_sre.SRE_Match object; span=(3, 7), match='test'>
print(m.group())
=>test
m=re.findall(pattern, input2)
print(m)
=>['test', 'test']

3. replace by sub() 
html = '''    <ul id="list" class="list-group">
        <li data-view="5"><a href="null">test1</a></li>
        <li data-view="6"><a href="null">test2</a></li>
    </ul>
    '''

html = re.sub('<a.*?>|</a>', '', html)
print(html)
=>    <ul id="list" class="list-group">
        <li data-view="5">test1</li>
        <li data-view="6">test2</li>
    </ul>

pattern = '<li.*?>(.*?)</li>'
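results = re.findall(pattern, html, re.S)  # re.S (DOTALL): '.' also matches newlines
print(results)=> ['test1', 'test2']
pattern = '<li.*>(.*)</li>'
results = re.findall(pattern, html, re.S)  # re.S (DOTALL): '.' also matches newlines; greedy .* keeps only the last <li>
print(results)=> ['test2']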

4. get match group by name 
pattern = '<li.*?>(?P<text>.*?)</li>'
results = re.search(pattern, html, re.S)  # re.S (DOTALL): '.' also matches newlines
print(results.group(1))=>test1
print(results.group("text"))=>test1

5. match lookahead assertion. (?=, ?! )
input1='hello world!'
input2='hello goodbye!'

pattern1='hello (?=world)'       #?=world: lookahead match
pattern2='hello (?!world)'  #?!world: lookahead not match 

r1=re.match(pattern1, input1)
print(r1.group())=>hello

r2=re.match(pattern2, input1)
print(r2)=>None   # no match, so r2 is None (r2.group() would raise AttributeError)
r3=re.match(pattern2, input2)
print(r3.group())=>hello

6. match lookbehind assertion. (?<=,?<! )
>>> m = re.search('(?<=abc)def', 'abcdef')  #?<=lookbehind match
>>> m.group(0)
'def'

>>> m = re.search(r'(?<=-)\w+', 'spam-egg')     # \w+ must be immediately preceded by '-'
>>> m.group(0)
'egg'