Lazy Bird.: 十月 2022

2022年10月27日星期四

shell while read from file

while read LINE

do

    echo $LINE

done < debug.log_key.txt

2022年10月19日星期三

linux uname distribution check

# uname -a

Linux 6cfa069b60a5 4.14.291-218.527.amzn2.x86_64 #1 SMP Fri Aug 26 09:54:31 UTC 2022 x86_64 x86_64 x86_64 GNU/Linux

# cat /etc/os-release

NAME="Ubuntu"

VERSION="20.04.2 LTS (Focal Fossa)"

ID=ubuntu

ID_LIKE=debian

PRETTY_NAME="Ubuntu 20.04.2 LTS"

VERSION_ID="20.04"

HOME_URL="https://www.ubuntu.com/"

SUPPORT_URL="https://help.ubuntu.com/"

BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"

PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"

VERSION_CODENAME=focal

UBUNTU_CODENAME=focal

2022年10月14日星期五

shell sed insert. append replace

追加: i (insert), a(append) 注意：'中の処理は一つの処理とする'

sed -e '/pattern/ a内容' #pattern行の後ろに、aで内容をappend

sed -e '/pattern/ i内容' #pattern行の前（直前の行）に、iで内容をinsert

範囲限定の削除

sed -e '/fromPattern/,/toPattern/ d' #fromPattern行（含む）～toPattern行（含む）まで、ｄで削除する

範囲限定の置換

sed -e '/fromPattern/,/toPattern/ s/pattern/replacement/' #fromPattern行（含む）～toPattern行（含む）まで、sでpatternにmatchする文字列をreplacementで置換する。

2022年10月4日星期二

python3 regex .* vs .*? : match more or less, match, search, findall, sub

1. .* vs .*? :

import re

input='helloworld, 123abc,test'

pattern1='.*(\d+)'

pattern2='.*?(\d+)'

result1=re.match(pattern1, input)

result2=re.match(pattern2, input)

print(result1.group(1))

=> 3 # .* match more, \d+ match 3 only,

print(result2.group(1))

=> 123 # .*? match less, \d+ match 123. Generally used.

2. match, search, findall

pattern='test'

input2=' a test, b test '

m=re.match(pattern, input2)

print(m)

=>None

m=re.search(pattern, input2)

print(m)

=><_sre.SRE_Match object; span=(3, 7), match='test'>

print(m.group())

=>test

m=re.findall(pattern, input2)

print(m)

=>['test', 'test']

3. replace by sub()

html = ''' <ul id="list" class="list-group">

<li><a href="#">test1</a></li>

<li><a href="#">test2</a></li>

</ul>

　　　　'''

html = re.sub('<a.*?>|</a>', '', html)

print(html)

=> <ul id="list" class="list-group">

<li>test1</li>

<li>test2</li>

</ul>

pattern = '<li.*?>(.*?)</li>'

results = re.findall(pattern, html, re.S) # re.S: 「.」が改行にもマッチする

print(results)=> ["test1","test2"]

pattern = '<li.*>(.*)</li>'

results = re.findall(pattern, html, re.S) # re.S: 「.」が改行にもマッチする

print(results)=> ["test2"]

4. get match group by name

pattern = '<li.*?>(?P<text>.*?)</li>'

results = re.search(pattern, html, re.S) # re.S改行

print(results.group(1))=>test1

print(results.group("text"))=>test1

5. match lookahead assertion. (?=, ?! )

input1='hello world!'

input2='hello goodbye!'

pattern1='hello (?=world)' #?=world: lookahead match

pattern2='hello (?!world)' #?!world: lookahead not match

r1=re.match(pattern1, input1)

print(r1.group())=>hello

r2=re.match(pattern2, input1)

print(r2)=>None

r3=re.match(pattern2, input2)

print(r3.group())=>hello

6. match lookbehind assertion. (?<=,?<! )

>>> m = re.search('(?<=abc)def', 'abcdef')  #?<=lookbehind match
>>> m.group(0)
'def'

>>> m = re.search(r'(?<=-)\w+', 'spam-egg')     # must have - before \w+
>>> m.group(0)
'egg'