Ubuntu(Linux)をWindows(WSL)上で動かす
wsl --install
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
less --help
man less
sudo ls
sudo apt update
sudo apt upgrade
sudo apt install wget
brew install wget
wget "https://www.dropbox.com/scl/fi/ahwe6wd78u3rusqy6j4yl/JPConstitution.txt?rlkey=cwnsj0vtj7e6nnl5je40qmd0m&dl=1" -O JPConstitution.txt
wget "https://www.dropbox.com/s/r89ahi6goxy5a2c/kokoro.txt?dl=1" -O kokoro.txt
sudo apt install unzip
brew install unzip
grep '正規表現' ファイル
sed -e 's/検索文字列/置換文字列/g' ファイル名
sudo apt install nkf
brew install nkf
less JPConstitution.txt
wc JPConstitution.txt
wc -m JPConstitution.txt
nkf -S -w8 kokoro.txt |less
nkf -S -w8 -Lu --overwrite kokoro.txt
sed -E -e 's/《[^》]+》//g;s/［＃[^］]+］//g' kokoro.txt >kokoro2.txt
s/《[^》]+》//g
s/［＃[^］]+］//g
sed -E -f removeruby.sed kokoro.txt >kokoro2.txt
grep '読[まみむめもん]' kokoro.txt
ls|code -
explorer.exe
xeyes
sudo apt install gnome-mahjongg
gnome-mahjongg
sudo apt remove gnome-mahjongg
sudo apt install mecab
brew install mecab
wget https://clrd.ninjal.ac.jp/unidic_archive/2302/unidic-cwj-202302.zip
unzip unidic-cwj-202302.zip -d unidic-cwj
wget https://clrd.ninjal.ac.jp/unidic_archive/2203/UniDic-202203_60b_qkana.zip
unzip UniDic-202203_60b_qkana.zip
mv 60b_qkana unidic-qkana
mv unidic-qkana/.dicrc unidic-qkana/dicrc
mecab -v
echo "ゼレンスキー大統領" | mecab -d unidic-cwj
echo "井ノ上たきな" | mecab -d unidic-cwj
echo "政府の行為によつて再び戦争の惨禍が起ることのないやうにする" | mecab -d unidic-cwj
echo "政府の行為によつて再び戦争の惨禍が起ることのないやうにする" | mecab -d unidic-qkana
mecab -d unidic-cwj JPConstitution.txt | less
mecab -d unidic-cwj -Ochamame JPConstitution.txt
mecab -d unidic-qkana -Ochamame JPConstitution.txt
語彙素 | 語彙素読み | 語形 | 品詞 | 活用型 | 活用形 | 書字形 | 発音形 | 語種 | 語彙素ID |
;語彙素\t語彙素読み\t語形\t品詞\t活用型\t活用形\t書字形\t発音形\t語種\t語彙素ID
node-format-chamame2 = \t%m\t%f[7]\t%f[6]\t%f[23]\t%F-[0,1,2,3]\t%f[4]\t%f[5]\t%f[8]\t%f[9]\t%f[12]\t%f[28]\n
unk-format-chamame2 = \t%m\t\t\t%m\t未知語\t\t\t\t\t\t\n
bos-format-chamame2 = B
eos-format-chamame2 =
mecab -d unidic-cwj --node-format="\t%m\t%f[7]\t%f[6]\t%f[23]\t%F-[0,1,2,3]\t%f[4]\t%f[5]\t%f[8]\t%f[9]\t%f[12]\t%f[28]\n" --unk-format="\t%m\t\t\t%m\t未知語\t\t\t\t\t\t\n" --bos-format="B" JPConstitution.txt > JPConstitution_morph.txt
mecab -d unidic-qkana --node-format="\t%m\t%f[7]\t%f[6]\t%f[15]\t%F-[0,1,2,3]\t%f[4]\t%f[5]\t%f[8]\t%f[9]\t%f[11]\t%f[26]\n" --unk-format="\t%m\t\t\t%m\t未知語\t\t\t\t\t\t\n" --bos-format="B" JPConstitution.txt > JPConstitution_morph2.txt
mecab -d unidic-cwj -Ochamame2 JPConstitution.txt > JPConstitution_morph.txt
mecab -d unidic-qkana -Ochamame2 JPConstitution.txt > JPConstitution_morph2.txt
less JPConstitution_morph.txt
less JPConstitution_morph2.txt
diff JPConstitution_morph.txt JPConstitution_morph2.txt
grep 接続詞 JPConstitution_morph.txt
grep 固有名詞 JPConstitution_morph.txt
grep 固有名詞 JPConstitution_morph2.txt
grep 接続詞 JPConstitution_morph2.txt |sort |uniq -c
grep $'\t副詞' JPConstitution_morph2.txt |sort |uniq -c
cut -f 10 JPConstitution_morph.txt
cut -f 6 JPConstitution_morph.txt|sort|uniq -c
cut -f 6 JPConstitution_morph.txt|sort|uniq -c|sort -nr
chmod +x *.sh
wget "https://www.dropbox.com/scl/fi/vsphond396zcoxaqlhlp3/chiisakimonoe.txt?rlkey=89a63g0ni4hdxaqzlb02h5cbu&dl=1" -O chiisakimonoe.txt
nkf -S -w8 --overwrite chiisakimonoe.txt
mecab -d unidic-cwj --node-format="\t%m\t%f[7]\t%f[6]\t%f[23]\t%F-[0,1,2,3]\t%f[4]\t%f[5]\t%f[8]\t%f[9]\t%f[12]\t%f[28]\n" --unk-format="\t%m\t\t\t%m\t未知語\t\t\t\t\t\t\n" --bos-format="B" chiisakimonoe.txt > chiisakimonoe_morph.txt
grep $'\t副詞' chiisakimonoe_morph.txt |sort |uniq -c