{"id":1528,"date":"2023-04-04T17:20:40","date_gmt":"2023-04-04T08:20:40","guid":{"rendered":"https:\/\/fukugyouhistory.tokyo\/?p=1528"},"modified":"2023-04-05T07:39:44","modified_gmt":"2023-04-04T22:39:44","slug":"python%e3%80%80wordcloud%e3%82%92%e5%ad%a6%e7%bf%92%e3%81%97%e3%81%a6%e3%81%bf%e3%81%9f","status":"publish","type":"post","link":"https:\/\/fukugyouhistory.tokyo\/?p=1528","title":{"rendered":"Python\u3000WordCloud\u3092\u5b66\u7fd2\u3057\u3066\u307f\u305f"},"content":{"rendered":"\n<p class=\"wp-block-paragraph\"><a rel=\"noreferrer noopener\" href=\"http:\/\/amueller.github.io\/word_cloud\/\" target=\"_blank\">WordCloud<\/a>\u3068\u306f\u3001\u6587\u4e2d\u306e\u51fa\u73fe\u983b\u5ea6\u306e\u9ad8\u3044\u5358\u8a9e\u3092\u62bd\u51fa\u3057\u3066\u53ef\u8996\u5316\u3059\u308b\u30c4\u30fc\u30eb\u3067\u3059\u3002<br>\u65e5\u3005\u306e\u60c5\u5831\u53ce\u96c6\u306e\u7d50\u679c\u3092\u3001\u8996\u899a\u7684\u306b\u6349\u3048\u308b\u306e\u306b\u306f\u6709\u52b9\u306a\u30c4\u30fc\u30eb\u3068\u8a00\u3048\u307e\u3059\u3002<br>\u67d0\u60c5\u5831\u756a\u7d44\u3067\u306f\u3001\u6bce\u65e5\u3001\u756a\u7d44\u306e\u5192\u982d\u3067\u7d39\u4ecb\u3055\u308c\u3066\u3044\u305f\u306e\u3067\u3001\u898b\u305f\u3053\u3068\u304c\u3042\u308b\u4eba\u3082\u591a\u3044\u306e\u3067\u306f\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\"><a rel=\"noreferrer noopener\" href=\"http:\/\/amueller.github.io\/word_cloud\/\" target=\"_blank\">\u516c\u5f0f\u30b5\u30a4\u30c8<\/a>\u306e\u30b5\u30f3\u30d7\u30eb\u753b\u50cf\u3092\u8f09\u305b\u3066\u304a\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"8d9390\" data-has-transparency=\"false\" style=\"--dominant-color: #8d9390;\" decoding=\"async\" sizes=\"(max-width: 640px) 100vw, 640px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-17.png\" alt=\"\" class=\"wp-image-1529 not-transparent\" width=\"429\" height=\"322\" 
srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-17.png 640w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-17-300x225.png 300w\" \/><\/figure>\n\n\n\n<!--more-->\n\n\n\n<p class=\"wp-block-paragraph\"><\/p>\n\n\n\n<h2 class=\"wp-block-heading\">WordCloud\u3092\u5b9f\u884c\u3057\u3066\u307f\u308b\uff08\u82f1\u8a9e\uff09<\/h2>\n\n\n\n<p class=\"wp-block-paragraph\">\u6700\u521d\u306b\u3001WordCloud\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3092\u3057\u3066\u304a\u304d\u307e\u3057\u3087\u3046\u3002<br>\u826f\u304f\u767a\u751f\u3059\u308b\u30a8\u30e9\u30fc\u306b\u3064\u3044\u3066\u8a18\u8f09\u3057\u307e\u3057\u305f\u306e\u3067\u3001\u53c2\u8003\u306b\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>!pip install wordcloud<\/code><\/pre>\n\n\n\n<p class=\"wp-block-paragraph\">\u203bpip\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306e\u969b\u306b\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u30a8\u30e9\u30fc\u304c\u51fa\u305f\u5834\u5408\u3001\u30a8\u30e9\u30fc\u30e1\u30c3\u30bb\u30fc\u30b8\u306eURL\u304b\u3089\u300cBuild Tools\u300d\u2192\u300cC++\u306b\u3088\u308b\u30c7\u30b9\u30af\u30c8\u30c3\u30d7\u958b\u767a\u300d\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u3066\u3001\u518d\u5b9f\u884c\u3057\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\">\u2026\uff08\u7701\u7565\uff09\u2026<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">building &#8216;wordcloud.query_integral_image&#8217; extension<br>error: Microsoft Visual C++ 14.0 or greater is required. 
Get it with &#8220;Microsoft C++ Build Tools&#8221;: https:\/\/visualstudio.microsoft.com\/visual-cpp-build-tools\/<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u2026\uff08\u7701\u7565\uff09\u2026<\/p>\n<\/blockquote>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"dddcdc\" data-has-transparency=\"true\" style=\"--dominant-color: #dddcdc;\" decoding=\"async\" sizes=\"(max-width: 854px) 100vw, 854px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-18.png\" alt=\"\" class=\"wp-image-1530 has-transparency\" width=\"429\" height=\"159\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-18.png 854w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-18-300x111.png 300w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-18-768x284.png 768w\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"f2f3f5\" data-has-transparency=\"true\" style=\"--dominant-color: #f2f3f5;\" decoding=\"async\" sizes=\"(max-width: 714px) 100vw, 714px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-20.png\" alt=\"\" class=\"wp-image-1534 has-transparency\" width=\"429\" height=\"300\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-20.png 714w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-20-300x210.png 300w\" \/><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">\u203bpip\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306e\u969b\u306b\u4ee5\u4e0b\u306e\u3088\u3046\u306a\u30a8\u30e9\u30fc\u304c\u51fa\u305f\u5834\u5408\u3001\u300cpip install 
wordcloud\u300d\u3067\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u304cPython3.11\u306b\u5bfe\u5fdc\u3057\u3066\u3044\u306a\u3044\uff08\u6700\u65b0\u306e\u30bd\u30fc\u30b9\u306b\u53cd\u6620\u3055\u308c\u3066\u3044\u306a\u3044\uff09\u305f\u3081\u767a\u751f\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<blockquote class=\"wp-block-quote is-layout-flow wp-block-quote-is-layout-flow\">\n<p class=\"wp-block-paragraph\">\u2026\uff08\u7701\u7565\uff09\u2026<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">wordcloud\/query_integral_image.c(196): fatal error C1083: Cannot open include file: &#8216;longintrepr.h&#8217;: No such file or directory<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u2026\uff08\u7701\u7565\uff09\u2026<\/p>\n<\/blockquote>\n\n\n\n<p class=\"wp-block-paragraph\">GitHub\u7d4c\u7531\u3067\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u3068\u3001\u554f\u984c\u306a\u304f\u9032\u3081\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<br>\u53c2\u8003\u5143\uff1ahttps:\/\/github.com\/amueller\/word_cloud\/issues\/702#issuecomment-1445145114<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>!pip install git+https:\/\/github.com\/amueller\/word_cloud.git<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p class=\"wp-block-paragraph\">\u7121\u4e8b\u306bWordCloud\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u304c\u7d42\u308f\u308a\u307e\u3057\u305f\u3089\u3001\u4ee5\u4e0b\u306e\u30b3\u30fc\u30c9\u3092\u5b9f\u884c\u3057\u3066\u307f\u307e\u3057\u3087\u3046\u3002<br><a rel=\"noreferrer noopener\" href=\"http:\/\/amueller.github.io\/word_cloud\/\" target=\"_blank\">\u516c\u5f0f\u30b5\u30a4\u30c8<\/a>\u3067\u63d0\u4f9b\u3055\u308c\u3066\u3044\u308b\u82f1\u8a9e\u30c6\u30ad\u30b9\u30c8\uff08\u7c73\u56fd\u61b2\u6cd5\u306e\u6587\u7ae0\uff09\u3092\u7528\u3044\u3066\u4f5c\u6210\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import requests\nfrom wordcloud import WordCloud\nimport matplotlib.pyplot as 
plt\n\n# \u516c\u5f0f\u30b5\u30a4\u30c8\u306e\u30b5\u30f3\u30d7\u30eb\u30c6\u30ad\u30b9\u30c8\u306eURL\nurl = \"https:\/\/raw.githubusercontent.com\/amueller\/word_cloud\/master\/examples\/constitution.txt\"\n\n# \u30d5\u30a1\u30a4\u30eb\u306e\u53d6\u5f97\nresponse = requests.get(url)\n\n# \u30d5\u30a1\u30a4\u30eb\u306e\u30c6\u30ad\u30b9\u30c8\u306e\u53d6\u5f97\ntext = response.text\n\n# \u753b\u50cf\u4f5c\u6210\nwordcloud = WordCloud(width=800, height=600, background_color='white').generate(text)\n\n# Wordcloud\u3092\u8868\u793a\nplt.figure(figsize=(8, 6))\nplt.imshow(wordcloud)\nplt.axis('off')\nplt.show()\n\n# \u753b\u50cf\u4fdd\u5b58\n#wordcloud.to_file(\"result_wordcrowd.png\")<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"dceade\" data-has-transparency=\"false\" style=\"--dominant-color: #dceade;\" decoding=\"async\" sizes=\"(max-width: 636px) 100vw, 636px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-39-jpg.webp\" alt=\"\" class=\"wp-image-1558 not-transparent\" width=\"431\" height=\"327\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-39-jpg.webp 636w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-39-300x227.webp 300w\" \/><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p class=\"wp-block-paragraph\">\u30de\u30b9\u30af\u30a4\u30e1\u30fc\u30b8\u3092\u7528\u3044\u3066\u3001\u4efb\u610f\u306e\u5f62\u306b\u51fa\u529b\u3059\u308b\u3053\u3068\u3082\u53ef\u80fd\u3067\u3059\u3002<br>\u4ee5\u4e0b\u306e\u30de\u30b9\u30af\u753b\u50cf\uff08\u30a2\u30e1\u30ea\u30ab\u56fd\u571f\uff09\u3092\u300cAmerica.png\u300d\u3067\u7528\u610f\u3057\u3066\u3001\u5148\u307b\u3069\u3068\u540c\u3058\u30b3\u30fc\u30c9\u3092\u5b9f\u884c\u3057\u3066\u307f\u307e\u3059\u3002<\/p>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u307e\u305f\u3001StopWords\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u3092\u4f7f\u7528\u3059\u308b\u3068\u3001\u7d50\u679c\u304b\u3089\u9664\u5916\u3059\u308b\u30ad\u30fc\u30ef\u30fc\u30c9\u3092\u624b\u52d5\u3067\u6307\u5b9a\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<br>\u203b\u82f1\u8a9e\u306e\u5834\u5408\u3001\u6307\u5b9a\u3057\u306a\u304f\u3066\u3082Built-in\u3055\u308c\u3066\u3044\u308b\u9664\u5916\u30ef\u30fc\u30c9\u304c\u81ea\u52d5\u3067\u9069\u7528\u3055\u308c\u307e\u3059\u3002<br>\u3000https:\/\/github.com\/amueller\/word_cloud\/raw\/master\/wordcloud\/stopwords<br>\u3000\u65e5\u672c\u8a9e\u306e\u5834\u5408\u306b\u3001\u3053\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u3092\u4f7f\u7528\u3059\u308b\u5834\u9762\u304c\u51fa\u3066\u304f\u308b\u304b\u3068\u601d\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"575757\" data-has-transparency=\"false\" style=\"--dominant-color: #575757;\" decoding=\"async\" sizes=\"(max-width: 784px) 100vw, 784px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/America.png\" alt=\"\" class=\"wp-image-1560 not-transparent\" width=\"428\" height=\"271\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/America.png 784w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/America-300x190.png 300w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/America-768x487.png 768w\" \/><\/figure>\n\n\n\n<pre class=\"wp-block-code\"><code>import requests\nfrom wordcloud import WordCloud\nimport matplotlib.pyplot as plt\nfrom PIL import Image\nimport numpy as np\n\n# \u516c\u5f0f\u30b5\u30a4\u30c8\u306e\u30b5\u30f3\u30d7\u30eb\u30c6\u30ad\u30b9\u30c8\u306eURL\nurl = \"https:\/\/raw.githubusercontent.com\/amueller\/word_cloud\/master\/examples\/constitution.txt\"\n\n# \u30d5\u30a1\u30a4\u30eb\u306e\u53d6\u5f97\nresponse = requests.get(url)\n\n# 
\u30d5\u30a1\u30a4\u30eb\u306e\u30c6\u30ad\u30b9\u30c8\u306e\u53d6\u5f97\ntext = response.text\n\n# \u9664\u5916\u30ad\u30fc\u30ef\u30fc\u30c9\nstop_words_en = &#91;'am', 'are', 'is', 'was', 'were', 'be', 'and', 'or', 'but', 'as', 'in', 'to', 'for', 'of', 'by', 'on', 'a', 'an', 'will', 'shall', 'this', 'that', 'it', 'the', 'what', 'when', 'where', 'which']\n\n# \u30de\u30b9\u30af\u753b\u50cf\u306e\u8aad\u307f\u8fbc\u307f\nmask = np.array(Image.open(\"America.png\"))\n\n# \u753b\u50cf\u4f5c\u6210(\u67a0\u7dda\u306a\u3057)\n#wordcloud = WordCloud(width=800, height=600, background_color='white', mask=mask, stopwords=stop_words_en).generate(text)\n\n# \u753b\u50cf\u4f5c\u6210(\u67a0\u7dda\u3042\u308a)\nwordcloud = WordCloud(width=800, height=600, background_color='white', mask=mask, stopwords=stop_words_en, contour_width=3, contour_color='steelblue').generate(text)\n\n# Wordcloud\u3092\u8868\u793a\nplt.figure(figsize=(8, 6))\nplt.imshow(wordcloud)\nplt.axis('off')\nplt.show()\n\n# \u753b\u50cf\u4fdd\u5b58\n#wordcloud.to_file(\"result_wordcrowd.png\")<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"e4eeee\" data-has-transparency=\"false\" style=\"--dominant-color: #e4eeee;\" decoding=\"async\" sizes=\"(max-width: 640px) 100vw, 640px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-41.png\" alt=\"\" class=\"wp-image-1568 not-transparent\" width=\"429\" height=\"277\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-41.png 640w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-41-300x194.png 300w\" \/><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h2 class=\"wp-block-heading\">WordCloud\u3092\u5b9f\u884c\u3057\u3066\u307f\u308b\uff08\u65e5\u672c\u8a9e\uff09<\/h2>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u82f1\u8a9e\u306e\u5834\u5408\u3001\u5358\u8a9e\u3068\u5358\u8a9e\u306e\u9593\u306b\u30b9\u30da\u30fc\u30b9\u304c\u3042\u308a\u307e\u3059\u306e\u3067\u3001\u6587\u5b57\u5217\u304b\u3089\u5358\u8a9e\u306e\u983b\u51fa\u5ea6\u5408\u3044\u3092\u7c21\u5358\u306b\u628a\u63e1\u3067\u304d\u307e\u3059\u3002<br>\u4e00\u65b9\u3067\u3001\u65e5\u672c\u8a9e\u306e\u5834\u5408\u306f\u3001\u5358\u8a9e\u30fb\u6587\u5b57\u5217\u304c\u3064\u306a\u304c\u3063\u3066\u3044\u308b\u306e\u3067\u3001WordCloud\u3067\u306f\u628a\u63e1\u30fb\u5224\u5b9a\u3067\u304d\u307e\u305b\u3093\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u305d\u3053\u3067\u3001\u65e5\u672c\u8a9e\u306e\u5834\u5408\u306f\u6587\u5b57\u5217\u3092\u5206\u3051\u308b\u305f\u3081\u306b\u300c\u5f62\u614b\u7d20\u89e3\u6790\u300d\u3068\u8a00\u3046\u3053\u3068\u3092\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<br>\u203b\u5f62\u614b\u7d20\uff1a\u610f\u5473\u3092\u6301\u3064\u8a00\u8a9e\u306e\u6700\u5c0f\u5358\u4f4d<br>\u203b\u5f62\u614b\u7d20\u89e3\u6790\uff1a\u6587\u5b57\u5217\u3092\u54c1\u8a5e\uff08\u540d\u8a5e\u30fb\u52d5\u8a5e\u30fb\u52a9\u8a5e\u306a\u3069\uff09\u306b\u5206\u5272\u3057\u3066\u3001\u5fc5\u8981\u306a\u30ad\u30fc\u30ef\u30fc\u30c9\u3092\u629c\u304d\u51fa\u3059\u3053\u3068\u3067\u3059\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u5f62\u614b\u7d20\u89e3\u6790\u306b\u306f\u30012\u3064\u306e\u4ee3\u8868\u7684\u306a\u65b9\u6cd5\u304c\u3042\u308a\u307e\u3059\u3002\u4e00\u5fdc\u3001\u4e21\u65b9\u306e\u30d1\u30bf\u30fc\u30f3\u3067\u30b3\u30fc\u30c9\u3092\u8a18\u8f09\u3057\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li>Mecab \uff1a \u304c\u3063\u3064\u308a\u3068\u5f62\u614b\u7d20\u89e3\u6790\u3092\u3084\u308a\u305f\u3044\u3068\u3044\u3046\u5834\u5408<\/li>\n\n\n\n<li>Janome \uff1a \u6c17\u8efd\u306b\u5f62\u614b\u7d20\u89e3\u6790\u3092\u3084\u308a\u305f\u3044\u3068\u3044\u3046\u5834\u5408<\/li>\n<\/ul>\n\n\n\n<hr 
class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p class=\"wp-block-paragraph\"><a rel=\"noreferrer noopener\" href=\"https:\/\/www.aozora.gr.jp\/\" target=\"_blank\">\u9752\u7a7a\u6587\u5eab<\/a>\u306e\u300c\u9280\u6cb3\u9244\u9053\u306e\u591c\u300d\uff08\u5bae\u6ca2\u8ce2\u6cbb\uff09\u306e\u4f5c\u54c1\u3092\u53d6\u5f97\u3057\u3066\u3001\u30b3\u30fc\u30c9\u3092\u66f8\u3044\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u307e\u305a\u3001\u4f5c\u54c1\u306e\u6587\u7ae0\u3092\u53d6\u5f97\u3057\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import requests\nfrom bs4 import BeautifulSoup\nfrom wordcloud import WordCloud\nimport matplotlib.pyplot as plt\nfrom PIL import Image\nimport numpy as np\n\n# \u4f5c\u54c1\u306eURL\nurl = \"https:\/\/www.aozora.gr.jp\/cards\/000081\/files\/43737_19215.html\"\n\n# \u4f5c\u54c1\u306eHTML\u30d5\u30a1\u30a4\u30eb\u306e\u53d6\u5f97\nres = requests.get(url)\n# \u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u304c\u7570\u306a\u308b\u5834\u5408\u306e\u30a8\u30f3\u30b3\u30fc\u30c9\u51e6\u7406\uff08BeautifulSoup\u3092\u4f7f\u308f\u306a\u3044\u5834\u5408\uff09\n#res.encoding = res.apparent_encoding\n\n# res.text\u3092BeautifulSoup\u3067\u6271\u3046\u305f\u3081\u306e\u51e6\u7406\nsoup = BeautifulSoup(res.content, \"html.parser\")\n\n# \u6587\u7ae0\u3092\u53d6\u5f97\u3059\u308b\ntext = soup.text\n\n\n# \u6700\u521d\u306e500\u6587\u5b57\u3092\u8868\u793a\uff08\u30d7\u30ec\u30d3\u30e5\u30fc\u7528\uff09\nprint(text&#91;:500])<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"f8f8f8\" data-has-transparency=\"false\" 
style=\"--dominant-color: #f8f8f8;\" decoding=\"async\" sizes=\"(max-width: 875px) 100vw, 875px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-40.png\" alt=\"\" class=\"wp-image-1578 not-transparent\" width=\"429\" height=\"219\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-40.png 875w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-40-300x153.png 300w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-40-768x391.png 768w\" \/><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">Mecab\u3092\u4f7f\u7528\u3059\u308b\u5834\u5408<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u307e\u305a\u306f\u3001Mecab\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002\u4e0b\u8a18\u306eURL\u304b\u3089Windows\u30a4\u30f3\u30b9\u30c8\u30fc\u30e9\u30fc\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u307e\u3059\u3002<br>https:\/\/github.com\/ikegami-yukino\/mecab\/releases\/<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"e1e3e4\" data-has-transparency=\"true\" style=\"--dominant-color: #e1e3e4;\" decoding=\"async\" sizes=\"(max-width: 299px) 100vw, 299px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-42.png\" alt=\"\" class=\"wp-image-1580 has-transparency\" width=\"240\" height=\"122\"\/><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">\u6587\u5b57\u30b3\u30fc\u30c9\u306fUTF-8\u306b\u3057\u3066\u304a\u304d\u307e\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"e8ecf0\" data-has-transparency=\"true\" style=\"--dominant-color: #e8ecf0;\" decoding=\"async\" sizes=\"(max-width: 499px) 100vw, 499px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-53.png\" alt=\"\" class=\"wp-image-1594 has-transparency\" width=\"428\" height=\"332\" 
srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-53.png 499w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-53-300x233.png 300w\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"e7e7e8\" data-has-transparency=\"true\" style=\"--dominant-color: #e7e7e8;\" decoding=\"async\" sizes=\"(max-width: 499px) 100vw, 499px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-44.png\" alt=\"\" class=\"wp-image-1582 has-transparency\" width=\"430\" height=\"333\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-44.png 499w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-44-300x233.png 300w\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"e6e8e9\" data-has-transparency=\"true\" style=\"--dominant-color: #e6e8e9;\" decoding=\"async\" sizes=\"(max-width: 499px) 100vw, 499px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-45.png\" alt=\"\" class=\"wp-image-1583 has-transparency\" width=\"429\" height=\"333\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-45.png 499w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-45-300x233.png 300w\" \/><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">\u30b9\u30bf\u30fc\u30c8\u30e1\u30cb\u30e5\u30fc\u306b\u767b\u9332\u306f\u5fc5\u8981\u306a\u3044\u306e\u3067\u3001\u30c1\u30a7\u30c3\u30af\u3092\u5165\u308c\u307e\u3057\u305f\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"e8e8e8\" data-has-transparency=\"true\" style=\"--dominant-color: #e8e8e8;\" decoding=\"async\" sizes=\"(max-width: 499px) 100vw, 499px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-46.png\" alt=\"\" class=\"wp-image-1584 has-transparency\" width=\"432\" height=\"335\" 
srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-46.png 499w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-46-300x233.png 300w\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"e9e9e9\" data-has-transparency=\"true\" style=\"--dominant-color: #e9e9e9;\" decoding=\"async\" sizes=\"(max-width: 499px) 100vw, 499px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-47.png\" alt=\"\" class=\"wp-image-1585 has-transparency\" width=\"429\" height=\"333\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-47.png 499w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-47-300x233.png 300w\" \/><\/figure>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"b7b7d0\" data-has-transparency=\"true\" style=\"--dominant-color: #b7b7d0;\" decoding=\"async\" sizes=\"(max-width: 499px) 100vw, 499px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-48.png\" alt=\"\" class=\"wp-image-1586 has-transparency\" width=\"430\" height=\"333\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-48.png 499w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-48-300x233.png 300w\" \/><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">\u7d9a\u3051\u3066\u3001\u30d1\u30b9\u3092\u8ffd\u52a0\u3057\u307e\u3059\u3002\u30b7\u30b9\u30c6\u30e0\u306e\u30d7\u30ed\u30d1\u30c6\u30a3\u3092\u958b\u304d\u3001\u74b0\u5883\u5909\u6570\u3092\u9078\u629e\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img data-dominant-color=\"f5f5f5\" data-has-transparency=\"false\" style=\"--dominant-color: #f5f5f5;\" decoding=\"async\" width=\"270\" height=\"300\" sizes=\"(max-width: 270px) 100vw, 270px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-49.png\" alt=\"\" 
class=\"wp-image-1587 not-transparent\"\/><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">\u4e0b\u5074\u306e\u300c\u30b7\u30b9\u30c6\u30e0\u74b0\u5883\u5909\u6570\u300d\u306e\u300cpath\u300d\u3092\u9078\u629e\u3057\u3066\u3001\u300c\u7de8\u96c6\u300d\u30dc\u30bf\u30f3\u3092\u62bc\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"e9ecee\" data-has-transparency=\"false\" style=\"--dominant-color: #e9ecee;\" decoding=\"async\" sizes=\"(max-width: 618px) 100vw, 618px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-50.png\" alt=\"\" class=\"wp-image-1588 not-transparent\" width=\"430\" height=\"407\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-50.png 618w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-50-300x284.png 300w\" \/><\/figure>\n\n\n\n<p class=\"wp-block-paragraph\">\u3054\u81ea\u8eab\u306e\u74b0\u5883\u306b\u5408\u308f\u305b\u3066\u66f8\u304d\u63db\u3048\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u304c\u3001<br>\u300c\u65b0\u898f\u300d\u30dc\u30bf\u30f3\u3092\u62bc\u4e0b\u3057\u3066\u4ee5\u4e0b\u3092\u8ffd\u52a0\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p class=\"wp-block-paragraph\">\u300cC:\\Program Files\\MeCab\\bin\u300d<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"e8ecef\" data-has-transparency=\"true\" style=\"--dominant-color: #e8ecef;\" decoding=\"async\" sizes=\"(max-width: 527px) 100vw, 527px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-51.png\" alt=\"\" class=\"wp-image-1589 has-transparency\" width=\"430\" height=\"409\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-51.png 527w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-51-300x285.png 300w\" \/><\/figure>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u30b9\u30bf\u30fc\u30c8\u30e1\u30cb\u30e5\u30fc\u3092\u53f3\u30af\u30ea\u30c3\u30af\u3057\u3066\u3001\u300c\u30d5\u30a1\u30a4\u30eb\u540d\u3092\u6307\u5b9a\u3057\u3066\u5b9f\u884c\u300d\u3092\u9078\u629e\u3057\u3001<br>\u300ccmd\u300d\u3068\u8a18\u5165\u3057\u3066\u300cOK\u300d\u3092\u62bc\u3057\u307e\u3059\u3002\u30b3\u30de\u30f3\u30c9\u30d7\u30ed\u30f3\u30d7\u30c8\u304c\u8d77\u52d5\u3057\u307e\u3059\u3002<br>\u8d77\u52d5\u5f8c\u3001\u300cmecab &#8211;version\u300d\u3068\u5165\u529b\u3057\u3066\u3001\u6b63\u3057\u304f\u30d0\u30fc\u30b8\u30e7\u30f3\u60c5\u5831\u304c\u51fa\u3066\u304f\u308c\u3070\u3001OK\u3067\u3059\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"212121\" data-has-transparency=\"true\" style=\"--dominant-color: #212121;\" decoding=\"async\" sizes=\"(max-width: 669px) 100vw, 669px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-52.png\" alt=\"\" class=\"wp-image-1590 has-transparency\" width=\"430\" height=\"247\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-52.png 669w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-52-300x173.png 300w\" \/><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p class=\"wp-block-paragraph\">Windows\u30a4\u30f3\u30b9\u30c8\u30fc\u30e9\u30fc\u5b9f\u65bd\u5f8c\u3001pip\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3092\u5b9f\u65bd\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>!pip install mecab<\/code><\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\">Mecab\u306e\u5b9f\u884c\u3057\u3066\u3044\u308b\u5185\u5bb9\u304c\u78ba\u8a8d\u3057\u305f\u3044\u5834\u5408\u3001\u4ee5\u4e0b\u306e\u30b3\u30fc\u30c9\u3092\u5b9f\u884c\u3059\u308b\u3068\u5206\u304b\u308a\u307e\u3059\u3002<br>\u6587\u7ae0\u3092\u54c1\u8a5e\u3054\u3068\u306b\u5206\u89e3\u3057\u3066\u3044\u308b\u306e\u304c\u5206\u304b\u308b\u3068\u601d\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import MeCab\n\n# Mecab\u306e\u8d77\u52d5\ntagger = MeCab.Tagger() \ntagger.parse('') \n\ntext = \"Mecab\u3092\u5b9f\u884c\u3059\u308b\u3068\u3001\u3053\u306e\u3088\u3046\u306a\u7d50\u679c\u306b\u306a\u308a\u307e\u3059\u3002\"\n\n# \u89e3\u6790\u7d50\u679c\u3092\u5909\u6570\u306b\u5165\u308c\u308b\ntokens = tagger.parse(text)\n\n# \u89e3\u6790\u7d50\u679c\u3092\u8868\u793a\u3059\u308b\nprint(tokens)<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"f1f1f1\" data-has-transparency=\"true\" style=\"--dominant-color: #f1f1f1;\" decoding=\"async\" sizes=\"(max-width: 567px) 100vw, 567px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-55.png\" alt=\"\" class=\"wp-image-1598 has-transparency\" width=\"431\" height=\"200\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-55.png 567w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-55-300x139.png 300w\" \/><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p class=\"wp-block-paragraph\">\u65e9\u901f\u3001\u53d6\u5f97\u3057\u305f\u6587\u7ae0\u3092\u5f62\u614b\u7d20\u89e3\u6790\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import MeCab\nimport re\n\n# Mecab\u306e\u8d77\u52d5\ntagger = MeCab.Tagger() \nparsed = tagger.parse(text) \n\n# 1\u884c\u3054\u3068\u306b\u30ea\u30b9\u30c8\u306b\u5206\u3051\u308b\nlines = parsed.split('\\n')\n\nword_list = &#91;]\n\nfor line in lines:\n    # 
\u30bf\u30d6\u3001\u30ab\u30f3\u30de\u3067\u30c7\u30fc\u30bf\u3092\u533a\u5207\u308b\n    tmp = re.split('\\t|,',line)\n\n    # tmp\u306e\u9577\u3055\u304c2\u672a\u6e80\u306e\u5834\u5408\u306f\u30b9\u30ad\u30c3\u30d7\u3059\u308b\n    if len(tmp) &lt; 2:\n        continue\n    \n    # \u540d\u8a5e\u306e\u307f\u3001\u30ef\u30fc\u30c9\u3092\u53d6\u308a\u51fa\u3059\n    if tmp&#91;1] == '\u540d\u8a5e':\n        word_list.append(tmp&#91;0])\n\n# word_list\u3092\u6587\u5b57\u5217\u306b\u5909\u63db\u3059\u308b\nword_chain = ' '.join(word_list)\n\n# \u6700\u521d\u306e20\u5358\u8a9e\u3060\u3051\u8868\u793a\uff08\u30d7\u30ec\u30d3\u30e5\u30fc\u7528\uff09\nprint(word_list&#91;0:20])\nprint(word_chain&#91;0:20])<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"f4f4f4\" data-has-transparency=\"true\" style=\"--dominant-color: #f4f4f4;\" decoding=\"async\" sizes=\"(max-width: 907px) 100vw, 907px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-54.png\" alt=\"\" class=\"wp-image-1597 has-transparency\" width=\"599\" height=\"37\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-54.png 907w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-54-300x19.png 300w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-54-768x47.png 768w\" \/><\/figure>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u5f62\u614b\u7d20\u89e3\u6790\u3057\u305f\u7d50\u679c\u3092\u3001WordCloud\u306b\u6295\u3052\u8fbc\u3093\u3067\u307f\u307e\u3059\u3002<br>\u305f\u3060\u3057\u3001\u65e5\u672c\u8a9e\u304c\u6587\u5b57\u5316\u3051\u3057\u3066\u3057\u307e\u3046\u306e\u3067\u3001\u65e5\u672c\u8a9e\u30d5\u30a9\u30f3\u30c8\u306e\u6307\u5b9a\u3092\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<br>\u307e\u305f\u3001\u65e5\u672c\u8a9e\u306e\u5834\u5408\u306f\u6f22\u5b57\u4e00\u6587\u5b57\u306e\u5358\u8a9e\u3082\u3042\u308b\u306e\u3067\u3001\u4e00\u6587\u5b57\u4ee5\u4e0a\u306e\u7d50\u679c\u304c\u51fa\u308b\u3088\u3046\u306b\u30aa\u30d7\u30b7\u30e7\u30f3\uff08regexp=&#8221;[\\w&#8217;]+&#8221;\uff09\u3092\u8ffd\u52a0\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import requests\nfrom wordcloud import WordCloud\nimport matplotlib.pyplot as plt\nfrom PIL import Image\nimport numpy as np\n\n# \u30d5\u30a9\u30f3\u30c8\u306e\u6307\u5b9a\nfont_path = \"meiryo.ttc\"\n\n# \u9664\u5916\u30ad\u30fc\u30ef\u30fc\u30c9\nstop_words_ja = &#91;'\u3082\u306e', '\u3053\u3068', '\u3068\u304d', '\u305d\u3046', '\u305f\u3061', '\u3053\u308c', '\u3088\u3046', '\u3053\u308c\u3089', '\u305d\u308c', '\u3059\u3079\u3066','\u306e','\u3093']\n\n# \u753b\u50cf\u4f5c\u6210\nwordcloud = WordCloud(width=800, height=600, font_path=font_path, background_color='white', stopwords=stop_words_ja, regexp=\"&#91;\\w']+\").generate(word_chain)\n\n# Wordcloud\u3092\u8868\u793a\nplt.figure(figsize=(8, 6))\nplt.imshow(wordcloud)\nplt.axis('off')\nplt.show()\n\n# \u753b\u50cf\u4fdd\u5b58\n#wordcloud.to_file(\"result_wordcrowd.png\")<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"ddeae1\" data-has-transparency=\"false\" style=\"--dominant-color: #ddeae1;\" decoding=\"async\" sizes=\"(max-width: 636px) 100vw, 636px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-58.png\" alt=\"\" 
class=\"wp-image-1611 not-transparent\" width=\"431\" height=\"327\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-58.png 636w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-58-300x227.png 300w\" \/><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p class=\"wp-block-paragraph\">\u5206\u5272\u3057\u3066\u8a18\u8f09\u3057\u3066\u5206\u304b\u308a\u306b\u304f\u3044\u90e8\u5206\u3082\u3042\u308b\u3068\u601d\u3044\u307e\u3059\u306e\u3067\u3001\u307e\u3068\u3081\u305f\u30b3\u30fc\u30c9\u3092\u8a18\u8f09\u3057\u3066\u304a\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import requests\nfrom bs4 import BeautifulSoup\nfrom wordcloud import WordCloud\nimport matplotlib.pyplot as plt\nfrom PIL import Image\nimport numpy as np\nimport MeCab\nimport re\n\n#----\u4f5c\u54c1\u306e\u6587\u7ae0\u306e\u53d6\u5f97----\n\n# \u4f5c\u54c1\u306eURL\nurl = \"https:\/\/www.aozora.gr.jp\/cards\/000081\/files\/43737_19215.html\"\n\n# \u4f5c\u54c1\u306eHTML\u30d5\u30a1\u30a4\u30eb\u306e\u53d6\u5f97\nres = requests.get(url)\n# \u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u304c\u7570\u306a\u308b\u5834\u5408\u306e\u30a8\u30f3\u30b3\u30fc\u30c9\u51e6\u7406\uff08BeautifulSoup\u3092\u4f7f\u308f\u306a\u3044\u5834\u5408\uff09\n#res.encoding = res.apparent_encoding\n\n# res.text\u3092BeautifulSoup\u3067\u6271\u3046\u305f\u3081\u306e\u51e6\u7406\nsoup = BeautifulSoup(res.content, \"html.parser\")\n\n# \u6587\u7ae0\u3092\u53d6\u5f97\u3059\u308b\ntext = soup.text\n\n#----Mecab\u306b\u3088\u308b\u5f62\u614b\u7d20\u89e3\u6790----\n\n# Mecab\u306e\u8d77\u52d5\ntagger = MeCab.Tagger() \nparsed = tagger.parse(text) \n\n# 1\u884c\u3054\u3068\u306b\u30ea\u30b9\u30c8\u306b\u5206\u3051\u308b\nlines = parsed.split('\\n')\n\nword_list = &#91;]\n\nfor line in lines:\n    # \u30bf\u30d6\u3001\u30ab\u30f3\u30de\u3067\u30c7\u30fc\u30bf\u3092\u533a\u5207\u308b\n    tmp = 
re.split('\\t|,',line)\n\n    # tmp\u306e\u9577\u3055\u304c2\u672a\u6e80\u306e\u5834\u5408\u306f\u30b9\u30ad\u30c3\u30d7\u3059\u308b\n    if len(tmp) &lt; 2:\n        continue\n    \n    # \u540d\u8a5e\u306e\u307f\u3001\u30ef\u30fc\u30c9\u3092\u53d6\u308a\u51fa\u3059\n    if tmp&#91;1] == '\u540d\u8a5e':\n        word_list.append(tmp&#91;0])\n\n# word_list\u3092\u6587\u5b57\u5217\u306b\u5909\u63db\u3059\u308b\nword_chain = ' '.join(word_list)\n\n#----WordCloud\u306b\u3088\u308b\u53ef\u8996\u5316----\n\n# \u30d5\u30a9\u30f3\u30c8\u306e\u6307\u5b9a\nfont_path = \"meiryo.ttc\"\n\n# \u9664\u5916\u30ad\u30fc\u30ef\u30fc\u30c9\nstop_words_ja = &#91;'\u3082\u306e', '\u3053\u3068', '\u3068\u304d', '\u305d\u3046', '\u305f\u3061', '\u3053\u308c', '\u3088\u3046', '\u3053\u308c\u3089', '\u305d\u308c', '\u3059\u3079\u3066','\u306e','\u3093']\n\n# \u753b\u50cf\u4f5c\u6210\nwordcloud = WordCloud(width=800, height=600, font_path=font_path, background_color='white', stopwords=stop_words_ja, regexp=\"&#91;\\w']+\").generate(word_chain)\n\n# Wordcloud\u3092\u8868\u793a\nplt.figure(figsize=(8, 6))\nplt.imshow(wordcloud)\nplt.axis('off')\nplt.show()\n\n# \u753b\u50cf\u4fdd\u5b58\n#wordcloud.to_file(\"result_wordcrowd.png\")<\/code><\/pre>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<h3 class=\"wp-block-heading\">Janome\u3092\u4f7f\u7528\u3059\u308b\u5834\u5408<\/h3>\n\n\n\n<p class=\"wp-block-paragraph\">\u307e\u305a\u306f\u3001Janome\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3092\u3057\u307e\u3059\u3002pip\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306e\u307f\u3067OK\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>!pip install Janome<\/code><\/pre>\n\n\n\n<p 
class=\"wp-block-paragraph\">Janome\u306e\u5b9f\u884c\u3057\u3066\u3044\u308b\u5185\u5bb9\u304c\u78ba\u8a8d\u3057\u305f\u3044\u5834\u5408\u3001\u4ee5\u4e0b\u306e\u30b3\u30fc\u30c9\u3092\u5b9f\u884c\u3059\u308b\u3068\u5206\u304b\u308a\u307e\u3059\u3002<br>\u6587\u7ae0\u3092\u54c1\u8a5e\u3054\u3068\u306b\u5206\u89e3\u3057\u3066\u3044\u308b\u306e\u304c\u5206\u304b\u308b\u3068\u601d\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>from janome.tokenizer import Tokenizer\n\n# Janome\u306e\u8d77\u52d5\nt = Tokenizer()\n\ntext = \"Janome\u3092\u5b9f\u884c\u3059\u308b\u3068\u3001\u3053\u306e\u3088\u3046\u306a\u7d50\u679c\u306b\u306a\u308a\u307e\u3059\u3002\"\n\n# \u89e3\u6790\u7d50\u679c\u3092\u5909\u6570\u306b\u5165\u308c\u308b\nfor token in t.tokenize(text):\n    # \u89e3\u6790\u7d50\u679c\u3092\u8868\u793a\u3059\u308b\n    print(token)<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"f1f1f1\" data-has-transparency=\"true\" style=\"--dominant-color: #f1f1f1;\" decoding=\"async\" sizes=\"(max-width: 573px) 100vw, 573px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-57.png\" alt=\"\" class=\"wp-image-1609 has-transparency\" width=\"429\" height=\"186\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-57.png 573w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-57-300x130.png 300w\" \/><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p class=\"wp-block-paragraph\">Mecab\u3067\u8aac\u660e\u3057\u305f\u5185\u5bb9\u3068\u3001\u5f62\u614b\u7d20\u89e3\u6790\u306e\u90e8\u5206\u4ee5\u5916\u306f\u540c\u3058\u3067\u3059\u306e\u3067\u3001\u307e\u3068\u3081\u305f\u30b3\u30fc\u30c9\u3067\u8a18\u8f09\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>import requests\nfrom bs4 import BeautifulSoup\nfrom wordcloud import WordCloud\nimport matplotlib.pyplot as plt\nfrom PIL 
import Image\nimport numpy as np\nfrom janome.tokenizer import Tokenizer\nimport re\n\n#----\u4f5c\u54c1\u306e\u6587\u7ae0\u306e\u53d6\u5f97----\n\n# \u4f5c\u54c1\u306eURL\nurl = \"https:\/\/www.aozora.gr.jp\/cards\/000081\/files\/43737_19215.html\"\n\n# \u4f5c\u54c1\u306eHTML\u30d5\u30a1\u30a4\u30eb\u306e\u53d6\u5f97\nres = requests.get(url)\n# \u30a8\u30f3\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u304c\u7570\u306a\u308b\u5834\u5408\u306e\u30a8\u30f3\u30b3\u30fc\u30c9\u51e6\u7406\uff08BeautifulSoup\u3092\u4f7f\u308f\u306a\u3044\u5834\u5408\uff09\n#res.encoding = res.apparent_encoding\n\n# res.text\u3092BeautifulSoup\u3067\u6271\u3046\u305f\u3081\u306e\u51e6\u7406\nsoup = BeautifulSoup(res.content, \"html.parser\")\n\n# \u6587\u7ae0\u3092\u53d6\u5f97\u3059\u308b\ntext = soup.text\n\n#----Janome\u306b\u3088\u308b\u5f62\u614b\u7d20\u89e3\u6790----\n\n# Janome\u306e\u8d77\u52d5\nt = Tokenizer()\n\n# 1\u884c\u3054\u3068\u306bJanome\u306e\u7d50\u679c\u304c\u5165\u308b\ntoken = t.tokenize(text)\n\nword_list = &#91;]\n\nfor line in token:\n    # \u30bf\u30d6\u3001\u30ab\u30f3\u30de\u3067\u30c7\u30fc\u30bf\u3092\u533a\u5207\u308b\n    tmp = re.split('\\t|,',str(line))\n\n    # tmp\u306e\u9577\u3055\u304c2\u672a\u6e80\u306e\u5834\u5408\u306f\u30b9\u30ad\u30c3\u30d7\u3059\u308b\n    if len(tmp) &lt; 2:\n        continue\n    \n    # \u540d\u8a5e\u306e\u307f\u3001\u30ef\u30fc\u30c9\u3092\u53d6\u308a\u51fa\u3059\n    if tmp&#91;1] == '\u540d\u8a5e':\n        word_list.append(tmp&#91;0])\n\n# word_list\u3092\u6587\u5b57\u5217\u306b\u5909\u63db\u3059\u308b\nword_chain = ' '.join(word_list)\n\n#----WordCloud\u306b\u3088\u308b\u53ef\u8996\u5316----\n\n# \u30d5\u30a9\u30f3\u30c8\u306e\u6307\u5b9a\nfont_path = \"meiryo.ttc\"\n\n# \u9664\u5916\u30ad\u30fc\u30ef\u30fc\u30c9\nstop_words_ja = &#91;'\u3082\u306e', '\u3053\u3068', '\u3068\u304d', '\u305d\u3046', '\u305f\u3061', '\u3053\u308c', '\u3088\u3046', '\u3053\u308c\u3089', '\u305d\u308c', 
'\u3059\u3079\u3066','\u306e','\u3093']\n\n# \u753b\u50cf\u4f5c\u6210\nwordcloud = WordCloud(width=800, height=600, font_path=font_path, background_color='white', stopwords=stop_words_ja, regexp=\"&#91;\\w']+\").generate(word_chain)\n\n# Wordcloud\u3092\u8868\u793a\nplt.figure(figsize=(8, 6))\nplt.imshow(wordcloud)\nplt.axis('off')\nplt.show()\n\n# \u753b\u50cf\u4fdd\u5b58\n#wordcloud.to_file(\"result_wordcrowd.png\")<\/code><\/pre>\n\n\n\n<figure class=\"wp-block-image size-full is-resized\"><img data-dominant-color=\"dbe3e5\" data-has-transparency=\"false\" style=\"--dominant-color: #dbe3e5;\" decoding=\"async\" sizes=\"(max-width: 636px) 100vw, 636px\" src=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-60.png\" alt=\"\" class=\"wp-image-1615 not-transparent\" width=\"432\" height=\"327\" srcset=\"https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-60.png 636w, https:\/\/fukugyouhistory.tokyo\/wp-content\/uploads\/2023\/04\/image-60-300x227.png 300w\" \/><\/figure>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p class=\"wp-block-paragraph\">\u4f55\u3068\u304b\u8868\u793a\u304c\u3067\u304d\u307e\u3057\u305f\u3002\u3042\u3068\u306f\u3001\u5143\u3068\u3059\u308b\u30c7\u30fc\u30bf\u3092\u4f55\u306b\u3059\u308b\u304b\u304c\u30dd\u30a4\u30f3\u30c8\u306b\u306a\u308a\u305d\u3046\u3067\u3059\u3002<br>\u526f\u696d\u306b\u95a2\u3059\u308b\u3053\u3068\u3001AI\u306b\u95a2\u3059\u308b\u3053\u3068\u3001Web\u30b9\u30af\u30ec\u30a4\u30d4\u30f3\u30b0\u3068\u5408\u308f\u305b\u3066\u4f55\u304b\u3067\u304d\u306a\u3044\u304b\u306a\u3002<\/p>\n\n\n\n<p 
class=\"wp-block-paragraph\">\u3068\u308a\u3042\u3048\u305a\u3001\u3053\u3053\u307e\u3067\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>WordCloud\u3068\u306f\u3001\u6587\u4e2d\u306e\u51fa\u73fe\u983b\u5ea6\u306e\u9ad8\u3044\u5358\u8a9e\u3092\u62bd\u51fa\u3057\u3066\u53ef\u8996\u5316\u3059\u308b\u30c4\u30fc\u30eb\u3067\u3059\u3002\u65e5\u3005\u306e\u60c5\u5831\u53ce\u96c6\u306e\u7d50\u679c\u3092\u3001\u8996\u899a\u7684\u306b\u6349\u3048\u308b\u306e\u306b\u306f\u6709\u52b9\u306a\u30c4\u30fc\u30eb\u3068\u8a00\u3048\u307e\u3059\u3002\u67d0\u60c5\u5831\u756a\u7d44\u3067\u306f\u3001\u6bce\u65e5\u3001\u756a\u7d44\u306e\u5192\u982d\u3067\u7d39\u4ecb\u3055\u308c\u3066\u3044\u305f\u306e\u3067\u3001\u898b\u305f\u3053\u3068\u304c\u3042\u308b\u4eba\u3082 [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[8],"tags":[],"class_list":["post-1528","post","type-post","status-publish","format-standard","category-python"],"_links":{"self":[{"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/posts\/1528","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=1528"}],"version-history":[{"count":34,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/posts\/1528\/revisions"}],"predecessor-version":[{"id":1620,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=\/wp\/v2\/posts\/1528\/revisions\/1620"}],"wp:attachment":[{"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=%2Fwp
%2Fv2%2Fmedia&parent=1528"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=1528"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/fukugyouhistory.tokyo\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=1528"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}