(1)形態素解析サンプル
! pip install mecab-python3 unidic-lite
import MeCab
# Run MeCab on one sample sentence and print the text that
# Tagger.parse() returns for it.
tagger = MeCab.Tagger()
analysis = tagger.parse("今年もあと3か月を切りました!")
print(analysis)
(2)特許庁APIからトークン取得
import requests
import json
import pprint
# Request an access token from the JPO (Japan Patent Office) API using the
# "password" grant type. The endpoint path and both credential strings
# below are placeholders; replace them with the values issued to you.
url = 'https://ip-data.jpo.go.jp/XXXX/XXXXX'
response = requests.post(
    url,
    data={
        'grant_type': 'password',
        'username': '特許庁からもらったID',
        'password': '特許庁からもらったパスワード',
    },
    # requests applies no timeout by default, so a stalled connection would
    # otherwise block this cell indefinitely.
    timeout=30,
)
# Stop here on an HTTP error status instead of carrying an error payload
# forward, where it would only surface as a KeyError on "access_token".
response.raise_for_status()

# `a` holds the decoded JSON token response; later sections read
# a["access_token"] from it.
a = json.loads(response.content.decode('utf-8'))
# NOTE(review): this prints the full token response, secrets included.
# Clear the cell output before sharing the notebook.
print(a)
(3)トークンを使って特許情報取得
# Build the Bearer authorization header from the token response `a`.
b = a["access_token"]
c = 'Bearer ' + b
h = {'Authorization': c}

# Fetch the app_progress record for application number 2020008423.
url = 'https://ip-data.jpo.go.jp/api/patent/v1/app_progress/2020008423'
# requests applies no timeout by default; set one so a stalled connection
# cannot hang the cell.
response = requests.get(url, headers=h, timeout=30)
# Raise on an HTTP error status (e.g. an expired token) rather than trying
# to parse an error body as patent data.
response.raise_for_status()

# `d` is the parsed JSON body; `response` keeps the raw text. Both names are
# read again by later sections, so they are left as module-level variables.
d = json.loads(response.text)
pprint.pprint(d)
(4)必要な情報のみを表示
# From the parsed response `d`, take only the 'ADPublicationNumber' field
# under result -> data and print it.
result_data = d['result']['data']
e = result_data['ADPublicationNumber']
print(e)
(5)審査経過で更新された部分のみを知る
from google.colab import drive
drive.mount('/content/drive')
import difflib

# Text file on the mounted Drive that stores the response text saved by the
# previous run.
previous_path = "/content/drive/MyDrive/previousfile.txt"

# Load the previous snapshot. The context manager guarantees the handle is
# closed (the earlier `f.close` without parentheses never closed it). On the
# first run no snapshot exists yet, so treat it as empty instead of failing.
try:
    with open(previous_path, "r", encoding="UTF-8") as f:
        pre = f.read()
except FileNotFoundError:
    pre = ""

# Compare the two texts chunk by chunk, splitting on commas, and print only
# the chunks that ndiff marks as added ('+') or removed ('-').
# An empty snapshot is compared as "no chunks" so it does not produce a
# spurious removed-empty-chunk line.
previous_chunks = pre.split(',') if pre else []
current_chunks = response.text.split(',')
res = difflib.ndiff(previous_chunks, current_chunks)
for r in res:
    if r.startswith(('+', '-')):
        print(r)

# Overwrite the snapshot with the current response for the next comparison.
with open(previous_path, "w", encoding="UTF-8") as f:
    f.write(response.text)
(6)次年度の年金支払期日を知る
# Build the Bearer authorization header from the token response `a`.
b = a["access_token"]
c = 'Bearer ' + b
h = {'Authorization': c}

# Fetch the registration_info record for number 2014188113.
url = 'https://ip-data.jpo.go.jp/api/patent/v1/registration_info/2014188113'
# requests applies no timeout by default; set one so a stalled connection
# cannot hang the cell.
response = requests.get(url, headers=h, timeout=30)
# Raise on an HTTP error status rather than parsing an error body.
response.raise_for_status()

d = json.loads(response.text)
pprint.pprint(d)

# Print only the 'nextPensionPaymentDate' field under result -> data.
e = d['result']['data']['nextPensionPaymentDate']
print(e)
(7)WORDをPythonから操作する
! pip install python-docx
import docx
from google.colab import drive
drive.mount('/content/drive')

# Rate applied to the total character count (the output label below states
# it as "20YEN/moji").
YEN_PER_CHARACTER = 20

# Open the Word document from the mounted Drive.
doc = docx.Document("/content/drive/MyDrive/sample.docx")
paragraphs = doc.paragraphs

print("danraku no kazu:", len(paragraphs))

# Report the first paragraph only when there is one; indexing [0] on a
# document without paragraphs would raise IndexError.
if paragraphs:
    first_text = paragraphs[0].text
    print("saisyo no danraku naiyou:", first_text)
    print("saisyo no danraku mojisuu:", len(first_text))

# Total number of characters over all paragraphs, then the amount obtained
# by multiplying that total by the per-character rate.
mojisuu = sum(len(paragraph.text) for paragraph in paragraphs)
print("goukei mojisuu:", mojisuu)
print("seikyuugaku (20YEN/moji):", mojisuu * YEN_PER_CHARACTER)