Commit 55b9bb3e authored by Iori Ichinose

improvements to arxiv

parent b8500b48
@@ -12,13 +12,13 @@ async def arc_handler(bot: Bot, event: Event, state: T_State):
     args = str(event.get_message()).strip()
     if args:
         arg_list = args.split(maxsplit=1)
-        arg_list.append('nothing')
+        arg_list.append('')
         state['args'] = (arg_list[0], arg_list[1])
         return
     await arxiv.finish()


-@arxiv.get('args')
+@arxiv.got('args')
 async def handle_args(bot: Bot, event: Event, state: T_State):
     command, param = state['args']
     if command not in arxiv_commands:
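
For reference, a minimal sketch (not taken from this repository) of how the argument handling above behaves: the raw message is split into a command plus an optional parameter, and padding with '' keeps the tuple two elements long even when no parameter is given. The switch to @arxiv.got('args') matches NoneBot2's got decorator, which fills the 'args' state key.

def parse_args(args: str) -> tuple[str, str]:
    # "search CNN attention" -> ('search', 'CNN attention'); a bare "list" -> ('list', '')
    arg_list = args.strip().split(maxsplit=1)
    arg_list.append('')
    return arg_list[0], arg_list[1]

assert parse_args('search CNN attention') == ('search', 'CNN attention')
assert parse_args('list') == ('list', '')
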
@@ -4,6 +4,8 @@ from .paper import Paper
 def fetch_keyword(keyword: str) -> list[Paper]:
     if keyword == '':
         return []
+    try:
         res = httpx.get('https://arxiv.org/search/', params={
             'query': keyword, 'searchtype': 'all', 'source': 'header'
@@ -18,27 +20,31 @@ def fetch_keyword(keyword: str) -> list[Paper]:
         name='p', attrs={'class': 'title is-5 mathjax'})))
     authors = [list(map(lambda x: str(x.string).strip(), author_list.find_all(name='a')))
                for author_list in page.find_all(name='p', attrs={'class': 'authors'})]
-    paper_ids = list(map(lambda x: str(x.string).strip(),
+    paper_ids = list(map(lambda x: str(x.string).strip().lstrip('arXiv:'),
                          page.select('div > ol > li > div > p > a')))
     print(titles, len(titles))
     print(authors, len(authors))
     print(paper_ids, len(paper_ids))
     ret: list[Paper] = []
-    lim = min(10, len(titles))
+    lim = min(5, len(titles))
     for i in range(lim):
-        ret.append(Paper(titles[i], authors[i], '', paper_ids[i]))
+        ret.append(Paper(titles[i], authors[i], paper_ids[i]))
     return ret
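
A side note on the new .lstrip('arXiv:') call, sketched below under the assumption that the scraped ID text looks like 'arXiv:2101.01234': str.lstrip removes a set of characters, not a literal prefix, which happens to be harmless for numeric IDs; str.removeprefix (Python 3.9+) expresses the intent exactly.

raw_id = 'arXiv:2101.01234'                            # assumed input format
assert raw_id.lstrip('arXiv:') == '2101.01234'         # works here: digits are not in the stripped set
assert raw_id.removeprefix('arXiv:') == '2101.01234'   # prefix-exact alternative
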
 def download(paper: Paper):
-    arxiv_id = paper.paper_id.lstrip('arXiv:')
+    arxiv_id = paper.paper_id
     pdf = 'https://arxiv.org/pdf/{}.pdf'.format(arxiv_id)
     ret = httpx.get(pdf)
     with open(f'{arxiv_id}.pdf', 'wb') as f:
         f.write(ret.content)


 def fetch_info(paper: Paper) -> str:
     ret = ''


 if __name__ == '__main__':
     papers = fetch_keyword('CNN')
     download(papers[0])
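
download() above buffers the whole PDF in memory before writing it out. A streamed variant is sketched below; it is not part of this commit and only assumes the same URL scheme and the httpx dependency already used here.

import httpx

def download_streaming(arxiv_id: str) -> None:
    # Write the PDF to disk chunk by chunk instead of holding it all in memory.
    url = f'https://arxiv.org/pdf/{arxiv_id}.pdf'
    with httpx.stream('GET', url, follow_redirects=True) as resp:
        resp.raise_for_status()
        with open(f'{arxiv_id}.pdf', 'wb') as f:
            for chunk in resp.iter_bytes():
                f.write(chunk)
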
@@ -9,12 +9,12 @@ def handle_search(event: Event, keyword: str) -> str:
     papers = fetch_keyword(keyword)
     if papers == []:
         return '没有找到相关文献'
-    ret = ''
+    ret: list[str] = []
     for i, paper in enumerate(papers):
-        ret += f'No.{i}, Title: {paper.title} \n' + \
-               f'Authors: {", ".join(paper.authors)} \n' + \
-               f'Arxiv-ID: {paper.paper_id}\n'
-    return ret
+        ret.append(f'{i+1}: {paper.title} \n' +
+                   f'Authors: {", ".join(paper.authors)} \n' +
+                   f'Arxiv-ID: {paper.paper_id}\n')
+    return '\n'.join(ret)


 def handle_add(event: Event, paper_id: str) -> str:
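
With the new list-and-join formatting, the reply for a result set looks like this (illustrative data only, not from a real query):

papers = [('An Example Paper', ['A. Author', 'B. Author'], '2101.01234')]
lines = []
for i, (title, authors, paper_id) in enumerate(papers):
    lines.append(f'{i+1}: {title} \n' +
                 f'Authors: {", ".join(authors)} \n' +
                 f'Arxiv-ID: {paper_id}\n')
print('\n'.join(lines))
# 1: An Example Paper
# Authors: A. Author, B. Author
# Arxiv-ID: 2101.01234
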
@@ -3,12 +3,10 @@ class Paper:
         self,
         title: str,
         authors: list[str],
-        abstract: str,
         paper_id: str,
     ):
         self.title = title
         self.authors = authors
-        self.abstract = abstract
         self.paper_id = paper_id
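
After dropping the abstract field, constructing a Paper takes three arguments; a quick sketch of the new call shape with hypothetical values:

paper = Paper('An Example Paper', ['A. Author'], '2101.01234')
print(paper.title, paper.paper_id)   # the abstract attribute no longer exists
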