Commit 55b9bb3e authored by Iori Ichinose

improvements to arxiv

parent b8500b48
@@ -12,13 +12,13 @@ async def arc_handler(bot: Bot, event: Event, state: T_State):
     args = str(event.get_message()).strip()
     if args:
         arg_list = args.split(maxsplit=1)
-        arg_list.append('nothing')
+        arg_list.append('')
         state['args'] = (arg_list[0], arg_list[1])
         return
     await arxiv.finish()


-@arxiv.get('args')
+@arxiv.got('args')
 async def handle_args(bot: Bot, event: Event, state: T_State):
     command, param = state['args']
     if command not in arxiv_commands:
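
For reference, a minimal sketch (not taken from this repository) of how the argument handling above behaves: the raw message is split into a command plus an optional parameter, and padding with '' keeps the tuple two elements long even when no parameter is given. The switch to @arxiv.got('args') matches NoneBot2's got decorator, which fills the 'args' state key.

def parse_args(args: str) -> tuple[str, str]:
    # "search CNN attention" -> ('search', 'CNN attention'); a bare "list" -> ('list', '')
    arg_list = args.strip().split(maxsplit=1)
    arg_list.append('')
    return arg_list[0], arg_list[1]

assert parse_args('search CNN attention') == ('search', 'CNN attention')
assert parse_args('list') == ('list', '')
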
@@ -4,6 +4,8 @@ from .paper import Paper
 def fetch_keyword(keyword: str) -> list[Paper]:
     if keyword == '':
         return []
+    try:
         res = httpx.get('https://arxiv.org/search/', params={
             'query': keyword, 'searchtype': 'all', 'source': 'header'
@@ -18,27 +20,31 @@ def fetch_keyword(keyword: str) -> list[Paper]:
         name='p', attrs={'class': 'title is-5 mathjax'})))
     authors = [list(map(lambda x: str(x.string).strip(), author_list.find_all(name='a')))
                for author_list in page.find_all(name='p', attrs={'class': 'authors'})]
-    paper_ids = list(map(lambda x: str(x.string).strip(),
+    paper_ids = list(map(lambda x: str(x.string).strip().lstrip('arXiv:'),
                          page.select('div > ol > li > div > p > a')))
     print(titles, len(titles))
     print(authors, len(authors))
     print(paper_ids, len(paper_ids))
     ret: list[Paper] = []
-    lim = min(10, len(titles))
+    lim = min(5, len(titles))
     for i in range(lim):
-        ret.append(Paper(titles[i], authors[i], '', paper_ids[i]))
+        ret.append(Paper(titles[i], authors[i], paper_ids[i]))
     return ret
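
A side note on the new .lstrip('arXiv:') call, sketched below under the assumption that the scraped ID text looks like 'arXiv:2101.01234': str.lstrip removes a set of characters, not a literal prefix, which happens to be harmless for numeric IDs; str.removeprefix (Python 3.9+) expresses the intent exactly.

raw_id = 'arXiv:2101.01234'                            # assumed input format
assert raw_id.lstrip('arXiv:') == '2101.01234'         # works here: digits are not in the stripped set
assert raw_id.removeprefix('arXiv:') == '2101.01234'   # prefix-exact alternative
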
 def download(paper: Paper):
-    arxiv_id = paper.paper_id.lstrip('arXiv:')
+    arxiv_id = paper.paper_id
     pdf = 'https://arxiv.org/pdf/{}.pdf'.format(arxiv_id)
     ret = httpx.get(pdf)
     with open(f'{arxiv_id}.pdf', 'wb') as f:
         f.write(ret.content)


 def fetch_info(paper: Paper) -> str:
     ret = ''


 if __name__ == '__main__':
     papers = fetch_keyword('CNN')
     download(papers[0])
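
download() above buffers the whole PDF in memory before writing it out. A streamed variant is sketched below; it is not part of this commit and only assumes the same URL scheme and the httpx dependency already used here.

import httpx

def download_streaming(arxiv_id: str) -> None:
    # Write the PDF to disk chunk by chunk instead of holding it all in memory.
    url = f'https://arxiv.org/pdf/{arxiv_id}.pdf'
    with httpx.stream('GET', url, follow_redirects=True) as resp:
        resp.raise_for_status()
        with open(f'{arxiv_id}.pdf', 'wb') as f:
            for chunk in resp.iter_bytes():
                f.write(chunk)
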
@@ -9,12 +9,12 @@ def handle_search(event: Event, keyword: str) -> str:
     papers = fetch_keyword(keyword)
     if papers == []:
         return '没有找到相关文献'
-    ret = ''
+    ret: list[str] = []
     for i, paper in enumerate(papers):
-        ret += f'No.{i}, Title: {paper.title} \n' + \
-               f'Authors: {", ".join(paper.authors)} \n' + \
-               f'Arxiv-ID: {paper.paper_id}\n'
-    return ret
+        ret.append(f'{i+1}: {paper.title} \n' +
+                   f'Authors: {", ".join(paper.authors)} \n' +
+                   f'Arxiv-ID: {paper.paper_id}\n')
+    return '\n'.join(ret)


 def handle_add(event: Event, paper_id: str) -> str:
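
With the new list-and-join formatting, the reply for a result set looks like this (illustrative data only, not from a real query):

papers = [('An Example Paper', ['A. Author', 'B. Author'], '2101.01234')]
lines = []
for i, (title, authors, paper_id) in enumerate(papers):
    lines.append(f'{i+1}: {title} \n' +
                 f'Authors: {", ".join(authors)} \n' +
                 f'Arxiv-ID: {paper_id}\n')
print('\n'.join(lines))
# 1: An Example Paper
# Authors: A. Author, B. Author
# Arxiv-ID: 2101.01234
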
@@ -3,12 +3,10 @@ class Paper:
         self,
         title: str,
         authors: list[str],
-        abstract: str,
         paper_id: str,
     ):
         self.title = title
         self.authors = authors
-        self.abstract = abstract
         self.paper_id = paper_id
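
After dropping the abstract field, constructing a Paper takes three arguments; a quick sketch of the new call shape with hypothetical values:

paper = Paper('An Example Paper', ['A. Author'], '2101.01234')
print(paper.title, paper.paper_id)   # the abstract attribute no longer exists
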