Skip to content

Commit 5b4931c

Browse files
authored
Create fetch_papers.py
1 parent 3a8a24c commit 5b4931c

File tree

1 file changed

+40
-0
lines changed

1 file changed

+40
-0
lines changed

fetch_papers.py

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
'''
2+
Author: doodhwala, leezeeyee
3+
Python3 script to fetch papers
4+
'''
5+
6+
import os, re, requests, codecs
7+
8+
# Markdown file that lists the papers, and the folder the PDFs are saved into.
filename = 'README.md'
directory = 'papers'

# Characters stripped from headings and paper titles before they are used as
# directory / file names (the set Windows rejects in file names, plus '^').
WIN_RESTRICTED_CHARS = re.compile(r'[\^\/\\\:\*\?\"<>\|]')

# Captures (text before the link, url) for a "[`[pdf]`](url)" markdown link.
# Modify the expression if you want to match other kinds of links as well.
PDF_LINK = re.compile(r'(.*?)\[`\[pdf\]`\]\((.*?)\)')

# Seconds to wait on a server before giving up on a single download, so one
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 60


def section_name(line):
    """Return the sanitized section name of a '## 20xx' heading line.

    Returns None when the line is not such a heading. The text after the
    first '##' is kept as-is (including its leading space) apart from the
    restricted characters, so directory names match earlier runs.
    """
    if '## 20' not in line:
        return None
    return WIN_RESTRICTED_CHARS.sub('', line.strip().split('##')[1])


def parse_pdf_link(line):
    """Return (paper_title, url) for a line holding a [`[pdf]`](url) link.

    Returns None when the line has no such link. The title is the text that
    precedes the link, with restricted characters removed and surrounding
    '-' and ' ' characters stripped so it can be used as a file name.
    """
    if '[`[pdf]`]' not in line:
        return None
    match = PDF_LINK.search(line)
    if not match:
        return None
    title, url = match.groups()
    return WIN_RESTRICTED_CHARS.sub('', title).strip('- '), url


def download(url, target):
    """Fetch url and store the response body at target.

    Raises requests.exceptions.RequestException on connection problems,
    timeouts and HTTP error statuses, and OSError if the file cannot be
    written.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Without this an HTML error page would be saved as a "PDF" and then
    # skipped forever by the auto-resume check.
    response.raise_for_status()
    # Write to a side file first: an interrupted write must not leave a
    # truncated file that looks like a finished download.
    partial = target + '.part'
    with open(partial, 'wb') as out:
        out.write(response.content)
    os.replace(partial, target)


def main():
    """Download every paper linked from the README into per-section folders."""
    os.makedirs(directory, exist_ok=True)
    with open(filename, encoding='utf-8') as readme:
        lines = readme.read().splitlines()

    # Papers listed before the first heading go directly under `directory`.
    section_path = directory
    for line in lines:
        heading = section_name(line)
        if heading is not None:
            section_path = os.path.join(directory, heading)
            os.makedirs(section_path, exist_ok=True)

        link = parse_pdf_link(line)
        if link is None:
            continue
        paper, url = link
        target = os.path.join(section_path, paper + '.pdf')
        # Auto-resume: anything already on disk is not fetched again.
        if os.path.exists(target):
            continue
        print('Fetching', paper)
        try:
            download(url, target)
        except (requests.exceptions.RequestException, OSError) as e:
            # One bad link or unwritable name should not stop the batch.
            print("Error: {}".format(e))


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)