-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathparser.py
More file actions
43 lines (39 loc) · 1.28 KB
/
Copy pathparser.py
File metadata and controls
43 lines (39 loc) · 1.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import glob
import gzip
import os
from configparser import ConfigParser
from ftplib import FTP
from time import sleep

from dateutil import parser
# Login Block: read the FTP host name and remote directory from the [Host]
# section of config.ini, log in with ftplib's default credentials, and change
# into that directory.
setup = ConfigParser()
setup.read('config.ini')
host_section = setup['Host']
server, directory = host_section['Domain'], host_section['Directory']
ftp = FTP(host=server)
ftp.login()
ftp.cwd(directory)
# Directory listing: print every .gz entry of the remote directory with its
# list index and last-modified date/time so the user can choose a range.
gz = [name for name in ftp.nlst() if name.endswith('.gz')]
for index, filename in enumerate(gz):
    # The first four characters of the MDTM reply are skipped (presumably the
    # "213 " status prefix -- confirm against the server); the rest is the
    # timestamp handed to dateutil.
    timestamp = ftp.voidcmd(f"MDTM {filename}")[4:].strip()
    # The parsed value is rendered with str() and split on whitespace; the
    # first two pieces are shown as the date and time columns.
    parseDate = str(parser.parse(timestamp)).split()
    print(f"{index} {filename} {parseDate[0]} {parseDate[1]}")
# Range selection: ask for the first and last listing index (both inclusive).
try:
    num = int(input("Select a start-point? "))
    end = int(input("Select an end-point? "))
except ValueError as err:
    # Non-numeric input: stop with a readable message instead of a traceback.
    raise SystemExit(f"Selection must be a whole number: {err}") from err
# Negative or reversed bounds would make the slice below silently pick the
# wrong files (or none), so reject them up front. An end-point past the last
# index is still accepted: slicing clamps it to the end of the list.
if num < 0 or end < num:
    raise SystemExit(f"Invalid range {num}-{end}: need 0 <= start <= end")
newgz = gz[num:end + 1]

# Download: fetch each selected archive into txt/ under its remote name.
os.makedirs('txt', exist_ok=True)  # the target directory may not exist yet
try:
    for x in newgz:
        with open(f'txt/{x}', 'wb') as fp:
            ftp.retrbinary(f'RETR {x}', fp.write)
finally:
    # Close the FTP session even when a transfer fails part-way.
    ftp.quit()
# NOTE(review): pause kept from the original script; its purpose is not
# evident from the code -- confirm whether it is still needed.
sleep(1)
# Post-processing: for every archive (or already-unpacked XML file) in txt/,
# collect the DOI from each ArticleId line and save the list as <name>.txt
# next to it, then delete the processed input.
#
# This replaces the former gzip/grep/awk/rm shell pipeline. Doing it in Python
# means failures raise instead of being ignored, only .gz/.xml inputs are
# scanned (result files from earlier runs are no longer re-processed), and no
# external tools are required.
DOI_MARKER = b'ArticleId IdType="doi"'


def doi_from_line(line):
    """Return the DOI text carried by one raw XML line, or None.

    Mirrors the former ``grep | awk -F">" '{print $2}' | awk -F"<" '{print $1}'``
    pipeline byte for byte.

    Args:
        line: One line of the XML file as bytes, with or without its
            trailing newline.

    Returns:
        None when the line does not contain ``ArticleId IdType="doi"``.
        Otherwise the second ``>``-separated field of the line, cut at its
        first ``<`` (empty bytes when the line contains no ``>``).
    """
    if DOI_MARKER not in line:
        return None
    fields = line.rstrip(b'\n').split(b'>')
    second = fields[1] if len(fields) > 1 else b''
    return second.split(b'<')[0]


for source in sorted(glob.glob(os.path.join('txt', '*.gz'))
                     + glob.glob(os.path.join('txt', '*.xml'))):
    is_archive = source.endswith('.gz')
    # Output name: strip ".gz" (if any) and then ".xml" (if any), add ".txt".
    stem = source[:-len('.gz')] if is_archive else source
    if stem.endswith('.xml'):
        stem = stem[:-len('.xml')]
    # Archives are streamed through gzip so the full XML never hits the disk.
    opener = gzip.open if is_archive else open
    with opener(source, 'rb') as xml_lines, open(stem + '.txt', 'wb') as out:
        for line in xml_lines:
            doi = doi_from_line(line)
            if doi is not None:
                out.write(doi + b'\n')
    # Remove the input only after its result file was written completely.
    os.remove(source)