First time update, tired :(
This commit is contained in:
parent
475a7dbe62
commit
88fd550aba
97
manager/manager.py
Normal file
97
manager/manager.py
Normal file
@ -0,0 +1,97 @@
|
||||
#------------------#
|
||||
import os
import threading
import time
import urllib
import urllib.parse

import bs4
import requests
|
||||
#------------------#
|
||||
|
||||
class MultiThreadDownload(threading.Thread):
    '''Worker thread that downloads one byte range of a remote file.

    Each instance fetches bytes ``startpos``..``endpos`` (inclusive, as in
    the HTTP ``Range`` header) of ``url`` and writes them at the matching
    offset of the supplied file object.
    Get from Internet and improved by Ghink Network Studio.
    '''

    def __init__(self, url, startpos, endpos, f, UA):
        '''Store the range-download parameters.

        url      -- remote file URL
        startpos -- first byte offset (inclusive)
        endpos   -- last byte offset (inclusive)
        f        -- seekable, writable binary file object owned by this thread
        UA       -- base headers dict, e.g. {'User-Agent': ...}; never mutated
        '''
        super(MultiThreadDownload, self).__init__()
        self.url = url
        self.startpos = startpos
        self.endpos = endpos
        self.fd = f
        self.UA = UA

    def _range_headers(self):
        '''Return a fresh headers dict: the UA headers plus the Range header.

        A copy is made because the old code did ``self.UA.update(...)``,
        whose return value is None -- every request was therefore sent with
        ``headers=None`` and the Range entry leaked into the shared UA dict.
        '''
        headers = dict(self.UA)
        headers["Range"] = "bytes=%s-%s" % (self.startpos, self.endpos)
        return headers

    def download(self):
        '''Fetch the assigned byte range and write it at its file offset.'''
        res = requests.get(self.url, headers=self._range_headers())
        self.fd.seek(self.startpos)
        self.fd.write(res.content)

    def run(self):
        # Thread entry point: just perform the range download.
        self.download()
|
||||
|
||||
class GOSManager(object):
    '''The Main Class of the Sync Manager'''

    def __init__(self, SiteName="GOSManager", SiteVersion="A0.0.1"):
        '''Store site identity and build the User-Agent header sent with requests.'''
        self.__SiteName = SiteName
        self.__SiteVersion = SiteVersion
        self.__UserAgent = {'User-Agent': SiteName + '/' + SiteVersion + ' ((GOSM Manager Alpha 0.0.1;Alpha))'}

    def Download(self, DownloadFrom, DownloadTo, ThreadNum=3):
        '''Multi-Thread download function.

        DownloadFrom -- remote URL
        DownloadTo   -- local file path
        ThreadNum    -- number of byte-range worker threads (default 3)
        Returns an error string on bad arguments (original contract kept),
        otherwise None.
        '''
        if DownloadFrom == "" or DownloadTo == "":
            return "Error:Wrong online address or local address for download."
        url = DownloadFrom
        filename = DownloadTo
        filesize = int(requests.head(url, headers=self.__UserAgent).headers['Content-Length'])
        threadnum = ThreadNum
        # NOTE(review): the old code created a BoundedSemaphore here and
        # discarded it -- it never limited anything, so it was removed.
        step = filesize // threadnum
        mtd_list = []
        start = 0
        end = -1
        # Pre-create/truncate the target file; binary mode ('wb'), not the
        # old text-mode 'w', since the content is arbitrary bytes.
        open(filename, 'wb').close()
        with open(filename, 'rb+') as f:
            fileno = f.fileno()
            while end < filesize - 1:
                start = end + 1
                end = start + step - 1
                if end > filesize - 1:
                    # HTTP Range offsets are inclusive: the last valid byte is
                    # filesize-1 (the old clamp to filesize was off by one).
                    end = filesize - 1
                # Give each worker its own duplicated descriptor so that
                # concurrent seek/write pairs do not race on one offset.
                dup = os.dup(fileno)
                fd = os.fdopen(dup, 'rb+', -1)
                t = MultiThreadDownload(url, start, end, fd, self.__UserAgent)
                t.start()
                mtd_list.append(t)
            for i in mtd_list:
                i.join()

    def GetHttpSyncList(self, url, domain=""):
        '''The function which used to get http links list of files were need to sync.

        Recursively walks the HTML index pages under *url*, following only
        links that contain *domain*, and returns a de-duplicated list of
        file (non-HTML) URLs.
        '''
        if domain == "":
            domain = urllib.parse.urlparse(url).netloc
        List = []
        # Determine the type of online file; only try to search links in HTML.
        if "text/html" in requests.head(url).headers['Content-Type']:
            html = requests.get(url)
            html.encoding = 'utf-8'
            soup = bs4.BeautifulSoup(html.text, "html.parser")
            # Get all links from the page.
            for h in soup.find_all('a'):
                try:
                    href = h['href']
                    if href == "../" or "#" in href:
                        continue
                    # Make sure the link is absolute ("http://" or "https://").
                    if "http://" in href or "https://" in href:
                        urlGet = href
                    else:
                        urlGet = url + href
                    # Avoid outside links.
                    if domain in urlGet:
                        print(urlGet)
                        # Recurse to collect all links.  The old code called
                        # the undefined name ``HttpSync`` here; the resulting
                        # NameError was swallowed by a bare except, so the
                        # function always returned an empty list.
                        List.extend(self.GetHttpSyncList(urlGet, domain))
                        # Avoid recording links that point to dirs (HTML pages).
                        if "text/html" not in requests.head(urlGet).headers['Content-Type']:
                            List.append(urlGet)
                except (KeyError, requests.RequestException):
                    # <a> without an href, or a failed HEAD/GET -- skip link.
                    continue
        # Duplicate removal, preserving first-seen order.
        ListReturn = []
        for i in List:
            if i not in ListReturn:
                ListReturn.append(i)
        return ListReturn
|
||||
|
0
manager/setting.json
Normal file
0
manager/setting.json
Normal file
Reference in New Issue
Block a user