#------------------#
import os
import subprocess
import threading
import urllib.parse

import bs4
import requests
#------------------#
class MultiThreadDownload(threading.Thread):
    '''Worker thread that downloads one byte range of a file.

    Obtained from the Internet and improved by Ghink Network Studio.
    '''
    def __init__(self, url, startpos, endpos, f, UA):
        super(MultiThreadDownload, self).__init__()
        self.url = url
        self.startpos = startpos
        self.endpos = endpos
        self.fd = f
        self.UA = UA

    def download(self):
        # Copy the shared User-Agent headers before adding the Range header,
        # so concurrent threads do not overwrite each other's ranges.
        headers = dict(self.UA)
        headers["Range"] = "bytes=%s-%s" % (self.startpos, self.endpos)
        res = requests.get(self.url, headers=headers)
        self.fd.seek(self.startpos)
        self.fd.write(res.content)
        self.fd.close()

    def run(self):
        self.download()


class GOSManager(object):
    '''The main class of the sync manager.'''

    def __init__(self, SiteName="GOSManager", SiteVersion="A0.0.1"):
        '''Set the global variables.'''
        self.__SiteName = SiteName
        self.__SiteVersion = SiteVersion
        self.__UserAgent = {'User-Agent': SiteName + '/' + SiteVersion + ' ((GOSM Manager Alpha 0.0.1;Alpha))'}

    def Download(self, DownloadFrom, DownloadTo, ThreadNum=3):
        '''Download one file with multiple threads, each fetching a byte range.'''
        if DownloadFrom == "" or DownloadTo == "":
            return "Error: wrong online address or local address for download."
        url = DownloadFrom
        filename = DownloadTo
        filesize = int(requests.head(url, headers=self.__UserAgent).headers['Content-Length'])
        threadnum = ThreadNum
        step = filesize // threadnum
        mtd_list = []
        start = 0
        end = -1
        # Create the local file and pre-size it so every thread can seek and write.
        with open(filename, 'wb') as tempf:
            tempf.truncate(filesize)
        with open(filename, 'rb+') as f:
            fileno = f.fileno()
            while end < filesize - 1:
                start = end + 1
                end = start + step - 1
                if end > filesize - 1:
                    end = filesize - 1
                # Give each thread its own duplicated file descriptor so the
                # threads can seek and write independently in the same file.
                dup = os.dup(fileno)
                fd = os.fdopen(dup, 'rb+', -1)
                t = MultiThreadDownload(url, start, end, fd, self.__UserAgent)
                t.start()
                mtd_list.append(t)
            for t in mtd_list:
                t.join()

    def GetHttpSyncList(self, url, domain=""):
        '''Return the list of HTTP links to files that need to be synced.'''
        if domain == "":
            domain = urllib.parse.urlparse(url).netloc
        links = []
        # Determine the type of the online resource; only search for links in HTML pages.
        if "text/html" in requests.head(url).headers['Content-Type']:
            html = requests.get(url)
            html.encoding = 'utf-8'
            soup = bs4.BeautifulSoup(html.text, "html.parser")
            # Collect every link on the page.
            for h in soup.find_all('a'):
                try:
                    if h['href'] == "../" or "#" in h['href']:
                        continue
                    # Make the link absolute if it does not already start with "http://" or "https://".
                    if "http://" in h['href'] or "https://" in h['href']:
                        urlGet = h['href']
                    else:
                        urlGet = url + h['href']
                    # Skip links that leave the mirrored domain.
                    if domain in urlGet:
                        # Recurse into sub-pages to collect all links below them.
                        links.extend(self.GetHttpSyncList(urlGet, domain))
                        # Do not record links that point to directory pages.
                        if "text/html" not in requests.head(urlGet).headers['Content-Type']:
                            links.append(urlGet)
                except (KeyError, requests.RequestException):
                    pass
        # Remove duplicates while keeping the original order.
        unique_links = []
        for i in links:
            if i not in unique_links:
                unique_links.append(i)
        return unique_links

    def RsyncSync(self, From, To):
        '''Sync two locations with rsync, passing the paths as separate arguments.'''
        subprocess.run(["rsync", "-avrt", From, To])
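#------------------#
# Minimal usage sketch (not part of the original module): it shows one way the
# manager might be driven. The mirror URL and the derived local file names
# below are hypothetical placeholders, not values from the source.
if __name__ == "__main__":
    manager = GOSManager(SiteName="GOSManager", SiteVersion="A0.0.1")
    # Collect the downloadable file links under a (hypothetical) mirror directory.
    links = manager.GetHttpSyncList("https://example.com/mirror/")
    for link in links:
        # Save each file under its last path segment, using 3 download threads.
        manager.Download(link, link.rsplit("/", 1)[-1], ThreadNum=3)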