'''
Copyright Ghink Network Studio
Website: https://www.ghink.net
'''
import requests,os,time,json,threading,hashlib,re
from urllib import parse
from bs4 import BeautifulSoup as bs

db=[]
workPath="gmclapi/"
dataPath=workPath+"data/"
userAgent={'User-Agent':'GMCLAPI/0.0.1'}
# Local HTTP proxy for outbound requests; adjust or remove for your environment.
proxies={"http":"http://127.0.0.1:4780"}
entrance=["http://launchermeta.mojang.com/mc/game/version_manifest.json",
          "http://dl.liteloader.com/versions/versions.json"]
# Not an actual lock: [maximum worker threads, currently active worker threads].
threadLock=[200,0]

def log(info):
    info="{} {}".format(time.strftime("%Y/%m/%d %H:%M:%S [Sync Thread]",time.localtime()),info)
    print(info)
    with open(workPath+"logs.log","a+") as fb:
        fb.write(info+"\n")

def database():
    # Load the database once, then write it back whenever the in-memory copy changes.
    threadLock[1]+=1
    global db
    with open(workPath+"database.db","r") as fb:
        db=json.loads(fb.read())
    while True:
        time.sleep(1)  # avoid spinning at full CPU between comparisons
        with open(workPath+"database.db","r") as fb:
            if json.loads(fb.read())==db:
                continue
        with open(workPath+"database.db","w+") as fb:
            fb.write(json.dumps(db))

def syncMain():
    # Mirror the entry-point manifests listed in `entrance`.
    global db
    for obj in entrance:
        while True:
            try:
                origin=requests.get(obj,headers=userAgent,proxies=proxies,timeout=60).content
                md5=hashlib.md5()
                md5.update(origin)
                digest=md5.hexdigest()
                switch=True
                for h in db:
                    if h["hash"]==digest:
                        switch=False
                        break
                if switch:
                    log("Synchronizing the file {} from {}".format(parse.urlparse(obj).path,obj))
                    # Drop any stale record for this path before appending the fresh
                    # one (the original `del switch[i]` deleted from the wrong object,
                    # which raised and caused an endless retry loop).
                    db=[h for h in db if h["path"]!=parse.urlparse(obj).path]
                    with open(dataPath+digest,"wb") as fb:
                        fb.write(origin)
                    db.append({
                        "hash":digest,
                        "source":obj,
                        "path":parse.urlparse(obj).path
                    })
                break
            except Exception:
                time.sleep(10)  # network error: wait, then retry

def syncJson():
    # Scan every synchronized .json file for URLs and mirror those files too.
    synced=[]
    def syncThread(rec):
        global db
        threadLock[1]+=1
        with open(dataPath+rec["hash"],"r") as fb:
            fall=re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',fb.read())
        for obj in fall:
            if parse.urlparse(obj).path=="/":
                continue
            while True:
                try:
                    switch=True
                    for r in db:
                        if r["path"]==parse.urlparse(obj).path:
                            switch=False
                            break
                    if switch:
                        log("Synchronizing the file {} from {}".format(parse.urlparse(obj).path,obj))
                        origin=requests.get(obj,headers=userAgent,proxies=proxies,timeout=60).content
                        md5=hashlib.md5()
                        md5.update(origin)
                        digest=md5.hexdigest()
                        with open(dataPath+digest,"wb") as fb:
                            fb.write(origin)
                        db.append({
                            "hash":digest,
                            "source":obj,
                            "path":parse.urlparse(obj).path
                        })
                    break
                except Exception:
                    time.sleep(10)  # network error: wait, then retry
        threadLock[1]-=1
    while True:
        i=0
        for rec in db:
            if ".json" in rec["path"] and rec["path"] not in synced:
                i+=1
                if threadLock[0]>threadLock[1]:
                    synced.append(rec["path"])
                    threading.Thread(target=syncThread,args=(rec,)).start()
        if i==0:
            break
    log("Synchronizing for json list finished")

def syncForge(entrance="https://files.minecraftforge.net/net/minecraftforge/forge/"):
    # Crawl the Forge file listing recursively and mirror every .jar/.zip/.txt it links to.
    global db
    crawled=[]
    def syncThread(entrance):
        global db
        threadLock[1]+=1
        log("Crawling the page {}".format(entrance))
        page=requests.get(entrance,headers=userAgent,proxies=proxies,timeout=60).text
        soup=bs(page,features="html5lib")
        pageurls=soup.find_all("a",href=True)
        for obj in pageurls:
            link=obj.get("href")
            if ".html" in link and "http" not in link and link not in crawled:
                crawled.append(link)
                threading.Thread(target=syncThread,args=(parse.urljoin(entrance,link),)).start()
        fall=re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',page)
        for obj in fall:
            # obj[48:] strips what appears to be the 48-character adfoc.us
            # redirect prefix that Forge wraps around its download links.
            switch=True
            for h in db:
                if h["path"]==parse.urlparse(obj).path or h["path"]==parse.urlparse(obj[48:]).path:
                    switch=False
                    break
            if switch:
                if ".jar" in obj or ".zip" in obj:
                    log("Synchronizing the file {} from {}".format(parse.urlparse(obj[48:]).path,obj[48:]))
                    while True:
                        try:
                            origin=requests.get(obj[48:],headers=userAgent,proxies=proxies,timeout=60).content
                            md5=hashlib.md5()
                            md5.update(origin)
                            digest=md5.hexdigest()
                            with open(dataPath+digest,"wb") as fb:
                                fb.write(origin)
                            db.append({
                                "hash":digest,
                                "source":obj[48:],
                                "path":parse.urlparse(obj[48:]).path
                            })
                            break
                        except Exception:
                            log("Synchronizing the file {} from {} failed! Retrying...".format(parse.urlparse(obj[48:]).path,obj[48:]))
                            time.sleep(10)
                elif ".txt" in obj:  # the original tested `link`, a stale variable from the crawl loop above
                    log("Synchronizing the file {} from {}".format(parse.urlparse(obj).path,obj))
                    while True:
                        try:
                            origin=requests.get(obj,headers=userAgent,proxies=proxies,timeout=60).content
                            md5=hashlib.md5()
                            md5.update(origin)
                            digest=md5.hexdigest()
                            with open(dataPath+digest,"wb") as fb:
                                fb.write(origin)
                            db.append({
                                "hash":digest,
                                "source":obj,
                                "path":parse.urlparse(obj).path
                            })
                            break
                        except Exception:
                            log("Synchronizing the file {} from {} failed! Retrying...".format(parse.urlparse(obj).path,obj))
                            time.sleep(10)
        threadLock[1]-=1
    syncThread(entrance)

def monitor():
    # Periodically log the thread counters for debugging.
    while True:
        log(str(threadLock))
        time.sleep(1)

def main():
    # Create the working directories and an empty database on first run,
    # so the database thread does not crash on a missing file.
    os.makedirs(dataPath,exist_ok=True)
    if not os.path.exists(workPath+"database.db"):
        with open(workPath+"database.db","w") as fb:
            fb.write("[]")
    threading.Thread(target=database).start()
    #threading.Thread(target=monitor).start()
    time.sleep(1)  # give the database thread a moment to load the records
    while True:
        syncMain()
        # Pass the functions themselves: the original passed syncJson()/syncForge(),
        # which called them inline instead of running them on the new threads.
        threading.Thread(target=syncJson).start()
        threading.Thread(target=syncForge).start()
        time.sleep(60*60*24)  # resync once a day (the original slept before the first sync)

if __name__=="__main__":
    main()