This repository has been archived on 2022-12-28. You can view files and clone it, but cannot push or open issues or pull requests.
gmclapi/master/sync.py

190 lines
7.5 KiB
Python
Raw Normal View History

2021-06-14 07:35:53 +00:00
'''
Copyright Ghink Network Studio
Website: https://www.ghink.net
'''
from io import BufferedRandom
import urllib
import requests,os,time,json,threading,hashlib,re
from urllib import parse
from bs4 import BeautifulSoup as bs
# In-memory list of record dicts; database() replaces it with the contents
# of database.db at start-up.
db = []

# Working directory and the sub-directory where downloaded files are stored.
workPath = "gmclapi/"
dataPath = workPath + "data/"

# Headers and proxy settings passed to every requests.get() call in this file.
# NOTE(review): only the "http" scheme is listed here, so https:// URLs are
# presumably fetched without the proxy - confirm that this is intended.
userAgent = {"User-Agent": "GMCLAPI/0.0.1"}
proxies = {"http": "http://127.0.0.1:4780"}

# Entry-point documents that syncMain() downloads.
entrance = [
    "http://launchermeta.mojang.com/mc/game/version_manifest.json",
    "http://dl.liteloader.com/versions/versions.json",
]

# [limit, running]: syncJson() only starts another worker thread while
# threadLock[0] > threadLock[1]; threads bump index 1 when they start.
threadLock = [200, 0]
def log(info):
    """Print a timestamped message and append it to ``logs.log``.

    The message is prefixed with the local time and the ``[Sync Thread]``
    tag, echoed to stdout, and written as one line to ``workPath + "logs.log"``.
    """
    # time.strftime() formats the current local time when no tuple is given.
    prefix = time.strftime("%Y/%m/%d %H:%M:%S [Sync Thread]")
    line = "{}{}".format(prefix, info)
    print(line)
    with open(workPath + "logs.log", "a+") as log_file:
        log_file.write(line + "\n")
def database():
    """Load ``database.db`` into ``db`` and keep the file in step with it.

    Intended to run in its own thread (it never returns). On start the JSON
    file ``workPath + "database.db"`` is loaded into the global ``db`` list;
    afterwards the file is compared with the in-memory list once per second
    and rewritten whenever the two differ.

    Raises:
        ValueError: if the file exists at start-up but is not valid JSON.
            This is deliberately not swallowed, so a damaged database is
            never silently overwritten with an empty one.
    """
    global db
    # This thread counts itself in the shared counter; the loop below never
    # exits, so the count is never given back.
    threadLock[1] += 1
    db_file = workPath + "database.db"
    poll_interval = 1  # seconds between comparisons; avoids a 100% CPU spin

    try:
        with open(db_file, "r") as fb:
            db = json.loads(fb.read())
    except FileNotFoundError:
        # First run: keep the current (empty) list; the loop below creates
        # the file on its first pass.
        pass

    while True:
        try:
            with open(db_file, "r") as fb:
                on_disk = json.loads(fb.read())
        except (FileNotFoundError, ValueError):
            # Missing or half-written file: memory is authoritative, so
            # force a rewrite.
            on_disk = None
        if on_disk != db:
            # Serialise before opening so a failure cannot truncate the file.
            payload = json.dumps(db)
            with open(db_file, "w+") as fb:
                fb.write(payload)
        time.sleep(poll_interval)
def syncMain():
    """Download every entry-point document and record it in ``db``.

    For each URL in ``entrance`` the document is fetched and hashed with
    MD5. If no record with that hash exists yet, the content is stored as
    ``dataPath + <md5>`` and any older record for the same URL path is
    replaced by the new one. A failed attempt is logged and retried after
    a short pause until it succeeds.
    """
    retry_delay = 10  # seconds, same pause syncForge uses between retries
    for obj in entrance:
        path = parse.urlparse(obj).path
        while True:
            try:
                origin = requests.get(obj, headers=userAgent, proxies=proxies).content
                digest = hashlib.md5(origin).hexdigest()
                if not any(rec["hash"] == digest for rec in db):
                    log("Synchronizing the file {} from {}".format(path, obj))
                    with open(dataPath + digest, "wb") as fb:
                        fb.write(origin)
                    # Drop stale records for this path and add the new one.
                    # (The original ran `del switch[i]` on a bool, which
                    # raised TypeError and retried forever.) Slice
                    # assignment keeps the same list object that other
                    # threads hold through the global name.
                    db[:] = [rec for rec in db if rec["path"] != path] + [{
                        "hash": digest,
                        "source": obj,
                        "path": path,
                    }]
                break
            except Exception:
                # Log and back off instead of silently spinning.
                log("Synchronizing the file {} from {} failed!Retrying...".format(path, obj))
                time.sleep(retry_delay)
def syncJson():
    """Download every URL referenced by the stored ``.json`` documents.

    Each record in ``db`` whose path contains ``.json`` is handed to a worker
    thread exactly once. The worker extracts all http(s) URLs from the stored
    file and downloads those whose path is not yet recorded. Newly downloaded
    ``.json`` files are picked up by later passes of the dispatcher, which
    returns only when nothing is pending and every worker has finished.

    The number of concurrently running threads is capped by ``threadLock[0]``.
    """
    # Raw string: the pattern is unchanged, but "\(" no longer triggers an
    # invalid-escape warning. Compiled once and shared by all workers.
    url_pattern = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )
    max_attempts = 5     # give up on a URL after this many failures
    retry_delay = 10     # seconds between attempts
    poll_interval = 0.1  # seconds between dispatcher passes
    synced = set()       # paths already handed to a worker

    def syncThread(rec):
        # Worker body. The dispatcher has already counted this thread in
        # threadLock[1]; the finally clause gives the slot back even when
        # something below raises.
        try:
            try:
                # URLs are ASCII, so replacing undecodable bytes cannot
                # change what the pattern extracts.
                with open(dataPath + rec["hash"], "r", encoding="utf-8", errors="replace") as fb:
                    urls = url_pattern.findall(fb.read())
            except OSError as err:
                log("Reading the file {} failed: {}".format(rec["path"], err))
                return
            for url in urls:
                path = parse.urlparse(url).path
                if path == "/":
                    continue
                for attempt in range(1, max_attempts + 1):
                    try:
                        if not any(r["path"] == path for r in db):
                            log("Synchronizing the file {} from {}".format(path, url))
                            origin = requests.get(url, headers=userAgent, proxies=proxies).content
                            digest = hashlib.md5(origin).hexdigest()
                            with open(dataPath + digest, "wb") as fb:
                                fb.write(origin)
                            db.append({
                                "hash": digest,
                                "source": url,
                                "path": path,
                            })
                        break
                    except Exception as err:
                        if attempt == max_attempts:
                            # Not recorded in db, so the next run retries it.
                            log("Synchronizing the file {} from {} failed: {}".format(path, url, err))
                        else:
                            log("Synchronizing the file {} from {} failed!Retrying...".format(path, url))
                            time.sleep(retry_delay)
        finally:
            threadLock[1] -= 1

    workers = []
    while True:
        # Reap first, scan second: if no worker is alive at this point,
        # everything the workers appended is already visible to the scan.
        workers = [w for w in workers if w.is_alive()]
        pending = [
            rec for rec in list(db)
            if ".json" in rec["path"] and rec["path"] not in synced
        ]
        if not pending and not workers:
            break
        for rec in pending:
            if rec["path"] in synced:
                # Duplicate path dispatched earlier in this same pass.
                continue
            if threadLock[0] <= threadLock[1]:
                # Thread limit reached; retry on the next pass.
                break
            synced.add(rec["path"])
            # Count the thread before it starts so the limit check above
            # cannot race with the worker's own start-up.
            threadLock[1] += 1
            worker = threading.Thread(target=syncThread, args=(rec,))
            workers.append(worker)
            worker.start()
        # Yield instead of busy-spinning while workers are running.
        time.sleep(poll_interval)
    log("Synchronizing for json list finished")
def syncForge(entrance="https://files.minecraftforge.net/net/minecraftforge/forge/"):
    """Crawl the Forge file index and download the files it links to.

    Starting at *entrance*, the page is fetched and every relative ``.html``
    link not seen before is crawled in a new thread. Every http(s) URL in the
    page text that contains ``.jar``, ``.zip`` or ``.txt`` and whose path is
    not yet in ``db`` is downloaded to ``dataPath + <md5>`` and recorded.
    Failed downloads are logged and retried every 10 seconds.

    Args:
        entrance: URL of the first index page to crawl.
    """
    # Raw string: same pattern, without the invalid "\(" escape warning.
    url_pattern = re.compile(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
    )
    embedded_url = re.compile(r'https?://')
    wanted = (".jar", ".zip", ".txt")
    retry_delay = 10
    crawed = []  # relative links already scheduled for crawling

    def unwrap(url):
        # Return the URL embedded inside *url*, or *url* itself if none.
        # The original sliced a fixed 48 characters off every archive URL -
        # presumably the length of a redirect prefix (TODO confirm). That
        # turned any direct link into an unusable string that was retried
        # forever. Looking for the embedded scheme gives the same result for
        # a 48-character prefix and leaves direct links intact.
        inner = embedded_url.search(url, 1)
        return url[inner.start():] if inner else url

    def download(url):
        # Fetch *url*, store it under its MD5 and record it; retry until done.
        path = parse.urlparse(url).path
        log("Synchronizing the file {} from {}".format(path, url))
        while True:
            try:
                origin = requests.get(url, headers=userAgent, proxies=proxies).content
                digest = hashlib.md5(origin).hexdigest()
                with open(dataPath + digest, "wb") as fb:
                    fb.write(origin)
                db.append({
                    "hash": digest,
                    "source": url,
                    "path": path,
                })
                break
            except Exception:
                log("Synchronizing the file {} from {} failed!Retrying...".format(path, url))
                time.sleep(retry_delay)

    def syncThread(page_url):
        threadLock[1] += 1
        try:
            log("Crawling the page {}".format(page_url))
            page = requests.get(page_url, headers=userAgent, proxies=proxies).text
            soup = bs(page, features="html5lib")
            for anchor in soup.find_all("a", href=True):
                link = anchor.get("href")
                if ".html" in link and "http" not in link and link not in crawed:
                    crawed.append(link)
                    threading.Thread(
                        target=syncThread,
                        args=(parse.urljoin(page_url, link),),
                    ).start()
            for obj in url_pattern.findall(page):
                # The original tested ".txt" against the leftover `link`
                # from the loop above; the URL being examined is `obj`.
                if not any(ext in obj for ext in wanted):
                    continue
                target = unwrap(obj)
                # Skip when either the raw or the unwrapped path is known,
                # as the original duplicate check did.
                known = {parse.urlparse(obj).path, parse.urlparse(target).path}
                if any(rec["path"] in known for rec in db):
                    continue
                download(target)
        finally:
            # Give the slot back even if fetching or parsing the page raised.
            threadLock[1] -= 1

    syncThread(entrance)
def monitor():
    """Log the ``threadLock`` list once per second, forever."""
    pause = 1  # seconds between reports
    while True:
        snapshot = str(threadLock)
        log(snapshot)
        time.sleep(pause)
def main():
    """Start the database thread, then run one sync cycle every 24 hours.

    Each cycle runs syncMain() first, then syncJson() and syncForge() in
    parallel threads, and waits for both before sleeping again so that
    cycles cannot overlap.
    """
    threading.Thread(target=database).start()
    # Uncomment to log the thread counter once per second:
    #threading.Thread(target=monitor).start()
    # Give the database thread a moment to load database.db into `db`.
    time.sleep(1)
    while True:
        # NOTE(review): the wait comes before the first sync, so nothing is
        # fetched during the first 24 hours - confirm that this is intended.
        time.sleep(60 * 60 * 24)
        syncMain()
        # Pass the callables themselves. The original wrote
        # `target=syncJson()`, which ran the function right here and handed
        # Thread a None target.
        workers = [
            threading.Thread(target=syncJson),
            threading.Thread(target=syncForge),
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()
# Start the sync service only when this file is executed as a script.
if __name__ == "__main__":
    main()