'''
Copyright Ghink Network Studio
Website: https://www.ghink.net

GMCLAPI sync daemon: mirrors the Minecraft version manifests, the JSON files they
reference and the Forge installer archives into a local data directory, and keeps an
index of every mirrored file in database.db.
'''
import requests, time, json, threading, hashlib, re
from urllib import parse
from bs4 import BeautifulSoup as bs

# In-memory index of every mirrored file: a list of {"hash", "source", "path"} records.
db = []
# Working directories; they are expected to exist before the first run, the script does not create them.
workPath = "gmclapi/"
dataPath = workPath + "data/"
userAgent = {'User-Agent': 'GMCLAPI/0.0.1'}
proxies = {
    "http": "http://127.0.0.1:4780"
}
# Entry-point manifests to mirror.
entrance = ["http://launchermeta.mojang.com/mc/game/version_manifest.json",
            "http://dl.liteloader.com/versions/versions.json"]
# threadLock[0] is the maximum number of worker threads, threadLock[1] counts the threads currently running.
threadLock = [200, 0]
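# For reference, a record in db (and therefore in database.db) is built by the sync routines
# below as {"hash": <md5 of the file content>, "source": <download URL>, "path": <URL path
# used as the mirror key>}, e.g. (illustrative values only):
#   {"hash": "d41d8cd98f00b204e9800998ecf8427e",
#    "source": "http://launchermeta.mojang.com/mc/game/version_manifest.json",
#    "path": "/mc/game/version_manifest.json"}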

def log(info):
    # Timestamp every message, echo it to stdout and append it to the log file.
    info = "{} {}".format(time.strftime("%Y/%m/%d %H:%M:%S [Sync Thread]", time.localtime()), info)
    print(info)
    with open(workPath + "logs.log", "a+") as fb:
        fb.write(info + "\n")

def database():
    # Load database.db at startup, then persist the in-memory db whenever it drifts from the copy on disk.
    global db
    threadLock[1] += 1
    with open(workPath + "database.db", "r") as fb:
        db = json.loads(fb.read())
    while True:
        time.sleep(1)  # avoid busy-looping on the database file
        with open(workPath + "database.db", "r") as fb:
            if json.loads(fb.read()) == db:
                continue
        with open(workPath + "database.db", "w+") as fb:
            fb.write(json.dumps(db))

def syncMain():
    # Mirror the entry-point manifests listed in `entrance`.
    global db
    for obj in entrance:
        while True:
            try:
                origin = requests.get(obj, headers=userAgent, proxies=proxies).content
                md5 = hashlib.md5()
                md5.update(origin)
                hash = md5.hexdigest()
                # Skip the file if this exact content is already mirrored.
                switch = True
                for h in db:
                    if h["hash"] == hash:
                        switch = False
                        break
                if switch:
                    log("Synchronizing the file {} from {}".format(parse.urlparse(obj).path, obj))
                    # Drop any outdated record for the same path before storing the new copy.
                    db = [h for h in db if h["path"] != parse.urlparse(obj).path]
                    with open(dataPath + hash, "wb") as fb:
                        fb.write(origin)
                    db.append({
                        "hash": hash,
                        "source": obj,
                        "path": parse.urlparse(obj).path
                    })
                break
            except:
                # Download failed: wait briefly and retry.
                time.sleep(10)

def syncJson():
    # Walk every mirrored .json manifest, extract the URLs it references and mirror those files too.
    synced = []
    def syncThread(rec):
        global db
        threadLock[1] += 1
        with open(dataPath + rec["hash"], "r") as fb:
            fall = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', fb.read())
        for obj in fall:
            if parse.urlparse(obj).path == "/":
                continue
            while True:
                try:
                    # Only download URLs whose path is not mirrored yet.
                    switch = True
                    for r in db:
                        if r["path"] == parse.urlparse(obj).path:
                            switch = False
                            break
                    if switch:
                        log("Synchronizing the file {} from {}".format(parse.urlparse(obj).path, obj))
                        origin = requests.get(obj, headers=userAgent, proxies=proxies).content
                        md5 = hashlib.md5()
                        md5.update(origin)
                        hash = md5.hexdigest()
                        with open(dataPath + hash, "wb") as fb:
                            fb.write(origin)
                        db.append({
                            "hash": hash,
                            "source": obj,
                            "path": parse.urlparse(obj).path
                        })
                    break
                except:
                    # Download failed: wait briefly and retry.
                    time.sleep(10)
        threadLock[1] -= 1
    # Keep spawning workers until every .json record has been handed to a thread.
    while True:
        i = 0
        for rec in db:
            if ".json" in rec["path"] and rec["path"] not in synced:
                i += 1
                if threadLock[0] > threadLock[1]:
                    synced.append(rec["path"])
                    threading.Thread(target=syncThread, args=(rec,)).start()
        if i == 0:
            break
    log("Synchronizing for json list finished")

def syncForge(entrance="https://files.minecraftforge.net/net/minecraftforge/forge/"):
    # Recursively crawl the Forge download site and mirror the installer archives and text files it links to.
    global db
    crawed = []
    def syncThread(entrance):
        global db
        threadLock[1] += 1
        log("Crawling the page {}".format(entrance))
        page = requests.get(entrance, headers=userAgent, proxies=proxies).text
        soup = bs(page, features="html5lib")
        pageurls = soup.find_all("a", href=True)
        for obj in pageurls:
            link = obj.get("href")
            # Follow relative .html links that have not been crawled yet.
            if ".html" in link and "http" not in link and link not in crawed:
                crawed.append(link)
                threading.Thread(target=syncThread, args=(parse.urljoin(entrance, link),)).start()
        fall = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', page)
        for obj in fall:
            # Download links on these pages are wrapped in a redirect URL; obj[48:] strips that
            # fixed-length prefix to recover the direct download URL.
            switch = True
            for h in db:
                if h["path"] == parse.urlparse(obj).path or h["path"] == parse.urlparse(obj[48:]).path:
                    switch = False
                    break
            if switch:
                if ".jar" in obj or ".zip" in obj:
                    log("Synchronizing the file {} from {}".format(parse.urlparse(obj[48:]).path, obj[48:]))
                    while True:
                        try:
                            origin = requests.get(obj[48:], headers=userAgent, proxies=proxies).content
                            md5 = hashlib.md5()
                            md5.update(origin)
                            hash = md5.hexdigest()
                            with open(dataPath + hash, "wb") as fb:
                                fb.write(origin)
                            db.append({
                                "hash": hash,
                                "source": obj[48:],
                                "path": parse.urlparse(obj[48:]).path
                            })
                            break
                        except:
                            log("Synchronizing the file {} from {} failed! Retrying...".format(parse.urlparse(obj[48:]).path, obj[48:]))
                            time.sleep(10)
                elif ".txt" in obj:
                    log("Synchronizing the file {} from {}".format(parse.urlparse(obj).path, obj))
                    while True:
                        try:
                            origin = requests.get(obj, headers=userAgent, proxies=proxies).content
                            md5 = hashlib.md5()
                            md5.update(origin)
                            hash = md5.hexdigest()
                            with open(dataPath + hash, "wb") as fb:
                                fb.write(origin)
                            db.append({
                                "hash": hash,
                                "source": obj,
                                "path": parse.urlparse(obj).path
                            })
                            break
                        except:
                            log("Synchronizing the file {} from {} failed! Retrying...".format(parse.urlparse(obj).path, obj))
                            time.sleep(10)
        threadLock[1] -= 1
    syncThread(entrance)

def monitor():
    # Debug helper: print the thread counters once a second.
    while True:
        log(str(threadLock))
        time.sleep(1)

def main():
    # Start the persistence thread first so database.db is loaded before any sync work begins.
    threading.Thread(target=database).start()
    #threading.Thread(target=monitor).start()
    time.sleep(1)
    while True:
        # Run one full sync pass, then wait a day before the next one.
        syncMain()
        threading.Thread(target=syncJson).start()
        threading.Thread(target=syncForge).start()
        time.sleep(60 * 60 * 24)

if __name__ == "__main__":
    main()
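
# A minimal bootstrap sketch (an assumption, not part of the original script): the working
# directory, the data directory and database.db are opened above but never created, so they
# are expected to exist before the first run. Something like the following, run once
# beforehand, would satisfy that:
#
#   import os, json
#   os.makedirs("gmclapi/data", exist_ok=True)
#   if not os.path.exists("gmclapi/database.db"):
#       with open("gmclapi/database.db", "w") as fb:
#           fb.write(json.dumps([]))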