'''
Copyright Ghink Network Studio
Website: https://www.ghink.net
'''
import hashlib
import json
import re
import threading
import time
from urllib import parse

import requests
from bs4 import BeautifulSoup as bs
db=[]  # in-memory copy of database.db: a list of {"hash","source","path"} records
workPath="gmclapi/"
dataPath=workPath+"data/"
userAgent={'User-Agent':'GMCLAPI/0.0.1'}
proxies={
    "http": "http://127.0.0.1:4780"
}
entrance=[
    "http://launchermeta.mojang.com/mc/game/version_manifest.json",
    "http://dl.liteloader.com/versions/versions.json"
]
# threadLock[0] is the worker thread cap, threadLock[1] the number of
# threads currently running.
threadLock=[200,0]
def log(info):
    # Prepend a timestamp and thread tag, then echo to stdout and the log file.
    info="{} {}".format(time.strftime("%Y/%m/%d %H:%M:%S [Sync Thread]",time.localtime()),info)
    print(info)
    with open(workPath+"logs.log","a+") as fb:
        fb.write(info+"\n")
def database():
    # Load database.db into memory, then persist the in-memory db back to
    # disk whenever it diverges from the file.
    threadLock[1]+=1
    global db
    with open(workPath+"database.db","r") as fb:
        db=json.loads(fb.read())
    while True:
        time.sleep(1)  # avoid a busy spin on the comparison below
        with open(workPath+"database.db","r") as fb:
            if json.loads(fb.read())==db:
                continue
        with open(workPath+"database.db","w+") as fb:
            fb.write(json.dumps(db))
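# Illustrative sketch of a single database.db record as maintained above;
# the hash value and URL pairing here are hypothetical, but the three keys
# match what syncMain/syncJson/syncForge append.
def _exampleRecord():
    return {
        "hash":"9e107d9d372bb6826bd81d3542a419d6",  # md5 hexdigest of the stored file
        "source":"http://launchermeta.mojang.com/mc/game/version_manifest.json",
        "path":"/mc/game/version_manifest.json"
    }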
def syncMain():
    # Fetch each entrance manifest and store a new copy if its content changed.
    global db
    for obj in entrance:
        while True:
            try:
                origin=requests.get(obj,headers=userAgent,proxies=proxies).content
                md5=hashlib.md5()
                md5.update(origin)
                hash=md5.hexdigest()
                switch=True
                for h in db:
                    if h["hash"]==hash:
                        switch=False
                        break
                if switch:
                    path=parse.urlparse(obj).path
                    log("Synchronizing the file {} from {}".format(path,obj))
                    # Drop any stale record for the same path before re-adding it.
                    db=[h for h in db if h["path"]!=path]
                    with open(dataPath+hash,"wb") as fb:
                        fb.write(origin)
                    db.append({
                        "hash":hash,
                        "source":obj,
                        "path":path
                    })
                break
            except Exception:
                pass  # network error; retry
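# Minimal sketch of the content-addressed storage pattern syncMain uses:
# a payload's MD5 hexdigest doubles as its file name under dataPath.
# _storeExample is hypothetical and never called by the sync code.
def _storeExample(payload=b"example payload"):
    md5=hashlib.md5()
    md5.update(payload)
    digest=md5.hexdigest()
    with open(dataPath+digest,"wb") as fb:
        fb.write(payload)
    return digest  # the payload now lives at dataPath+digest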
def syncJson():
    # Walk every cached .json file, extract the URLs it references and
    # mirror each one that is not in the database yet.
    synced=[]
    def syncThread(rec):
        global db
        threadLock[1]+=1
        with open(dataPath+rec["hash"],"r") as fb:
            fall=re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',fb.read())
        for obj in fall:
            if parse.urlparse(obj).path=="/":
                continue
            while True:
                try:
                    switch=True
                    for r in db:
                        if r["path"]==parse.urlparse(obj).path:
                            switch=False
                            break
                    if switch:
                        log("Synchronizing the file {} from {}".format(parse.urlparse(obj).path,obj))
                        origin=requests.get(obj,headers=userAgent,proxies=proxies).content
                        md5=hashlib.md5()
                        md5.update(origin)
                        hash=md5.hexdigest()
                        with open(dataPath+hash,"wb") as fb:
                            fb.write(origin)
                        db.append({
                            "hash":hash,
                            "source":obj,
                            "path":parse.urlparse(obj).path
                        })
                    break
                except Exception:
                    pass  # network error; retry
        threadLock[1]-=1
    while True:
        i=0
        for rec in db:
            if ".json" in rec["path"] and rec["path"] not in synced:
                i+=1
                if threadLock[0]>threadLock[1]:
                    synced.append(rec["path"])
                    threading.Thread(target=syncThread,args=(rec,)).start()
        if i==0:
            break
        time.sleep(1)  # wait for workers before rescanning
    log("Synchronizing for json list finished")
def syncForge(entrance="https://files.minecraftforge.net/net/minecraftforge/forge/"):
    # Recursively crawl the Forge file listing and mirror every jar, zip
    # and txt file that is not in the database yet.
    global db
    crawed=[]
    def syncThread(entrance):
        global db
        threadLock[1]+=1
        log("Crawling the page {}".format(entrance))
        page=requests.get(entrance,headers=userAgent,proxies=proxies).text
        soup=bs(page,features="html5lib")
        pageurls=soup.find_all("a",href=True)
        for obj in pageurls:
            link=obj.get("href")
            if ".html" in link and "http" not in link and link not in crawed:
                crawed.append(link)
                threading.Thread(target=syncThread,args=(parse.urljoin(entrance,link),)).start()
        fall=re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',page)
        for obj in fall:
            # Download links on this page appear to be wrapped in an adfoc.us
            # redirect, so obj[48:] recovers the direct URL behind it.
            switch=True
            for h in db:
                if h["path"]==parse.urlparse(obj).path or h["path"]==parse.urlparse(obj[48:]).path:
                    switch=False
                    break
            if switch:
                if ".jar" in obj or ".zip" in obj:
                    log("Synchronizing the file {} from {}".format(parse.urlparse(obj[48:]).path,obj[48:]))
                    while True:
                        try:
                            origin=requests.get(obj[48:],headers=userAgent,proxies=proxies).content
                            md5=hashlib.md5()
                            md5.update(origin)
                            hash=md5.hexdigest()
                            with open(dataPath+hash,"wb") as fb:
                                fb.write(origin)
                            db.append({
                                "hash":hash,
                                "source":obj[48:],
                                "path":parse.urlparse(obj[48:]).path
                            })
                            break
                        except Exception:
                            log("Synchronizing the file {} from {} failed! Retrying...".format(parse.urlparse(obj[48:]).path,obj[48:]))
                            time.sleep(10)
                elif ".txt" in obj:
                    log("Synchronizing the file {} from {}".format(parse.urlparse(obj).path,obj))
                    while True:
                        try:
                            origin=requests.get(obj,headers=userAgent,proxies=proxies).content
                            md5=hashlib.md5()
                            md5.update(origin)
                            hash=md5.hexdigest()
                            with open(dataPath+hash,"wb") as fb:
                                fb.write(origin)
                            db.append({
                                "hash":hash,
                                "source":obj,
                                "path":parse.urlparse(obj).path
                            })
                            break
                        except Exception:
                            log("Synchronizing the file {} from {} failed! Retrying...".format(parse.urlparse(obj).path,obj))
                            time.sleep(10)
        threadLock[1]-=1
    syncThread(entrance)
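# Sketch of the unwrapping that obj[48:] performs above: the adfoc.us
# redirect prefix "https://adfoc.us/serve/sitelinks/?id=NNNNNN&url=" is
# exactly 48 characters when the id has six digits, so a fixed slice
# recovers the direct URL. The id and target URL below are hypothetical;
# a longer or shorter id would break the fixed slice.
def _unwrapExample():
    wrapped="https://adfoc.us/serve/sitelinks/?id=271228&url=https://maven.minecraftforge.net/net/minecraftforge/forge/forge.jar"
    return wrapped[48:]  # -> "https://maven.minecraftforge.net/net/minecraftforge/forge/forge.jar"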
def monitor():
    # Print the thread counters once a second (debugging aid).
    while True:
        log(str(threadLock))
        time.sleep(1)
def main():
    threading.Thread(target=database).start()
    #threading.Thread(target=monitor).start()
    time.sleep(1)
    while True:
        syncMain()
        threading.Thread(target=syncJson).start()
        threading.Thread(target=syncForge).start()
        time.sleep(60*60*24)  # resynchronize once a day
if __name__=="__main__":
    main()