mirror of
https://agent.ghink.cloud/ghinknet/richka
synced 2024-12-28 20:03:28 +00:00
single file download done
This commit is contained in:
parent
c676cd2eb5
commit
cdc2073460
41
README.md
41
README.md
@ -1,2 +1,43 @@
|
|||||||
# Richka - Python Async Download Engine
|
# Richka - Python Async Download Engine
|
||||||
|
|
||||||
|
#### Richka (from Ukrainian: Річка) means "river", a name that stands for the download speed of the Richka engine.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
Add `import richka` to your code and call it from your script, for example:
|
||||||
|
|
||||||
|
```
|
||||||
|
import richka
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import time
|
||||||
|
import threading
|
||||||
|
|
||||||
|
# Create task controller
|
||||||
|
controller = richka.Controller()
|
||||||
|
|
||||||
|
def download():
|
||||||
|
global controller
|
||||||
|
|
||||||
|
# Create download task
|
||||||
|
time_used, file_size = asyncio.run(richka.download("https://mirrors.tuna.tsinghua.edu.cn/videolan-ftp/vlc-iOS/3.6.4/VLC-iOS.ipa", "VLC-iOS.ipa", controller))
|
||||||
|
|
||||||
|
# Result
|
||||||
|
print("Time used:", time_used)
|
||||||
|
print(f"Speed: {file_size / time_used / pow(1024, 2)}MiB/s")
|
||||||
|
|
||||||
|
def main():
|
||||||
|
global controller
|
||||||
|
|
||||||
|
# Progress monitor
|
||||||
|
while controller.status:
|
||||||
|
if controller.status == 1:
|
||||||
|
print(f"Download Progress: {round(controller.progress, 2)}% \r", end="")
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
threading.Thread(target=download).start()
|
||||||
|
main()
|
||||||
|
|
||||||
|
```
|
||||||
|
Then you'll get a file from the Internet :D.
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
aiohttp~=3.8.5
|
@ -1,2 +1,3 @@
|
|||||||
from .config import *
|
from .config import *
|
||||||
from .core import *
|
from .core import *
|
||||||
|
from .controller import *
|
||||||
|
@ -7,13 +7,16 @@ USER_AGENT = f"Richka{__VERSION[0]}/{__VERSION[1]}.{__VERSION[2]}.{__VERSION[3]}
|
|||||||
HEADERS = {"user-agent": USER_AGENT}
|
HEADERS = {"user-agent": USER_AGENT}
|
||||||
COROUTINE_LIMIT = 10
|
COROUTINE_LIMIT = 10
|
||||||
SLICE_THRESHOLD = 10 # MiB
|
SLICE_THRESHOLD = 10 # MiB
|
||||||
|
TIMEOUT = 30
|
||||||
|
RETRY_TIMES = 5
|
||||||
|
CHUNK_SIZE = 102400
|
||||||
|
|
||||||
logger = logging.getLogger("Richka Engine")
|
logger = logging.getLogger("Richka Engine")
|
||||||
|
|
||||||
def set_user_agent(user_agent: str) -> None:
|
def set_user_agent(user_agent: str) -> None:
|
||||||
"""
|
"""
|
||||||
Set Public User Agent for HTTP Requests
|
Set Public User Agent for HTTP Requests
|
||||||
:param user_agent: String
|
:param user_agent: String User-Agent you want to set.
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
richka.USER_AGENT = user_agent
|
richka.USER_AGENT = user_agent
|
||||||
@ -22,7 +25,7 @@ def set_user_agent(user_agent: str) -> None:
|
|||||||
def set_headers(headers: dict) -> None:
|
def set_headers(headers: dict) -> None:
|
||||||
"""
|
"""
|
||||||
Set Public Headers for HTTP Requests
|
Set Public Headers for HTTP Requests
|
||||||
:param headers: Dictionary
|
:param headers: Dictionary Headers you want to set.
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
for key, value in headers.items():
|
for key, value in headers.items():
|
||||||
@ -31,7 +34,7 @@ def set_headers(headers: dict) -> None:
|
|||||||
def set_coroutine_limit(coroutine_limit: int) -> None:
|
def set_coroutine_limit(coroutine_limit: int) -> None:
|
||||||
"""
|
"""
|
||||||
Set Coroutine Limit for HTTP Requests
|
Set Coroutine Limit for HTTP Requests
|
||||||
:param coroutine_limit: Integer
|
:param coroutine_limit: Integer Coroutine number limit.
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
richka.COROUTINE_LIMIT = coroutine_limit
|
richka.COROUTINE_LIMIT = coroutine_limit
|
||||||
@ -39,7 +42,23 @@ def set_coroutine_limit(coroutine_limit: int) -> None:
|
|||||||
def set_slice_threshold(slice_threshold: int) -> None:
    """
    Set Slice Threshold for HTTP Requests
    The value is compared against the file size expressed in MiB: files whose
    size is at or below the threshold are fetched in single mode, larger files
    are fetched in slicing mode.
    :param slice_threshold: Integer Slice threshold (MiB) to enable coroutine download.
    :return: None
    """
    richka.SLICE_THRESHOLD = slice_threshold
|
||||||
|
|
||||||
|
def set_timeout(timeout: int) -> None:
    """
    Set Timeout for HTTP Requests
    The download helpers pass this value as both the socket-connect and the
    socket-read timeout of every GET request they issue.
    :param timeout: Integer Timeout time in seconds.
    :return: None
    """
    richka.TIMEOUT = timeout
|
||||||
|
|
||||||
|
def set_retry_times(retry_times: int) -> None:
    """
    Set Retry Times for HTTP Requests
    The download helpers keep attempting a request while this budget is above
    zero, so the value is the total number of attempts made before giving up.
    :param retry_times: Integer Allowed retry times.
    :return: None
    """
    richka.RETRY_TIMES = retry_times
|
||||||
|
88
richka/controller.py
Normal file
88
richka/controller.py
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
import asyncio
|
||||||
|
|
||||||
|
class Controller:
|
||||||
|
def __init__(self):
|
||||||
|
self.__paused = False
|
||||||
|
self.__total_size = 0
|
||||||
|
self.__downloaded_size = 0
|
||||||
|
self.__downloaded_size_slice = {}
|
||||||
|
self.__lock = asyncio.Lock() # For async safe
|
||||||
|
|
||||||
|
@property
|
||||||
|
def total_size(self) -> int:
|
||||||
|
"""
|
||||||
|
Get the total size of the file.
|
||||||
|
:return: Integer Size of the file.
|
||||||
|
"""
|
||||||
|
return self.__total_size
|
||||||
|
|
||||||
|
@total_size.setter
|
||||||
|
def total_size(self, size: int) -> None:
|
||||||
|
"""
|
||||||
|
Set the total size of the file.
|
||||||
|
:param size: Integer Size of the file.
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
|
if not self.__total_size:
|
||||||
|
self.__total_size = size
|
||||||
|
|
||||||
|
async def update_progress(self, downloaded_chunk_size: int, chunk_id: str = None) -> None:
|
||||||
|
"""
|
||||||
|
Update the progress of the download. Do not operate this!
|
||||||
|
:param downloaded_chunk_size: Integer Downloaded Size of the file.
|
||||||
|
:param chunk_id: String Chunk ID of the part.
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
|
async with self.__lock:
|
||||||
|
if chunk_id is None and self.__downloaded_size_slice == {}:
|
||||||
|
self.__downloaded_size = downloaded_chunk_size
|
||||||
|
else:
|
||||||
|
self.__downloaded_size_slice[chunk_id] = downloaded_chunk_size
|
||||||
|
self.__downloaded_size = sum(self.__downloaded_size_slice.values())
|
||||||
|
|
||||||
|
@property
|
||||||
|
def paused(self) -> bool:
|
||||||
|
"""
|
||||||
|
Get the paused state of the downloader.
|
||||||
|
:return: Boolean State of the downloader.
|
||||||
|
"""
|
||||||
|
return self.__paused
|
||||||
|
|
||||||
|
def pause(self) -> None:
|
||||||
|
"""
|
||||||
|
Pause the downloader.
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
|
self.__paused = True
|
||||||
|
|
||||||
|
def unpause(self) -> None:
|
||||||
|
"""
|
||||||
|
Unpause the downloader.
|
||||||
|
:return: None
|
||||||
|
"""
|
||||||
|
self.__paused = False
|
||||||
|
|
||||||
|
@property
|
||||||
|
def status(self) -> int:
|
||||||
|
"""
|
||||||
|
Get the status of the downloader.
|
||||||
|
:return: Integer Status of the downloader. -1: Haven't Started -2: Paused 0: Done 1: Downloading
|
||||||
|
"""
|
||||||
|
if self.__downloaded_size == 0:
|
||||||
|
return -1 # Haven't started
|
||||||
|
elif self.__paused:
|
||||||
|
return -2 # Paused
|
||||||
|
elif self.__downloaded_size / self.__total_size == 1:
|
||||||
|
return 0 # Done
|
||||||
|
else:
|
||||||
|
return 1 # Downloading
|
||||||
|
|
||||||
|
@property
|
||||||
|
def progress(self) -> float:
|
||||||
|
"""
|
||||||
|
Get the progress of the downloader.
|
||||||
|
:return: Float Progress of the downloader.
|
||||||
|
"""
|
||||||
|
if not self.__total_size:
|
||||||
|
return -1
|
||||||
|
return self.__downloaded_size / self.__total_size * 100
|
101
richka/core.py
101
richka/core.py
@ -1,44 +1,97 @@
|
|||||||
import time
|
import time
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
import richka
|
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
|
||||||
async def __download_range(session: aiohttp.ClientSession, url: str, start: int, end: int, destination: str) -> None:
|
import richka
|
||||||
|
from .controller import Controller
|
||||||
|
|
||||||
|
async def __download_range(session: aiohttp.ClientSession, url: str, start: int, end: int, destination: str, controller: Controller = None) -> None:
    """
    Download one byte range of a file and write it into place.
    The destination file must already exist; it is opened in 'r+b' mode and
    written starting at offset ``start``.
    :param session: aiohttp session used to issue the request.
    :param url: String Source URL.
    :param start: Integer First byte of the range (inclusive).
    :param end: Integer Last byte of the range (inclusive).
    :param destination: String Destination Path.
    :param controller: Download Controller, or None to download without progress tracking / pausing.
    :return: None
    :raises TimeoutError: When every allowed attempt failed.
    """
    richka.logger.info(f'Downloading part {start}-{end} of {url} to {destination}.')

    headers = {**richka.HEADERS, **{'range': f'bytes={start}-{end}'}}
    retry_times = richka.RETRY_TIMES

    while retry_times > 0:
        try:
            async with session.get(url, headers=headers, timeout=aiohttp.ClientTimeout(sock_read=richka.TIMEOUT, sock_connect=richka.TIMEOUT)) as response:
                with open(destination, 'r+b') as f:
                    # Every attempt restarts the slice from its own offset.
                    f.seek(start)
                    # Read stream
                    length = 0
                    async for chunk in response.content.iter_chunked(richka.CHUNK_SIZE):
                        # The controller is optional: only honour the pause flag when one was supplied.
                        while controller is not None and controller.paused:
                            await asyncio.sleep(1)
                        # noinspection PyTypeChecker
                        f.write(chunk)
                        # noinspection PyTypeChecker
                        length += len(chunk)
                        # Update tracker
                        if controller is not None:
                            await controller.update_progress(length, chunk_id=f"{start}-{end}")
            break
        except (aiohttp.ClientError, asyncio.TimeoutError):
            retry_times -= 1
            richka.logger.info(f'Download part {start}-{end} of {url} to {destination} failed for {richka.RETRY_TIMES - retry_times} times, retrying...')
            await asyncio.sleep(1)

    # The loop exits with budget left only via ``break``, i.e. on success.
    if retry_times > 0:
        richka.logger.info(f'Downloaded part {start}-{end} of {url} to {destination}.')
    else:
        raise TimeoutError(f'Download part {start}-{end} of {url} to {destination} timed out.')
|
||||||
|
|
||||||
async def __download_single(session: aiohttp.ClientSession, url: str, destination: str) -> None:
|
async def __download_single(session: aiohttp.ClientSession, url: str, destination: str, controller: Controller = None) -> None:
    """
    Download a whole file with a single request.
    The destination file must already exist; it is opened in 'r+b' mode and
    written from the beginning on every attempt.
    :param session: aiohttp session used to issue the request.
    :param url: String Source URL.
    :param destination: String Destination Path.
    :param controller: Download Controller, or None to download without progress tracking / pausing.
    :return: None
    :raises TimeoutError: When every allowed attempt failed.
    """
    richka.logger.info(f'Downloading {url} to {destination}.')

    retry_times = richka.RETRY_TIMES

    while retry_times > 0:
        try:
            async with session.get(url, headers=richka.HEADERS, timeout=aiohttp.ClientTimeout(sock_read=richka.TIMEOUT, sock_connect=richka.TIMEOUT)) as response:
                with open(destination, 'r+b') as f:
                    # Read stream
                    length = 0
                    async for chunk in response.content.iter_chunked(richka.CHUNK_SIZE):
                        # The controller is optional: only honour the pause flag when one was supplied.
                        while controller is not None and controller.paused:
                            await asyncio.sleep(1)
                        # noinspection PyTypeChecker
                        f.write(chunk)
                        # noinspection PyTypeChecker
                        length += len(chunk)
                        # Update tracker
                        if controller is not None:
                            await controller.update_progress(length)
            break
        except (aiohttp.ClientError, asyncio.TimeoutError):
            retry_times -= 1
            richka.logger.info(f'Download {url} to {destination} failed for {richka.RETRY_TIMES - retry_times} times, retrying...')
            await asyncio.sleep(1)

    # The loop exits with budget left only via ``break``, i.e. on success.
    if retry_times > 0:
        richka.logger.info(f'Downloaded {url} to {destination}.')
    else:
        raise TimeoutError(f'Download {url} to {destination} timed out.')
|
||||||
|
|
||||||
|
async def download(url: str, destination: str, controller: Controller = None) -> tuple[float, int]:
|
||||||
|
"""
|
||||||
|
Download a single file.
|
||||||
|
:param url: String Source URL.
|
||||||
|
:param destination: Destination Path.
|
||||||
|
:param controller: Download Controller.
|
||||||
|
:return: [Float, Integer] [Time Used, File Size]
|
||||||
|
"""
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
# Get file size
|
# Get file size
|
||||||
async with session.head(url) as response:
|
async with session.head(url) as response:
|
||||||
file_size = int(response.headers.get('Content-Length', 0))
|
file_size = int(response.headers.get('Content-Length', 0))
|
||||||
|
|
||||||
if not file_size or file_size / pow(1024, 2) <= 10:
|
if not file_size or file_size / pow(1024, 2) <= richka.SLICE_THRESHOLD:
|
||||||
if not file_size:
|
if not file_size:
|
||||||
richka.logger.info(f'Failed to get file size, directly downloading {url}.')
|
richka.logger.info(f'Failed to get file size, directly downloading {url}.')
|
||||||
else:
|
else:
|
||||||
richka.logger.info(f"Downloading {url} ({file_size}) to {destination} with signle mode.")
|
richka.logger.info(f"Downloading {url} ({file_size}) to {destination} with single mode.")
|
||||||
|
if controller is not None:
|
||||||
|
controller.total_size = file_size
|
||||||
|
|
||||||
# Create an empty file
|
# Create an empty file
|
||||||
with open(destination, 'wb') as f:
|
with open(destination, 'wb') as f:
|
||||||
@ -46,11 +99,14 @@ async def download(url: str, destination: str) -> float:
|
|||||||
|
|
||||||
# Start task
|
# Start task
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
await __download_single(session, url, destination)
|
await __download_single(session, url, destination, controller)
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
return end_time - start_time
|
richka.logger.info(f"Downloaded {url} ({file_size}) to {destination} with single mode.")
|
||||||
|
return end_time - start_time, file_size
|
||||||
|
|
||||||
richka.logger.info(f'Downloading {url} ({file_size}) to {destination} with slicing mode.')
|
richka.logger.info(f'Downloading {url} ({file_size}) to {destination} with slicing mode.')
|
||||||
|
if controller is not None:
|
||||||
|
controller.total_size = file_size
|
||||||
|
|
||||||
# Calc slice size
|
# Calc slice size
|
||||||
part_size = file_size // richka.COROUTINE_LIMIT
|
part_size = file_size // richka.COROUTINE_LIMIT
|
||||||
@ -64,11 +120,12 @@ async def download(url: str, destination: str) -> float:
|
|||||||
for i in range(richka.COROUTINE_LIMIT):
|
for i in range(richka.COROUTINE_LIMIT):
|
||||||
start = i * part_size
|
start = i * part_size
|
||||||
end = (start + part_size - 1) if i < richka.COROUTINE_LIMIT - 1 else (file_size - 1)
|
end = (start + part_size - 1) if i < richka.COROUTINE_LIMIT - 1 else (file_size - 1)
|
||||||
task = __download_range(session, url, start, end, destination)
|
task = __download_range(session, url, start, end, destination, controller)
|
||||||
tasks.append(task)
|
tasks.append(task)
|
||||||
|
|
||||||
# Start all task
|
# Start all task
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
await asyncio.gather(*tasks)
|
await asyncio.gather(*tasks)
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
return end_time - start_time
|
richka.logger.info(f'Downloaded {url} ({file_size}) to {destination} with slicing mode.')
|
||||||
|
return end_time - start_time, file_size
|
||||||
|
9
setup.py
9
setup.py
@ -21,17 +21,22 @@ setup(
|
|||||||
version=about["__version__"],
|
version=about["__version__"],
|
||||||
description=about["__description__"],
|
description=about["__description__"],
|
||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
|
install_requires=[
|
||||||
|
"aiohttp",
|
||||||
|
],
|
||||||
url=about["__url__"],
|
url=about["__url__"],
|
||||||
license=about["__license__"],
|
license=about["__license__"],
|
||||||
author=about["__author__"],
|
author=about["__author__"],
|
||||||
author_email=about["__author_email__"],
|
author_email=about["__author_email__"],
|
||||||
long_description_content_type="text/markdown",
|
long_description_content_type="text/markdown",
|
||||||
long_description=readme,
|
long_description=readme,
|
||||||
install_requires=[
|
python_requires='>=3.9',
|
||||||
],
|
|
||||||
classifiers=[
|
classifiers=[
|
||||||
'License :: OSI Approved :: MIT License',
|
'License :: OSI Approved :: MIT License',
|
||||||
'Programming Language :: Python :: 3.9',
|
'Programming Language :: Python :: 3.9',
|
||||||
|
'Programming Language :: Python :: 3.10',
|
||||||
|
'Programming Language :: Python :: 3.11',
|
||||||
|
'Programming Language :: Python :: 3.12',
|
||||||
'Programming Language :: Python :: 3 :: Only',
|
'Programming Language :: Python :: 3 :: Only',
|
||||||
],
|
],
|
||||||
entry_points={
|
entry_points={
|
||||||
|
Loading…
Reference in New Issue
Block a user