Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "nonebot-plugin-parser"
version = "2.6.0"
description = "NoneBot2 链接分享解析 Alconna 版, 现支持B站|抖音|快手|微博|小红书|YouTube|TikTok|Twitter|AcFun|NGA"
readme = "README.md"
requires-python = ">=3.10"
requires-python = ">=3.11"
authors = [{ "name" = "fllesser", "email" = "fllessive@gmail.com" }]
keywords = [
"acfun",
Expand Down Expand Up @@ -53,6 +53,7 @@ dependencies = [
"nonebot-plugin-apscheduler>=0.5.0,<1.0.0",
"nonebot-plugin-localstore>=0.7.4,<1.0.0",
"nonebot-plugin-uninfo>=0.10.1,<1.0.0",
"pyreqwest>=0.11.6",
]

[project.urls]
Expand Down Expand Up @@ -150,8 +151,8 @@ test-render = "pytest tests/renders --cov=src --cov-report=xml --junitxml=junit.
bump = "bump-my-version bump"
show-bump = "bump-my-version show-bump"

[tool.pyright]
pythonVersion = "3.10"
[tool.basedpyright]
pythonVersion = "3.11"
pythonPlatform = "All"
defineConstant = { PYDANTIC_V2 = true }
executionEnvironments = [
Expand Down
6 changes: 3 additions & 3 deletions src/nonebot_plugin_parser/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from enum import Enum
from enum import StrEnum
from typing import Final

from httpx import Timeout
Expand Down Expand Up @@ -29,7 +29,7 @@
DOWNLOAD_TIMEOUT: Final[Timeout] = Timeout(connect=15.0, read=240.0, write=10.0, pool=10.0)


class PlatformEnum(str, Enum):
class PlatformEnum(StrEnum):
ACFUN = "acfun"
BILIBILI = "bilibili"
DOUYIN = "douyin"
Expand All @@ -45,7 +45,7 @@ def __str__(self) -> str:
return self.value


class RenderType(str, Enum):
class RenderType(StrEnum):
default = "default"
common = "common"
htmlkit = "htmlkit"
Expand Down
106 changes: 83 additions & 23 deletions src/nonebot_plugin_parser/download/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import aiofiles
from httpx import HTTPError, AsyncClient
from nonebot import logger
from curl_cffi import CurlError
from rich.progress import (
Progress,
BarColumn,
Expand Down Expand Up @@ -59,33 +60,87 @@

headers = {**self.headers, **(ext_headers or {})}

try:
async with self.client.stream("GET", url, headers=headers, follow_redirects=True) as response:
response.raise_for_status()
content_length = response.headers.get("Content-Length")
content_length = int(content_length) if content_length else 0
async with self.client.stream("GET", url, headers=headers, follow_redirects=True) as response:
response.raise_for_status()
content_length = response.headers.get("Content-Length")
content_length = int(content_length) if content_length else 0

if content_length == 0:
logger.warning(f"媒体 url: {url}, 大小为 0, 取消下载")
raise IgnoreException
if content_length == 0:
logger.warning(f"媒体 url: {url}, 大小为 0, 取消下载")
raise IgnoreException

if (file_size := content_length / 1024 / 1024) > pconfig.max_size:
logger.warning(f"媒体 url: {url} 大小 {file_size:.2f} MB, 超过 {pconfig.max_size} MB, 取消下载")
raise IgnoreException
if (file_size := content_length / 1024 / 1024) > pconfig.max_size:
logger.warning(f"媒体 url: {url} 大小 {file_size:.2f} MB, 超过 {pconfig.max_size} MB, 取消下载")
raise IgnoreException

with self.rich_progress(file_name, content_length) as update_progress:
async with aiofiles.open(file_path, "wb") as file:
async for chunk in response.aiter_bytes(chunk_size):
await file.write(chunk)
update_progress(advance=len(chunk))
with self.rich_progress(file_name, content_length) as update_progress:
async with aiofiles.open(file_path, "wb") as file:
async for chunk in response.aiter_bytes(chunk_size):
await file.write(chunk)
update_progress(advance=len(chunk))

except HTTPError:
return file_path

async def _download_file_with_curl_cffi(
    self,
    url: str,
    *,
    file_name: str | None = None,
    ext_headers: dict[str, str] | None = None,
) -> Path:
    """Download ``url`` into the cache directory using curl_cffi.

    The whole response body is buffered in memory and then written to disk
    (no streaming and no size check, unlike the httpx-based download).

    Args:
        url: Address of the media to fetch.
        file_name: Target file name inside ``self.cache_dir``; derived from
            ``url`` via ``generate_file_name`` when omitted.
        ext_headers: Extra headers merged over ``self.headers``.

    Returns:
        Path of the cached file (returned immediately if it already exists).

    Raises:
        DownloadException: If curl_cffi reports a transport or HTTP error.
    """
    from curl_cffi import Response, CurlError, AsyncSession

    if not file_name:
        file_name = generate_file_name(url)
    file_path = self.cache_dir / file_name
    # Reuse an already cached file instead of downloading it again.
    if file_path.exists():
        return file_path

    headers = {**self.headers, **(ext_headers or {})}
    # DOWNLOAD_TIMEOUT is an httpx.Timeout; curl_cffi expects a number or a
    # (connect, read) tuple, so convert it instead of passing the object.
    # NOTE(review): a missing (None) component is mapped to 0 — confirm that
    # curl treats 0 as "use its default / no limit" for your version.
    connect_timeout = DOWNLOAD_TIMEOUT.connect or 0.0
    read_timeout = DOWNLOAD_TIMEOUT.read or 0.0
    try:
        async with AsyncSession() as session:
            response: Response = await session.get(
                url,
                headers=headers,
                timeout=(connect_timeout, read_timeout),
            )
            # Reject 4xx/5xx so an error page is never cached as media,
            # matching the status check done on the httpx path.
            # NOTE(review): relies on curl_cffi's HTTP error deriving from
            # CurlError so the handler below catches it — confirm.
            response.raise_for_status()
            content = response.content
        async with aiofiles.open(file_path, "wb") as file:
            await file.write(content)
    except CurlError as err:
        # Remove any partial file so it is not mistaken for a cached download.
        await safe_unlink(file_path)
        logger.exception(f"下载失败 | url: {url}, file_path: {file_path}")
        raise DownloadException("媒体下载失败") from err

    return file_path

async def _download_file_with_reqwest(
    self,
    url: str,
    *,
    file_name: str | None = None,
    ext_headers: dict[str, str] | None = None,
) -> Path:
    """Download ``url`` into the cache directory using pyreqwest.

    The whole response body is read into memory and then written to disk.

    Args:
        url: Address of the media to fetch.
        file_name: Target file name inside ``self.cache_dir``; derived from
            ``url`` via ``generate_file_name`` when omitted.
        ext_headers: Extra headers merged over ``self.headers``.

    Returns:
        Path of the cached file (returned immediately if it already exists).

    NOTE(review): the HTTP status is not inspected here, so an error response
    body would be cached as media — confirm whether pyreqwest should be asked
    to raise on 4xx/5xx. Exceptions raised by pyreqwest propagate unchanged.
    """
    from pyreqwest.simple.request import pyreqwest_get

    if not file_name:
        file_name = generate_file_name(url)

    file_path = self.cache_dir / file_name
    # Reuse an already cached file instead of downloading it again.
    if file_path.exists():
        return file_path

    headers = {**self.headers, **(ext_headers or {})}

    response = await pyreqwest_get(url).headers(headers).send()
    # Read the body BEFORE creating the file: if the read fails, no empty
    # file is left behind to be returned as a "cached" download next time.
    content = await response.bytes()

    async with aiofiles.open(file_path, "wb") as file:
        await file.write(content)

    return file_path

@auto_task
async def download_file(
self,
Expand All @@ -96,12 +151,17 @@
chunk_size: int = 64 * 1024,
) -> Path:
"""download file by url with stream"""
return await self._download_file(
url,
file_name=file_name,
ext_headers=ext_headers,
chunk_size=chunk_size,
)
try:
path = await self._download_file(url, file_name=file_name, ext_headers=ext_headers, chunk_size=chunk_size)
except HTTPError:
logger.opt(exception=True).warning(f"下载失败(httpx) | url: {url}")
try:
path = await self._download_file_with_reqwest(url, file_name=file_name, ext_headers=ext_headers)
except CurlError:
logger.opt(exception=True).warning(f"下载失败(curl_cffi) | url: {url}")
raise DownloadException("媒体下载失败")

return path

@auto_task
async def download_video(
Expand Down
4 changes: 2 additions & 2 deletions src/nonebot_plugin_parser/parsers/base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from re import Match, Pattern, compile
from abc import ABC
from typing import TYPE_CHECKING, Any, TypeVar, ClassVar, cast
from typing import TYPE_CHECKING, Any, TypeVar, ClassVar, cast, final
from asyncio import Task
from pathlib import Path
from collections.abc import Callable, Coroutine
from typing_extensions import Unpack, final
from typing_extensions import Unpack

from .data import Platform, ParseResult, ImageContent, ParseResultKwargs
from .task import PathTask
Expand Down
Loading
Loading