charslab · carloalbertobarbano · May 11, 2026 · Feb 21, 2025 · Feb 21, 2025 · Feb 21, 2025
diff --git a/.github/workflows/build_image.yml b/.github/workflows/build_image.yml
@@ -1,11 +1,10 @@
-name: DockerBuildAndPush
+name: Build Image
 
 on:
   push:
     branches:
       - master
-      - developement
-      - ptb-async
+      - development
 
 env:
   IMAGE_NAME: transcriberbot

diff --git a/.github/workflows/docker_build_push.yml b/.github/workflows/docker_build_push.yml
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 # TranscriberBot-specific ignores
 media/
+.python-version
 
 # Generic data-related ignores
 *.csv

diff --git a/.python-version b/.python-version
diff --git a/config/subscription.json b/config/subscription.json
@@ -0,0 +1,4 @@
+{
+  "channel_id": "<id>",
+  "premium_join_link": "xxx"
+}
diff --git a/requirements.txt b/requirements.txt
@@ -6,4 +6,5 @@ tesserocr
 pydub
 zbarlight
 requests
-sentry-sdk
+sentry-sdk
+audioread
diff --git a/run.sh b/run.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-docker pull ghcr.io/charslab/transcriberbot:ptb-async
+docker pull ghcr.io/charslab/transcriberbot:development
 docker run \
    -e LC_ALL=C \
    -d --restart unless-stopped \
@@ -12,4 +12,4 @@ docker run \
    --cpus=4.0 \
    --memory=3000m \
    -u "$(id -u):1337" \
-   ghcr.io/charslab/transcriberbot:ptb-async
+   ghcr.io/charslab/transcriberbot:development
diff --git a/src/audiotools/speech.py b/src/audiotools/speech.py
@@ -97,7 +97,15 @@ async def transcribe_wit(path, api_key):
 
 
 async def transcribe_whisper(path):
-    resp = requests.get(f"{config.get_config_prop('app')['whisper']['api_endpoint']}/transcribe?file_id={path}")
+    loop = asyncio.get_event_loop()
+    resp = await loop.run_in_executor(
+        None,
+        partial(requests.get,
+                url=f"{config.get_config_prop('app')['whisper']['api_endpoint']}/transcribe?file_id={path}")
+    )
+
+    if resp.status_code != 200:
+        raise ValueError(f"Error transcribing audio: {resp.text}")
 
     # split the response into chunks of 4000 characters
     chunks = textwrap.wrap(resp.text, 4000)

diff --git a/src/config/__init__.py b/src/config/__init__.py
@@ -68,3 +68,10 @@ def get_document_extensions():
 
 def get_bot_admins():
     return [int(id) for id in get_config_prop("telegram")["admins"]]
+
+
+def get_premium_join_link():  # premium invite link, read from the "subscription" config section
+    return get_config_prop("subscription")["premium_join_link"]
+
+def get_premium_chat_id():  # "channel_id" is returned as stored, no int() cast — NOTE(review): confirm callers accept that
+    return get_config_prop("subscription")["channel_id"]
diff --git a/src/database/db.py b/src/database/db.py
@@ -33,13 +33,9 @@ def assoc(self):
     def __exit__(self, exc_type, exc_value, exc_traceback):
         logger.debug("__exit__")
         self.__close()
-
-        if exc_type:
-            logger.error("exc_type: {}".format(exc_type))
-            logger.error("exc_value: {}".format(exc_value))
-            logger.error("exc_traceback: {}".format(exc_traceback))
-            logger.error("Caught exception", exc_info=True)
-
+        if exc_type:
+            # Returning True below suppresses the exception, so this record is its only trace.
+            logger.error("Caught exception", exc_info=(exc_type, exc_value, exc_traceback))
         return True
 
     def execute(self, query, *args):
@@ -95,7 +88,7 @@ def get_chat_voice_enabled(chat_id):
             with TBDB._get_db() as db:
                 c = db.execute("SELECT voice_enabled FROM chats WHERE chat_id='{0}'".format(chat_id))
                 return c.fetchone()[0]
-        except TypeError as e:
+        except Exception as e:
             logger.error("Error getting voice_enabled for chat %d: %s", chat_id, e)
             raise e
 

diff --git a/src/transcriberbot/blueprints/__init__.py b/src/transcriberbot/blueprints/__init__.py
@@ -2,4 +2,4 @@
 Author: Carlo Alberto Barbano <carlo.alberto.barbano@outlook.com>
 Date: 15/02/25
 """
-from . import commands, messages, voice, photos, chat_handlers
+from . import commands, messages, voice, photos, chat_handlers, payments
diff --git a/src/transcriberbot/blueprints/payments.py b/src/transcriberbot/blueprints/payments.py
@@ -0,0 +1,35 @@
+"""
+Author: Carlo Alberto Barbano <carlo.barbano@unito.it>
+Date: 20/02/25
+"""
+import config
+import resources as R
+
+from telegram import Update, InlineKeyboardMarkup, InlineKeyboardButton
+from telegram.ext import ContextTypes
+from database import TBDB
+from transcriberbot.filters import is_premium_user
+
+
+async def premium(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
+    """Reply with the premium join message, a "Join" button and the current plan, in the chat's language."""
+    lang = TBDB.get_chat_lang(update.effective_chat.id)
+    premium_join_link = config.get_premium_join_link()
+    keyboard = InlineKeyboardMarkup(
+        [[InlineKeyboardButton("Join", url=premium_join_link)]]
+    )
+
+    premium_join_message = R.get_string_resource(
+        'premium_join_message', lang
+    ).replace('{invite_url}', premium_join_link)
+
+    # The plan line is looked up with the same language as the join message.
+    plan_key = "current_plan_premium" if await is_premium_user(update, context) else "current_plan_free"
+    current_plan = R.get_string_resource(plan_key, lang)
+
+    await update.effective_message.reply_text(
+        f"{premium_join_message}\n\n{current_plan}",
+        reply_markup=keyboard, parse_mode="html"
+    )
+
+
diff --git a/src/transcriberbot/blueprints/voice.py b/src/transcriberbot/blueprints/voice.py
@@ -7,6 +7,7 @@
 import os
 import traceback
 import datetime
+import audioread
 from asyncio import CancelledError
 
 import telegram
@@ -18,6 +19,7 @@
 import config
 import resources as R
 from database import TBDB
+from transcriberbot.filters import is_premium_user
 
 logger = logging.getLogger(__name__)
 
@@ -94,6 +96,9 @@ async def run_voice_task(update: Update, context: ContextTypes.DEFAULT_TYPE, med
 
 async def process_media_voice(update: Update, context: ContextTypes.DEFAULT_TYPE, media: [Voice | VideoNote | Document],
                               name: str) -> None:
+    logger.debug("Update: %s", update)
+    logger.debug("Effective user: %s", update.effective_user)
+
     chat_id = update.effective_chat.id
     file_size = media.file_size
     max_size = config.get_config_prop("app").get("max_media_voice_file_size", 20 * 1024 * 1024)
@@ -119,7 +124,26 @@ async def process_media_voice(update: Update, context: ContextTypes.DEFAULT_TYPE
         os.remove(file_path)
 
 
-async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TYPE, path: str):
+def get_duration(update: Update, path: str):
+    """Return the duration from the message's media metadata, or probe the file at `path` if there is none."""
+    msg = update.effective_message
+    media = msg.voice or msg.audio or msg.video_note or msg.video
+    if media is None:
+        with audioread.audio_open(path) as audio_file:
+            return audio_file.duration
+    return media.duration
+
+
+async def get_backend(update: Update, context: ContextTypes.DEFAULT_TYPE, path):
+    """Return "whisper" for premium users whose media fits whisper's max_duration, otherwise "wit"."""
+    if not await is_premium_user(update, context):
+        return "wit"
+    logger.info("User is premium")
+    # Media longer than the configured cap stays on wit even for premium users.
+    max_duration = config.get_config_prop("app")["whisper"]["max_duration"]
+    return "whisper" if get_duration(update, path) <= max_duration else "wit"
+
+async def run_transcription(update: Update, context: ContextTypes.DEFAULT_TYPE, path: str, backend: str):
     chat_id = update.effective_chat.id
     task_id = update.effective_message.message_id
     lang = TBDB.get_chat_lang(chat_id)
@@ -137,7 +161,7 @@ async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TY
     logger.debug("Using key %s for lang %s", api_key, lang)
 
     message = await context.bot.send_message(
-        chat_id, R.get_string_resource("transcribing", lang), parse_mode="html",
+        chat_id, f"{R.get_string_resource('transcribing', lang)} ({backend}, lang: {lang})", parse_mode="html",
         reply_to_message_id=update.effective_message.message_id
     )
 
@@ -151,7 +175,7 @@ async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TY
         text = R.get_string_resource("transcription_text", lang) + "\n"
 
     try:
-        async for idx, speech, n_chunks in audiotools.transcribe(path, api_key):
+        async for idx, speech, n_chunks in audiotools.transcribe(path, api_key, backend=backend):
             logging.debug(f"Transcription idx={idx} n_chunks={n_chunks}, text={speech}")
             suffix = f" <b>[{idx + 1}/{n_chunks}]</b>" if idx < n_chunks - 1 else ""
             reply_markup = keyboard if idx < n_chunks - 1 else None
@@ -172,43 +196,6 @@ async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TY
 
             text = f"{text} {speech}"
 
-            # retry_num = 0
-            # retry = True
-            # while retry:  # Retry loop
-            #     try:
-            #         if len(text + " " + speech) >= 4000:
-            #             text = R.get_string_resource("transcription_continues", lang) + "\n"
-            #             message = await context.bot.send_message(
-            #                 chat_id, f"{text} {speech} {suffix}",
-            #                 reply_to_message_id=message.message_id, parse_mode="html",
-            #                 reply_markup=keyboard
-            #             )
-            #         else:
-            #             message = await context.bot.edit_message_text(
-            #                 f"{text} {speech} {suffix}", chat_id=chat_id,
-            #                 message_id=message.message_id, parse_mode="html",
-            #                 reply_markup=keyboard
-            #             )
-            #
-            #         text += " " + speech
-            #         retry = False
-            #
-            #     except telegram.error.TimedOut as e:
-            #         print(e)
-            #         logger.error("Timeout error %s", traceback.format_exc())
-            #         retry_num += 1
-            #         if retry_num >= 3:
-            #             retry = False
-            #
-            #     except telegram.error.RetryAfter as r:
-            #         logger.warning("Retrying after %d", r.retry_after)
-            #         await asyncio.sleep(r.retry_after)
-            #
-            #     except telegram.error.TelegramError:
-            #         logger.error("Telegram error %s", traceback.format_exc())
-            #         retry = False
-
-
     except CancelledError:
         logging.debug("Task cancelled")
         await context.bot.edit_message_text(
@@ -226,3 +213,21 @@ async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TY
         )
 
         raise e
+
+async def transcribe_audio_file(update: Update, context: ContextTypes.DEFAULT_TYPE, path: str):
+    """Transcribe the file at `path` with the selected backend, retrying once with wit if whisper fails."""
+    backend = await get_backend(update, context, path)
+
+    # Only a whisper failure triggers the fallback; a wit failure is re-raised as-is.
+    try:
+        await run_transcription(update, context, path, backend)
+    except Exception:
+        if backend != "whisper":
+            raise
+        logger.exception("Whisper transcription failed, falling back to wit")
+        try:
+            await run_transcription(update, context, path, "wit")
+        except Exception:
+            logger.exception("Wit transcription also failed")
+            raise
+
diff --git a/src/transcriberbot/bot.py b/src/transcriberbot/bot.py
@@ -2,23 +2,24 @@
 Author: Carlo Alberto Barbano <carlo.alberto.barbano@outlook.com>
 Date: 15/02/25
 """
-from telegram import Update
-
-import config
 import logging
-
-from telegram.ext import MessageHandler, ApplicationBuilder, CommandHandler, ContextTypes, CallbackQueryHandler, \
-    ChatMemberHandler
 from functools import partial
-from transcriberbot.blueprints import commands, messages, voice, photos, chat_handlers
-from transcriberbot.blueprints.commands import set_language
 
+from telegram import Update
+from telegram.ext import MessageHandler, ApplicationBuilder, CommandHandler, CallbackQueryHandler, \
+    ChatMemberHandler
 from telegram.ext.filters import VOICE, VIDEO_NOTE, AUDIO, PHOTO
+
+import config
+from transcriberbot.blueprints import commands, messages, voice, photos, chat_handlers, payments
+from transcriberbot.blueprints.commands import set_language
 from transcriberbot.filters import chat_admin, FromPrivate, AllowedDocument, BotAdmin
 
 
 def run(bot_token: str):
     application = (ApplicationBuilder()
+                   # .base_url("https://api.telegram.org/bot{token}/test")
+                   # .base_file_url("https://api.telegram.org/file/bot{token}/test")
                    .token(bot_token)
                    .concurrent_updates(True)
                    .build())
@@ -44,7 +45,8 @@ def run(bot_token: str):
         'enable_qr': commands.enable_qr,
         'translate': commands.translate,
         'donate': commands.donate,
-        'privacy': commands.privacy
+        'privacy': commands.privacy,
+        'premium': payments.premium
     }
 
     for command, callback in chat_admin_handlers.items():