Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
d4eac53
Add definitions for MessageConcatenation and MessageMerge classes in …
soft-suroleb Dec 9, 2024
a5f6295
Implement basic logic for concatenating catalogs
soft-suroleb Dec 9, 2024
6d3212b
Add options: unique, less-than, more-than, no-wrap, and width
soft-suroleb Dec 9, 2024
8695769
Implement basic msgmerge logic for working with a compendium
soft-suroleb Dec 9, 2024
4774889
Write tests for msgcat
soft-suroleb Dec 9, 2024
5a445c4
Write tests for msgmerge
soft-suroleb Dec 9, 2024
f7ddd85
Add options update, backup, and c_overwrite for a different compendiu…
soft-suroleb Dec 9, 2024
b00f215
Add test for msgmerge compendium overwrite mode with no comments
soft-suroleb Dec 9, 2024
0eab37d
Refactor test for msgmerge with compendium-overwrite option
soft-suroleb Dec 9, 2024
7fb19e3
Create a catalog without fuzzy by default, remove add-location
soft-suroleb Dec 9, 2024
4709cb6
Rename msgmerge to merge and msgcat to concat
soft-suroleb Dec 9, 2024
8b780d4
Add discription to all options
soft-suroleb Dec 9, 2024
80ab44a
Ability to specify multiple compendiums
soft-suroleb Mar 2, 2025
8b00b02
Marking conflicting messages
soft-suroleb Mar 2, 2025
a28b263
Fix PR issues
soft-suroleb Mar 23, 2025
debe950
Add info about pybable concat and pybabel merge into docs
soft-suroleb Apr 7, 2025
13e3330
Add usage documentation for pybabel concat and merge commands
soft-suroleb Apr 21, 2025
49e6003
Fix PR issues
soft-suroleb Apr 27, 2025
f1618e5
Add '_conflicts' getter in catalog
soft-suroleb Apr 27, 2025
6dd98e6
Remove fuzzy flag mutation from add_conflict
soft-suroleb Apr 8, 2026
ad322bd
Refactor ConcatenateCatalog: stdout output, mutual exclusivity, bette…
soft-suroleb Apr 8, 2026
9644a86
Fix MergeCatalog options and update docs
soft-suroleb Apr 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion babel/messages/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@

import datetime
import re
import os
from collections.abc import Iterable, Iterator
from collections import defaultdict
from copy import copy
from difflib import SequenceMatcher
from email import message_from_string
from heapq import nlargest
from string import Formatter
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, TypedDict

from babel import __version__ as VERSION
from babel.core import Locale, UnknownLocaleError
Expand Down Expand Up @@ -357,6 +359,13 @@ def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict')
return str(s)


class ConflictInfo(TypedDict):
    """One competing definition of a message, as stored by ``Catalog.add_conflict``."""

    # The message object that was reported as conflicting.
    message: Message
    # Filename supplied by the caller that reported the conflict.
    filename: str
    # Project and version strings supplied together with the message.
    project: str
    version: str


class Catalog:
"""Representation of a message catalog."""

Expand Down Expand Up @@ -400,6 +409,7 @@ def __init__(
self.locale = locale
self._header_comment = header_comment
self._messages: dict[str | tuple[str, str], Message] = {}
self._conflicts: dict[str | tuple[str, str], list[ConflictInfo]] = defaultdict(list)

self.project = project or 'PROJECT'
self.version = version or 'VERSION'
Expand Down Expand Up @@ -780,6 +790,18 @@ def __setitem__(self, id: _MessageID, message: Message) -> None:
)
self._messages[key] = message

def add_conflict(self, message: Message, filename: str, project: str, version: str):
    """Record a competing definition of ``message``.

    The entry is appended to the list kept under ``message.id``; earlier
    entries for the same id are preserved.

    :param message: the conflicting message
    :param filename: filename to store with the entry
    :param project: project name to store with the entry
    :param version: project version to store with the entry
    """
    entry = dict(message=message, filename=filename, project=project, version=version)
    # ``_conflicts`` is a defaultdict(list), so the first conflict for an id
    # creates its list on the fly.
    self._conflicts[message.id].append(entry)

def get_conflicts(self, id: _MessageID) -> list[ConflictInfo]:
    """Return the conflicts recorded for ``id``, or an empty list if there are none.

    :param id: the message id to look up
    """
    recorded = self._conflicts
    # Membership test instead of indexing: indexing the defaultdict would
    # insert an empty entry for every id that is merely queried.
    if id in recorded:
        return recorded[id]
    return []

def add(
self,
id: _MessageID,
Expand Down
263 changes: 262 additions & 1 deletion babel/messages/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,15 @@
import sys
import tempfile
import warnings
from collections import Counter, defaultdict
from configparser import RawConfigParser
from io import StringIO
from typing import Any, BinaryIO, Iterable, Literal

from babel import Locale, localedata
from babel import __version__ as VERSION
from babel.core import UnknownLocaleError
from babel.messages.catalog import DEFAULT_HEADER, Catalog
from babel.messages.catalog import DEFAULT_HEADER, Catalog, ConflictInfo
from babel.messages.extract import (
DEFAULT_KEYWORDS,
DEFAULT_MAPPING,
Expand Down Expand Up @@ -887,6 +888,262 @@ def run(self):
return


class ConcatenateCatalog(CommandMixin):
    """Command that concatenates several PO files into one catalog."""

    description = 'concatenates the specified PO files into single one'
    # (long name, short name, help text) triples.
    user_options = [
        ('input-files', None, 'input files'),
        ('output-file=', 'o', 'write output to specified file, the results are written '
                              'to standard output if no output file is specified or if it is \'-\''),
        ('less-than=', '<', 'print messages with less than this many '
                            'definitions, defaults to infinite if not set'),
        ('more-than=', '>', 'print messages with more than this many '
                            'definitions, defaults to 0 if not set'),
        ('unique', 'u', 'shorthand for --less-than=2, requests '
                        'that only unique messages be printed'),
        ('use-first', None, 'use first available translation for each '
                            'message, don\'t merge several translations'),
        ('no-location', None, 'do not include location comments with filename and line number'),
        ('width=', 'w', 'set output line width (default 76)'),
        ('no-wrap', None, 'do not break long message lines, longer than '
                          'the output line width, into several lines'),
        ('sort-output', 's', 'generate sorted output'),
        ('sort-by-file', 'F', 'sort output by file location'),
    ]

    # Positional command-line arguments are collected into ``input_files``.
    as_args = 'input-files'

    # Only options declared in ``user_options`` belong here; the stray
    # 'strict' entry (no such option exists on this command) was dropped.
    boolean_options = [
        'unique',
        'use-first',
        'no-location',
        'no-wrap',
        'sort-output',
        'sort-by-file',
    ]

def initialize_options(self):
    """Reset every option to its default before the command line is parsed."""
    self.input_files = None
    self.output_file = None
    # ``None`` means "no upper bound" on the number of definitions.
    self.less_than = None
    self.more_than = 0
    self.unique = False
    self.use_first = False
    # Boolean flag: default to False like the other flags on this command.
    self.no_location = False
    # Stays ``None`` until finalize_options picks the effective width.
    self.width = None
    self.no_wrap = False
    self.sort_output = False
    self.sort_by_file = False

def finalize_options(self):
    """Validate the parsed options and convert numeric ones to ``int``.

    :raises OptionError: if no input files were given, if ``--no-wrap`` is
        combined with ``--width``, or if ``--unique`` is combined with
        ``--less-than`` / ``--more-than``
    """
    if not self.input_files:
        raise OptionError('you must specify the input files')

    wrapping = not self.no_wrap
    if not wrapping and self.width:
        raise OptionError("'--no-wrap' and '--width' are mutually exclusive")
    if wrapping and not self.width:
        # Wrapping requested without an explicit width: use the default.
        self.width = 76
    elif self.width is not None:
        self.width = int(self.width)

    self.more_than = 0 if self.more_than is None else int(self.more_than)
    if self.less_than is not None:
        self.less_than = int(self.less_than)

    if not self.unique:
        return
    if self.less_than is not None or self.more_than:
        raise OptionError("'--unique' is mutually exclusive with '--less-than' and '--more-than'")
    # --unique is shorthand for --less-than=2.
    self.less_than = 2

def _collect_message_info(self):
templates: list[tuple[str, Catalog]] = []
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like it could be a dict (they are ordered these days, anyway, if the order makes a difference).

message_counts: Counter = Counter()
message_strings: dict[object, set] = defaultdict(set)

for filename in self.input_files:
with open(filename, 'r') as pofile:
template = read_po(pofile)
for message in template:
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably also filter out messages with a nullish id.

if not message.id:
continue
message_counts[message.id] += 1
message_strings[message.id].add(
message.string if isinstance(message.string, str) else tuple(message.string)
)
templates.append((filename, template))

return templates, message_counts, message_strings

def run(self):
    """Build the concatenated catalog and write it out.

    Messages whose number of definitions falls outside the
    ``more_than`` / ``less_than`` window are skipped. Unless ``use_first``
    is set, a message defined more than once with differing translations
    is recorded as a conflict and flagged fuzzy. The result goes to
    ``output_file``, or to standard output when that is unset or ``'-'``.
    """
    catalog = Catalog(fuzzy=False)
    templates, message_counts, message_strings = self._collect_message_info()
    upper_bound = self.less_than

    for path, template in templates:
        # The first template that is processed while no locale is set
        # provides the output locale.
        if catalog.locale is None:
            catalog.locale = template.locale

        for message in template:
            msgid = message.id
            if not msgid:
                continue

            occurrences = message_counts[msgid]
            too_few = occurrences <= self.more_than
            too_many = upper_bound is not None and occurrences >= upper_bound
            if too_few or too_many:
                continue

            conflicting = (
                occurrences > 1
                and not self.use_first
                and len(message_strings[msgid]) > 1
            )
            if conflicting:
                catalog.add_conflict(
                    message, os.path.basename(path), template.project, template.version,
                )
                message.flags |= {'fuzzy'}

            catalog[msgid] = message

    catalog.fuzzy = any(message.fuzzy for message in catalog)

    write_options = dict(
        width=self.width,
        sort_by_file=self.sort_by_file,
        sort_output=self.sort_output,
        no_location=self.no_location,
    )
    target = self.output_file
    if target and target != '-':
        with open(target, 'wb') as outfile:
            write_po(outfile, catalog, **write_options)
    else:
        write_po(sys.stdout.buffer, catalog, **write_options)


class MergeCatalog(CommandMixin):
    """Command that merges a PO file with a newer template and optional compendiums."""

    description = 'update a PO file by merging it with a newer POT template, optionally using a compendium'
    # (long name, short name, help text) triples.
    user_options = [
        ('input-files', None, 'exactly two input files: def.po (obsolete translations); ref.pot (current template)'),
        ('compendium=', 'C', 'additional library of message translations, may be specified more than once'),
        ('compendium-overwrite', None, 'overwrite existing translations with compendium entries'),
        ('no-compendium-comment', None, 'do not add a comment for translations taken from a compendium'),
        ('update', 'U', 'update def.po, do nothing if def.po already up to date'),
        # NOTE(review): the help text used to promise output on standard
        # output when no file was given. finalize_options rejects that
        # combination and run() only ever writes to a path, so the text
        # now states the actual requirement.
        ('output-file=', 'o', 'write output to specified file, '
                              'required unless --update is given'),
        ('backup', None, 'make a backup of def.po'),
        ('suffix=', None, 'use SUFFIX as backup suffix instead of ~ (tilde)'),
        ('no-fuzzy-matching', 'N', 'do not use fuzzy matching'),
        ('no-location', None, 'do not include location comments with filename and line number'),
        ('width=', 'w', 'set output line width (default 76)'),
        ('no-wrap', None, 'do not break long message lines, longer '
                          'than the output line width, into several lines'),
        ('sort-output', 's', 'generate sorted output'),
        ('sort-by-file', 'F', 'sort output by file location'),
    ]

    # Positional command-line arguments are collected into ``input_files``.
    as_args = 'input-files'

    # Options that may be given more than once.
    multiple_value_options = (
        'compendium',
    )

    boolean_options = [
        'compendium-overwrite',
        'no-compendium-comment',
        'update',
        'backup',
        'no-fuzzy-matching',
        'no-location',
        'no-wrap',
        'sort-output',
        'sort-by-file',
    ]

def initialize_options(self):
    """Reset every option to its default before the command line is parsed."""
    # Values that are unset until supplied by the user or finalize_options.
    self.input_files = self.compendium = self.output_file = self.width = None

    # Boolean switches, all off by default.
    self.compendium_overwrite = self.no_compendium_comment = False
    self.update = self.backup = False
    self.no_fuzzy_matching = self.no_location = self.no_wrap = False
    self.sort_output = self.sort_by_file = False

    # Suffix appended to def.po when a backup copy is made.
    self.suffix = '~'

def finalize_options(self):
    """Validate the parsed options and settle the output width.

    :raises OptionError: unless exactly two input files were given, if
        neither an output file nor ``--update`` was requested, or if
        ``--no-wrap`` is combined with ``--width``
    """
    files = self.input_files
    if not files or len(files) != 2:
        raise OptionError(
            f'exactly two input files are required (def.po and ref.pot), got: {self.input_files!r}'
        )
    if not (self.output_file or self.update):
        raise OptionError('you must specify the output file or use --update')

    wrapping = not self.no_wrap
    if not wrapping and self.width:
        raise OptionError("'--no-wrap' and '--width' are mutually exclusive")
    if wrapping and not self.width:
        # Wrapping requested without an explicit width: use the default.
        self.width = 76
    elif self.width is not None:
        self.width = int(self.width)

def _get_messages_from_compendiums(self, compendium_paths):
if not compendium_paths:
return
for file_path in compendium_paths:
with open(file_path, 'r') as pofile:
catalog = read_po(pofile)
for message in catalog:
yield message, file_path

def run(self):
    """Merge def.po with ref.pot, apply compendium translations, write the result.

    A compendium translation is applied to a message that exists in the
    merged catalog when that message is untranslated or fuzzy, or
    unconditionally when ``compendium_overwrite`` is set. In the overwrite
    case the replaced non-fuzzy translation is kept in ``catalog.obsolete``.
    Output goes to def.po when ``update`` is set (after an optional backup
    copy), otherwise to ``output_file``.
    """
    def_file, ref_file = self.input_files

    # Open in binary mode so the text is not decoded with the platform's
    # default encoding before read_po sees it.
    # NOTE(review): presumably read_po decodes bytes using the charset
    # declared in the PO header -- confirm against read_po.
    with open(def_file, 'rb') as pofile:
        catalog = read_po(pofile)
    with open(ref_file, 'rb') as pofile:
        ref_catalog = read_po(pofile)
    catalog.update(
        ref_catalog,
        no_fuzzy_matching=self.no_fuzzy_matching
    )

    for message, compendium_path in self._get_messages_from_compendiums(self.compendium):
        # An untranslated compendium entry has nothing to contribute and
        # must not blank out an existing (possibly fuzzy) translation.
        if not message.string:
            continue

        # Look the message up together with its context so that entries
        # sharing an id under different contexts are not mixed up.
        current = catalog.get(message.id, message.context)
        if current is None:
            continue
        if current.string and not current.fuzzy and not self.compendium_overwrite:
            continue

        if self.compendium_overwrite and not current.fuzzy and current.string:
            # Keep the translation that is about to be replaced.
            catalog.obsolete[message.id] = current.clone()

        current.string = message.string
        # The compendium translation supersedes any fuzzy guess.
        current.flags.discard('fuzzy')

        if not self.no_compendium_comment:
            current.auto_comments.append(compendium_path)

    catalog.fuzzy = any(message.fuzzy for message in catalog)
    output_path = def_file if self.update else self.output_file

    if self.update and self.backup:
        # Copy the original before it is overwritten below.
        shutil.copy(def_file, def_file + self.suffix)

    with open(output_path, 'wb') as outfile:
        write_po(
            outfile,
            catalog,
            no_location=self.no_location,
            width=self.width,
            sort_by_file=self.sort_by_file,
            sort_output=self.sort_output,
        )


class CommandLineInterface:
"""Command-line interface.

Expand All @@ -901,13 +1158,17 @@ class CommandLineInterface:
'extract': 'extract messages from source files and generate a POT file',
'init': 'create new message catalogs from a POT file',
'update': 'update existing message catalogs from a POT file',
'concat': 'concatenates and merges the specified PO files',
'merge': 'combines two PO files into one',
}

command_classes = {
'compile': CompileCatalog,
'extract': ExtractMessages,
'init': InitCatalog,
'update': UpdateCatalog,
'concat': ConcatenateCatalog,
'merge': MergeCatalog,
}

log = None # Replaced on instance level
Expand Down
Loading