Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/gimbench/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ def _add_scierc_eval_args(parser):

def _add_cv_eval_args(parser):
parser.add_argument("--use_outlines", action="store_true", help="Whether to use outlines in CV evaluation")
parser.add_argument("--use_gliner2", action="store_true", help="Whether to use GLiNER2 in CV evaluation")
parser.add_argument(
Comment on lines 203 to 206
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

--use_outlines and --use_gliner2 can both be set, but evaluator selection currently depends on precedence elsewhere. Consider making these options mutually exclusive at the argument-parsing/validation layer so users get an immediate, clear error if they pass both.

Copilot uses AI. Check for mistakes.
"--judge_model_name",
type=str,
Expand Down
43 changes: 41 additions & 2 deletions src/gimbench/cv/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@
from gimbench.log import get_logger
from gimbench.models import SimpleGIM

from .schema import CV_FIELDS, GIMKIT_TEMPLATE, OUTLINES_JSON_SCHEMA, OUTLINES_TEMPLATE, SHARED_PROMPT_PREFIX
from .schema import (
CV_FIELDS,
GIMKIT_TEMPLATE,
GLINER_SCHEMA,
OUTLINES_JSON_SCHEMA,
OUTLINES_TEMPLATE,
SHARED_PROMPT_PREFIX,
)


logger = get_logger(__name__)
Expand Down Expand Up @@ -236,7 +243,39 @@ def _extract_fields(self, cv_content: str) -> dict[str, str]:
raise ValueError(f"Expected dict but got {type(extraction).__name__}: {extraction}")


class GLiNEREvaluator(CVEvaluator):
def __init__(self, args: Namespace, dataset: Dataset):
super().__init__(args, dataset)
try:
from gliner2 import GLiNER2
except ImportError:
raise ImportError(
"The 'gliner2' package is required but not installed. "
"Please install it manually using `pip install gliner2` or `uv add gliner2` "
"to evaluate using this model."
Comment on lines +252 to +255
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment says the CV content will be truncated due to a GLiNER2 length limit, but the code passes cv_content directly with no truncation. Either implement truncation based on the model’s documented limit or update/remove the comment to reflect actual behavior.

Copilot uses AI. Check for mistakes.
)
self.model = GLiNER2.from_pretrained(args.model_name)

def _extract_fields(self, cv_content: str) -> dict[str, str]:
    """Extract the CV fields from ``cv_content`` with the loaded GLiNER2 model.

    The text is passed to ``self.model.extract_json`` unchanged; no
    truncation is applied here.

    Args:
        cv_content: Raw CV text to run the extraction on.

    Returns:
        A dict with one string per name in ``CV_FIELDS``, taken from the
        first item under the ``"cv"`` key of the model output. Missing or
        ``None`` values become ``""``. An empty dict is returned when the
        output carries no usable ``"cv"`` record.
    """
    result = self.model.extract_json(cv_content, GLINER_SCHEMA)

    # Guard the lookup so an unexpected (non-dict) result is reported below
    # instead of failing on the membership test.
    records = result.get("cv") if isinstance(result, dict) else None
    if not isinstance(records, list) or not records or not isinstance(records[0], dict):
        # Keep the historical empty-dict return, but make the failure visible.
        logger.warning(f"GLiNER2 returned no usable 'cv' record (got {type(result).__name__})")
        return {}

    # Only the first extracted record is used.
    first_record = records[0]
    extraction: dict[str, str] = {}
    for field in CV_FIELDS:
        value = first_record.get(field)
        extraction[field] = "" if value is None else str(value)
    return extraction


Comment on lines +266 to +272
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

conduct_eval silently prioritizes --use_outlines over --use_gliner2 if both flags are set. This makes CLI behavior ambiguous and can lead to unexpected evaluator selection. Consider validating these flags as mutually exclusive (e.g., in validate_and_standardize or here) and raising a clear error when both are provided.

Copilot uses AI. Check for mistakes.
def conduct_eval(args: Namespace, ds: Dataset):
    """Build the CV evaluator selected by the CLI flags, run it, and dump the result.

    Selection: ``use_outlines`` picks ``OutlinesEvaluator``, ``use_gliner2``
    picks ``GLiNEREvaluator``, otherwise ``GIMEvaluator`` is used.

    Args:
        args: Parsed command-line arguments.
        ds: Dataset handed to the evaluator.

    Raises:
        ValueError: If both ``use_outlines`` and ``use_gliner2`` are set,
            since only one evaluator can run.
    """
    use_outlines = getattr(args, "use_outlines", False)
    use_gliner2 = getattr(args, "use_gliner2", False)

    # Fail fast instead of silently preferring one backend over the other.
    if use_outlines and use_gliner2:
        raise ValueError("--use_outlines and --use_gliner2 are mutually exclusive; pass only one of them.")

    # Construct exactly one evaluator; building one just to discard it would
    # repeat whatever setup its constructor performs.
    if use_outlines:
        evaluator = OutlinesEvaluator(args, ds)
    elif use_gliner2:
        evaluator = GLiNEREvaluator(args, ds)
    else:
        evaluator = GIMEvaluator(args, ds)

    result = evaluator.evaluate()
    result.dump()
18 changes: 18 additions & 0 deletions src/gimbench/cv/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,21 @@ class CVData(BaseModel):


OUTLINES_JSON_SCHEMA = CVData.model_json_schema()

GLINER_SCHEMA = {
"cv": [
"name::str::Full name of the person",
"country::str::Country, nationality, or country of residence",
"birthday::str::Date of birth",
"phone_number::str::Phone number",
"email::str::Email address",
"highest_level_degree::[Bachelor|Master|PhD]::str::Highest educational degree",
"university::str::University name",
Comment on lines +99 to +102
Copy link

Copilot AI Apr 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GLINER_SCHEMA entries appear to use a consistent field::type::description format, but highest_level_degree has an extra segment (highest_level_degree::[Bachelor|Master|PhD]::str::...). This inconsistency is likely to break whatever parsing GLiNER2.extract_json does for schema strings. Align this entry’s format with the others (and with the expected GLiNER2 schema syntax) so it uses the same number of ::-separated parts.

Copilot uses AI. Check for mistakes.
"department::str::Department or school",
"major::str::Major or field of study",
"start_date::str::Start date of education",
"end_date::str::End date of education",
"homepage_url::str::Personal homepage URL",
"github_url::str::GitHub profile URL",
]
}
Loading