-
Notifications
You must be signed in to change notification settings - Fork 0
For rebuttal, add GLiNER2 support for CV evaluation and define GLINER_SCHEMA #101
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,7 +17,14 @@ | |
| from gimbench.log import get_logger | ||
| from gimbench.models import SimpleGIM | ||
|
|
||
| from .schema import CV_FIELDS, GIMKIT_TEMPLATE, OUTLINES_JSON_SCHEMA, OUTLINES_TEMPLATE, SHARED_PROMPT_PREFIX | ||
| from .schema import ( | ||
| CV_FIELDS, | ||
| GIMKIT_TEMPLATE, | ||
| GLINER_SCHEMA, | ||
| OUTLINES_JSON_SCHEMA, | ||
| OUTLINES_TEMPLATE, | ||
| SHARED_PROMPT_PREFIX, | ||
| ) | ||
|
|
||
|
|
||
| logger = get_logger(__name__) | ||
|
|
@@ -236,7 +243,39 @@ def _extract_fields(self, cv_content: str) -> dict[str, str]: | |
| raise ValueError(f"Expected dict but got {type(extraction).__name__}: {extraction}") | ||
|
|
||
|
|
||
class GLiNEREvaluator(CVEvaluator):
    """CV-field evaluator that extracts fields with a GLiNER2 model.

    The ``gliner2`` package is imported inside ``__init__`` so it is only
    required when this evaluator is actually instantiated.
    """

    def __init__(self, args: Namespace, dataset: Dataset):
        """Initialise the base evaluator and load the GLiNER2 model.

        Args:
            args: Parsed CLI arguments; ``args.model_name`` is passed to
                ``GLiNER2.from_pretrained``.
            dataset: Dataset forwarded unchanged to the base evaluator.

        Raises:
            ImportError: If the ``gliner2`` package is not installed.
        """
        super().__init__(args, dataset)
        try:
            from gliner2 import GLiNER2
        except ImportError as err:
            # Chain the original error so the real import failure stays visible.
            raise ImportError(
                "The 'gliner2' package is required but not installed. "
                "Please install it manually using `pip install gliner2` or `uv add gliner2` "
                "to evaluate using this model."
            ) from err
        self.model = GLiNER2.from_pretrained(args.model_name)

    def _extract_fields(self, cv_content: str) -> dict[str, str]:
        """Extract every field in ``CV_FIELDS`` from ``cv_content``.

        Args:
            cv_content: Raw CV text; passed to the model unmodified.

        Returns:
            A dict with one entry per name in ``CV_FIELDS``. A field maps to
            ``""`` when the model output has no usable ``"cv"`` record, or the
            record lacks the field or holds ``None`` for it; otherwise to
            ``str(value)``.
        """
        # NOTE(review): no truncation is applied here; if GLiNER2 enforces an
        # input-length limit, long CVs must be shortened before this call.
        result = self.model.extract_json(cv_content, GLINER_SCHEMA)

        # Start from a complete mapping so the result always has the same keys.
        extraction = dict.fromkeys(CV_FIELDS, "")

        records = result.get("cv") if isinstance(result, dict) else None
        if not (isinstance(records, list) and records):
            return extraction

        # Only the first extracted record is used.
        first_record = records[0]
        if not isinstance(first_record, dict):
            return extraction

        for field in CV_FIELDS:
            val = first_record.get(field)
            if val is not None:
                extraction[field] = str(val)
        return extraction
|
|
||
|
|
||
|
Comment on lines
+266
to
+272
|
||
def conduct_eval(args: Namespace, ds: Dataset):
    """Select an evaluator from the CLI flags, run it, and dump the result.

    Selection precedence:
      1. ``args.use_outlines`` -> ``OutlinesEvaluator``
      2. ``args.use_gliner2`` (treated as ``False`` when the attribute is
         absent) -> ``GLiNEREvaluator``
      3. otherwise -> ``GIMEvaluator``

    Exactly one evaluator is constructed per call.

    Args:
        args: Parsed CLI arguments.
        ds: Dataset to evaluate on.
    """
    use_gliner2 = getattr(args, "use_gliner2", False)

    # Both flags set: keep the existing precedence but make it visible.
    if args.use_outlines and use_gliner2:
        logger.warning("Both use_outlines and use_gliner2 are set; use_outlines takes precedence.")

    if args.use_outlines:
        evaluator = OutlinesEvaluator(args, ds)
    elif use_gliner2:
        evaluator = GLiNEREvaluator(args, ds)
    else:
        evaluator = GIMEvaluator(args, ds)

    result = evaluator.evaluate()
    result.dump()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -90,3 +90,21 @@ class CVData(BaseModel): | |
|
|
||
|
|
||
| OUTLINES_JSON_SCHEMA = CVData.model_json_schema() | ||
|
|
||
# Per-field GLiNER2 spec, keyed by field name. Each value is the part that
# follows the field name in a "field::..." schema entry; insertion order is
# the order of the entries in the schema.
_GLINER_CV_FIELD_SPECS = {
    "name": "str::Full name of the person",
    "country": "str::Country, nationality, or country of residence",
    "birthday": "str::Date of birth",
    "phone_number": "str::Phone number",
    "email": "str::Email address",
    "highest_level_degree": "[Bachelor|Master|PhD]::str::Highest educational degree",
    "university": "str::University name",
    "department": "str::Department or school",
    "major": "str::Major or field of study",
    "start_date": "str::Start date of education",
    "end_date": "str::End date of education",
    "homepage_url": "str::Personal homepage URL",
    "github_url": "str::GitHub profile URL",
}

# Schema with a single "cv" structure listing one "field::spec" entry per field.
GLINER_SCHEMA = {
    "cv": [f"{field_name}::{spec}" for field_name, spec in _GLINER_CV_FIELD_SPECS.items()],
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`--use_outlines` and `--use_gliner2` can both be set, but evaluator selection currently depends on precedence elsewhere. Consider making these options mutually exclusive at the argument-parsing/validation layer so users get an immediate, clear error if they pass both.