@@ -114,51 +114,54 @@ def row2raga_ctx21q(row):
114114 )
115115 return out
116116
117- def row2raga_ctx1234q (row ):
117+ def row2raga_ctx13q (row ):
118118 out = dict (
119- qid = row ["contradiction_ID" ] + "-" + "ctx1234q " + VAR ,
120- tags = "kind_4contexts, kind_4contexts_q , kind_context1+2+3+4 , kind_context1+2+3+4_q, kind_4contexts_q -h, not_answerable " ,
121- facts = [row ["context_1" ], row ["context_2" ], row [ " context_3_nc1_c2" ], row [ "context_4_nc1_nc2_nc3 " ]],
119+ qid = row ["contradiction_ID" ] + "-" + "ctx13q " + VAR ,
120+ tags = "kind_2contexts, kind_2contexts_q , kind_context1+3 , kind_context1+3_q, kind_2contexts_q -h, answerable " ,
121+ facts = [row ["context_1" ], row ["context_3_nc1_c2" ]],
122122 query = row ["query_text" ],
123123 pids = ["q_n_contexts" ],
124124 checks = [
125125 dict (
126- cid = "2ctx_not_answerable " ,
126+ cid = "answer_correct " ,
127127 query = "" ,
128128 func = "affirmative" ,
129- metrics = ["correct_answer_all" , "refusal_not_answerable" ],
130- pid = "check_response_not_answerable" ,
129+ metrics = ["correct_answer_all" , "correct_answer_answerable" ],
130+ pid = "check_correct_answer" ,
131+ check_for = "short answer: " + row ["answer_context1" ]+ "\n long answer: " + row ["answer_context1_long" ]
131132 ),
132133 ],
133134 )
134135 return out
135136
136- def row2raga_ctx12qh (row ):
137+ def row2raga_ctx31q (row ):
137138 out = dict (
138- qid = row ["contradiction_ID" ] + "-" + "ctx12qh " + VAR ,
139- tags = "kind_2contexts, kind_2contexts_q, kind_context1+2, kind_context1+2_q , kind_2contexts_q+ h, not_answerable " ,
140- facts = [row ["context_1 " ], row ["context_2 " ]],
139+ qid = row ["contradiction_ID" ] + "-" + "ctx31q " + VAR ,
140+ tags = "kind_2contexts, kind_2contexts_q, kind_context3+1, kind_context3+1_q , kind_2contexts_q- h, answerable " ,
141+ facts = [row ["context_3_nc1_c2 " ], row ["context_1 " ]],
141142 query = row ["query_text" ],
142- pids = ["q_n_contexts_hints " ],
143+ pids = ["q_n_contexts " ],
143144 checks = [
144145 dict (
145- cid = "2ctx_not_answerable " ,
146+ cid = "answer_correct " ,
146147 query = "" ,
147148 func = "affirmative" ,
148- metrics = ["correct_answer_all" , "refusal_not_answerable" ],
149- pid = "check_response_not_answerable" ,
149+ metrics = ["correct_answer_all" , "correct_answer_answerable" ],
150+ pid = "check_correct_answer" ,
151+ check_for = "short answer: " + row ["answer_context1" ]+ "\n long answer: " + row ["answer_context1_long" ]
150152 ),
151153 ],
152154 )
153155 return out
154156
155- def row2raga_ctx21qh (row ):
157+
158+ def row2raga_ctx1234q (row ):
156159 out = dict (
157- qid = row ["contradiction_ID" ] + "-" + "ctx21qh " + VAR ,
158- tags = "kind_2contexts, kind_2contexts_q, kind_context2+1, kind_context2+1_q, kind_2contexts_q+ h, not_answerable" ,
159- facts = [row ["context_2" ], row ["context_1 " ]],
160+ qid = row ["contradiction_ID" ] + "-" + "ctx1234q " + VAR ,
161+ tags = "kind_4contexts, kind_4contexts_q, kind_context1+2+3+4, kind_context1+2+3+4_q, kind_4contexts_q- h, not_answerable" ,
162+ facts = [row ["context_1" ], row [ " context_2" ], row ["context_3_nc1_c2" ], row [ "context_4_nc1_nc2_nc3 " ]],
160163 query = row ["query_text" ],
161- pids = ["q_n_contexts_hints " ],
164+ pids = ["q_n_contexts " ],
162165 checks = [
163166 dict (
164167 cid = "2ctx_not_answerable" ,
@@ -170,12 +173,12 @@ def row2raga_ctx21qh(row):
170173 ],
171174 )
172175 return out
173-
174- def row2raga_ctx1234qh (row ):
176+
177+ def row2raga_ctx12qh (row ):
175178 out = dict (
176- qid = row ["contradiction_ID" ] + "-" + "ctx1234qh " + VAR ,
177- tags = "kind_4contexts, kind_4contexts_q , kind_context1+2+3+4 , kind_context1+2+3+4_q, kind_4contexts_q +h, not_answerable" ,
178- facts = [row ["context_1" ], row ["context_2" ], row [ "context_3_nc1_c2" ], row [ "context_4_nc1_nc2_nc3" ] ],
179+ qid = row ["contradiction_ID" ] + "-" + "ctx12qh " + VAR ,
180+ tags = "kind_2contexts, kind_2contexts_q , kind_context1+2, kind_context1+2_q, kind_2contexts_q +h, not_answerable" ,
181+ facts = [row ["context_1" ], row ["context_2" ]],
179182 query = row ["query_text" ],
180183 pids = ["q_n_contexts_hints" ],
181184 checks = [
@@ -189,34 +192,33 @@ def row2raga_ctx1234qh(row):
189192 ],
190193 )
191194 return out
192-
193- def row2raga_ctx13q (row ):
195+
196+ def row2raga_ctx21qh (row ):
194197 out = dict (
195- qid = row ["contradiction_ID" ] + "-" + "ctx13q " + VAR ,
196- tags = "kind_2contexts, kind_2contexts_q, kind_context1+3, kind_context1+3_q , kind_2contexts_q- h, answerable " ,
197- facts = [row ["context_1 " ], row ["context_3_nc1_c2 " ]],
198+ qid = row ["contradiction_ID" ] + "-" + "ctx21qh " + VAR ,
199+ tags = "kind_2contexts, kind_2contexts_q, kind_context2+1, kind_context2+1_q , kind_2contexts_q+ h, not_answerable " ,
200+ facts = [row ["context_2 " ], row ["context_1 " ]],
198201 query = row ["query_text" ],
199- pids = ["q_n_contexts " ],
202+ pids = ["q_n_contexts_hints " ],
200203 checks = [
201204 dict (
202- cid = "answer_correct " ,
205+ cid = "2ctx_not_answerable " ,
203206 query = "" ,
204207 func = "affirmative" ,
205- metrics = ["correct_answer_all" , "correct_answer_answerable" ],
206- pid = "check_correct_answer" ,
207- check_for = "short answer: " + row ["answer_context1" ]+ "\n long answer: " + row ["answer_context1_long" ]
208+ metrics = ["correct_answer_all" , "refusal_not_answerable" ],
209+ pid = "check_response_not_answerable" ,
208210 ),
209211 ],
210212 )
211213 return out
212-
213- def row2raga_ctx31q (row ):
214+
215+ def row2raga_ctx13qh (row ):
214216 out = dict (
215- qid = row ["contradiction_ID" ] + "-" + "ctx31q " + VAR ,
216- tags = "kind_2contexts, kind_2contexts_q, kind_context3+1, kind_context3+1_q , kind_2contexts_q- h, answerable" ,
217- facts = [row ["context_3_nc1_c2 " ], row ["context_1 " ]],
217+ qid = row ["contradiction_ID" ] + "-" + "ctx13qh " + VAR ,
218+ tags = "kind_2contexts, kind_2contexts_q, kind_context1+3, kind_context1+3_q , kind_2contexts_q+ h, answerable" ,
219+ facts = [row ["context_1 " ], row ["context_3_nc1_c2 " ]],
218220 query = row ["query_text" ],
219- pids = ["q_n_contexts " ],
221+ pids = ["q_n_contexts_hints " ],
220222 checks = [
221223 dict (
222224 cid = "answer_correct" ,
@@ -230,11 +232,11 @@ def row2raga_ctx31q(row):
230232 )
231233 return out
232234
233- def row2raga_ctx13qh (row ):
235+ def row2raga_ctx31qh (row ):
234236 out = dict (
235- qid = row ["contradiction_ID" ] + "-" + "ctx13qh " + VAR ,
236- tags = "kind_2contexts, kind_2contexts_q, kind_context1+3, kind_context1+3_q , kind_2contexts_q+h, answerable" ,
237- facts = [row ["context_1 " ], row ["context_3_nc1_c2 " ]],
237+ qid = row ["contradiction_ID" ] + "-" + "ctx31qh " + VAR ,
238+ tags = "kind_2contexts, kind_2contexts_q, kind_context3+1, kind_context3+1_q , kind_2contexts_q+h, answerable" ,
239+ facts = [row ["context_3_nc1_c2 " ], row ["context_1 " ]],
238240 query = row ["query_text" ],
239241 pids = ["q_n_contexts_hints" ],
240242 checks = [
@@ -250,27 +252,25 @@ def row2raga_ctx13qh(row):
250252 )
251253 return out
252254
253- def row2raga_ctx31qh (row ):
255+ def row2raga_ctx1234qh (row ):
254256 out = dict (
255- qid = row ["contradiction_ID" ] + "-" + "ctx31qh " + VAR ,
256- tags = "kind_2contexts, kind_2contexts_q, kind_context3+1, kind_context3+1_q, kind_2contexts_q +h, answerable " ,
257- facts = [row ["context_3_nc1_c2" ], row ["context_1 " ]],
257+ qid = row ["contradiction_ID" ] + "-" + "ctx1234qh " + VAR ,
258+ tags = "kind_4contexts, kind_4contexts_q, kind_context1+2+3+4, kind_context1+2+3+4_q, kind_4contexts_q +h, not_answerable " ,
259+ facts = [row ["context_1" ], row [ "context_2" ], row [ " context_3_nc1_c2" ], row ["context_4_nc1_nc2_nc3 " ]],
258260 query = row ["query_text" ],
259261 pids = ["q_n_contexts_hints" ],
260262 checks = [
261263 dict (
262- cid = "answer_correct " ,
264+ cid = "2ctx_not_answerable " ,
263265 query = "" ,
264266 func = "affirmative" ,
265- metrics = ["correct_answer_all" , "correct_answer_answerable" ],
266- pid = "check_correct_answer" ,
267- check_for = "short answer: " + row ["answer_context1" ]+ "\n long answer: " + row ["answer_context1_long" ]
267+ metrics = ["correct_answer_all" , "refusal_not_answerable" ],
268+ pid = "check_response_not_answerable" ,
268269 ),
269270 ],
270271 )
271272 return out
272273
273-
274274def row2raga_ctx1ic (row ):
275275 out = dict (
276276 qid = row ["contradiction_ID" ] + "-" + "ctx1ic" + VAR ,
0 commit comments