60 lines
2.4 KiB
Python
60 lines
2.4 KiB
Python
import json
|
|
import os
|
|
from collections import defaultdict
|
|
|
|
import numpy as np
|
|
|
|
import argparse
|
|
|
|
def parse_args(argv=None):
    """Parse the command-line options of the review summarizer.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, so
            existing ``parse_args()`` callers behave exactly as before.

    Returns:
        An ``argparse.Namespace`` with the attributes ``dir``, ``version``,
        ``select``, ``files`` and ``ignore``.
    """
    parser = argparse.ArgumentParser(description='ChatGPT-based QA evaluation.')
    parser.add_argument(
        '-d', '--dir', default=None,
        help='directory that is scanned for review .jsonl files and that '
             'file names are resolved against')
    parser.add_argument(
        '-v', '--version', default=None,
        help="only summarize files whose detected version matches "
             "('0613' if the file name contains it, otherwise '0314')")
    parser.add_argument(
        '-s', '--select', nargs='*', default=None,
        help='substrings that must all occur in the file name for the file '
             'to be summarized')
    parser.add_argument(
        '-f', '--files', nargs='*', default=[],
        help='explicit review files to summarize instead of scanning --dir')
    # type=int makes argparse reject non-numeric ids with a usage error;
    # callers that still apply int() to the values keep working.
    parser.add_argument(
        '-i', '--ignore', nargs='*', type=int, default=[],
        help='question ids to leave out of the statistics')
    return parser.parse_args(argv)
|
|
|
|
|
|
def is_review_file(name, directory=None):
    """Return True when *name* should be picked up as a review file.

    A file qualifies when it ends in ``.jsonl`` and either starts with one
    of the known prefixes (``gpt4_text``, ``reviews_``, ``review_``) or is
    found in a directory whose path contains ``review``.

    Args:
        name: File name as returned by ``os.listdir``.
        directory: The directory that was listed, or ``None`` when no
            directory was given on the command line.
    """
    if not name.endswith('.jsonl'):
        return False
    if name.startswith(('gpt4_text', 'reviews_', 'review_')):
        return True
    # Guard against directory=None: the substring test would raise TypeError.
    return directory is not None and 'review' in directory


def load_scores(path, ignore=()):
    """Read one review ``.jsonl`` file and group its scores.

    Every non-blank line is parsed as a JSON object. Records whose
    ``question_id`` is in *ignore* are dropped. A record with a
    ``category`` key contributes its ``tuple`` value to that category and
    to ``'all'``; otherwise its ``tuple`` value (or, failing that, its
    ``score`` value) is added to ``'all'`` only.

    Args:
        path: Path of the ``.jsonl`` file.
        ignore: Container of question ids to skip.

    Returns:
        A ``defaultdict(list)`` mapping group name to the collected entries.

    Raises:
        KeyError: If a record lacks ``question_id`` or the expected score key.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    grouped = defaultdict(list)
    with open(path, encoding='utf-8') as handle:
        for line in handle:
            if not line.strip():
                # Tolerate blank or trailing empty lines instead of crashing.
                continue
            record = json.loads(line)
            if record['question_id'] in ignore:
                continue
            if 'category' in record:
                grouped[record['category']].append(record['tuple'])
                grouped['all'].append(record['tuple'])
            elif 'tuple' in record:
                grouped['all'].append(record['tuple'])
            else:
                grouped['all'].append(record['score'])
    return grouped


def summarize_scores(entries):
    """Reduce the entries of one group to the three printed numbers.

    The entries are averaged column-wise and the means rounded to three
    decimals. Assumes every entry is a sequence of at least two numbers
    of equal length (TODO confirm for records that only carry ``score``).

    Returns:
        A tuple ``(ratio, first, second)``: the second mean as a percentage
        of the first (``nan`` when the first mean is zero), followed by the
        first and second means scaled by ten. All rounded to one decimal.
    """
    means = [round(x, 3) for x in np.asarray(entries).mean(0).tolist()]
    first, second = means[0], means[1]
    # A zero first mean used to abort the whole run with ZeroDivisionError.
    ratio = round(second / first * 100, 1) if first else float('nan')
    return ratio, round(first * 10, 1), round(second * 10, 1)


def main():
    """Print per-category score summaries for the selected review files."""
    args = parse_args()

    # A set gives constant-time lookups; int() accepts both str and int ids.
    ignored_ids = set() if args.ignore is None else {int(x) for x in args.ignore}

    if args.files:
        review_files = args.files
    else:
        review_files = [name for name in os.listdir(args.dir)
                        if is_review_file(name, args.dir)]

    for review_file in sorted(review_files):
        config = os.path.basename(review_file).replace('gpt4_text_', '').replace('.jsonl', '')
        # Every --select substring has to occur in the config name.
        if args.select is not None and any(x not in config for x in args.select):
            continue
        version = '0613' if '0613' in config else '0314'
        if args.version is not None and args.version != version:
            continue

        print(config)
        path = os.path.join(args.dir, review_file) if args.dir is not None else review_file
        scores = load_scores(path, ignored_ids)
        for group, entries in sorted(scores.items()):
            print(group, *summarize_scores(entries))
        print('=================================')


if __name__ == '__main__':
    main()
|