"""Generate json file for webpage."""
import json
import os
import re

# models = ['llama', 'alpaca', 'gpt35', 'bard']
models = ['vicuna']

def read_jsonl(path: str, key: str = None):
    """Read a JSONL file; if `key` is given, return a dict indexed by that field."""
    data = []
    with open(os.path.expanduser(path)) as f:
        for line in f:
            if not line:
                continue
            data.append(json.loads(line))
    if key is not None:
        data.sort(key=lambda x: x[key])
        data = {item[key]: item for item in data}
    return data

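# Illustrative usage of read_jsonl (assuming each line of the file is a JSON object
# such as {"question_id": 1, "category": "writing", "text": "..."}):
#   questions = read_jsonl('table/question.jsonl', key='question_id')
#   questions[1]['text']   # look up a record by its question_id
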
def trim_hanging_lines(s: str, n: int) -> str:
    """Strip the first `n` lines (and surrounding whitespace) from `s`."""
    s = s.strip()
    for _ in range(n):
        s = s.split('\n', 1)[1].strip()
    return s

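# Illustrative example of the helper above (it is not called in __main__ below):
#   trim_hanging_lines('header\nbody text\n', 1)  # -> 'body text'
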
if __name__ == '__main__':
    questions = read_jsonl('table/question.jsonl', key='question_id')

    # alpaca_answers = read_jsonl('table/answer/answer_alpaca-13b.jsonl', key='question_id')
    # bard_answers = read_jsonl('table/answer/answer_bard.jsonl', key='question_id')
    # gpt35_answers = read_jsonl('table/answer/answer_gpt35.jsonl', key='question_id')
    # llama_answers = read_jsonl('table/answer/answer_llama-13b.jsonl', key='question_id')
    vicuna_answers = read_jsonl('table/answer/answer_vicuna-13b.jsonl', key='question_id')
    ours_answers = read_jsonl('table/results/llama-13b-hf-alpaca.jsonl', key='question_id')

    review_vicuna = read_jsonl('table/review/review_vicuna-13b_llama-13b-hf-alpaca.jsonl', key='question_id')
    # review_alpaca = read_jsonl('table/review/review_alpaca-13b_vicuna-13b.jsonl', key='question_id')
    # review_bard = read_jsonl('table/review/review_bard_vicuna-13b.jsonl', key='question_id')
    # review_gpt35 = read_jsonl('table/review/review_gpt35_vicuna-13b.jsonl', key='question_id')
    # review_llama = read_jsonl('table/review/review_llama-13b_vicuna-13b.jsonl', key='question_id')

    records = []
    for qid in questions.keys():
        r = {
            'id': qid,
            'category': questions[qid]['category'],
            'question': questions[qid]['text'],
            'answers': {
                # 'alpaca': alpaca_answers[qid]['text'],
                # 'llama': llama_answers[qid]['text'],
                # 'bard': bard_answers[qid]['text'],
                # 'gpt35': gpt35_answers[qid]['text'],
                'vicuna': vicuna_answers[qid]['text'],
                'ours': ours_answers[qid]['text'],
            },
            'evaluations': {
                # 'alpaca': review_alpaca[qid]['text'],
                # 'llama': review_llama[qid]['text'],
                # 'bard': review_bard[qid]['text'],
                'vicuna': review_vicuna[qid]['content'],
                # 'gpt35': review_gpt35[qid]['text'],
            },
            'scores': {
                'vicuna': review_vicuna[qid]['tuple'],
                # 'alpaca': review_alpaca[qid]['score'],
                # 'llama': review_llama[qid]['score'],
                # 'bard': review_bard[qid]['score'],
                # 'gpt35': review_gpt35[qid]['score'],
            },
        }

        # cleanup data
        cleaned_evals = {}
        for k, v in r['evaluations'].items():
            v = v.strip()
            lines = v.split('\n')
            # trim the first line if it's a pair of numbers
            if re.match(r'\d+[, ]+\d+', lines[0]):
                lines = lines[1:]
                v = '\n'.join(lines)
            cleaned_evals[k] = v.replace('Assistant 1', '**Assistant 1**').replace('Assistant 2', '**Assistant 2**')

        r['evaluations'] = cleaned_evals
        records.append(r)

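    # Illustrative effect of the cleanup above (exact review text depends on the reviewer
    # output): a 'content' string like "8 9\nAssistant 1 gave a concise answer..." loses its
    # leading score line and becomes "**Assistant 1** gave a concise answer...", i.e. the
    # assistant names are bolded for Markdown rendering on the webpage.
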
    # Reorder the records (this step is optional)
    for r in records:
        if r['id'] <= 20:
            r['id'] += 60
        else:
            r['id'] -= 20
    for r in records:
        if r['id'] <= 50:
            r['id'] += 10
        elif 50 < r['id'] <= 60:
            r['id'] -= 50
    for r in records:
        if r['id'] == 7:
            r['id'] = 1
        elif r['id'] < 7:
            r['id'] += 1

    records.sort(key=lambda x: x['id'])

    # Write to file
    with open('webpage/data.json', 'w') as f:
        json.dump({'questions': records, 'models': models}, f, indent=2)
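    # Shape of the resulting webpage/data.json (illustrative; values depend on the input tables):
    # {
    #   "questions": [
    #     {"id": 1, "category": "...", "question": "...",
    #      "answers": {"vicuna": "...", "ours": "..."},
    #      "evaluations": {"vicuna": "..."},
    #      "scores": {"vicuna": [...]}},
    #     ...
    #   ],
    #   "models": ["vicuna"]
    # }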