# NOTE: "Spaces:" / "Runtime error" below were Hugging Face Spaces page chrome
# captured when this file was scraped — they are not part of the source.
| import gradio as gr | |
| import json | |
| import pandas as pd | |
| from urllib.request import urlopen | |
| from urllib.error import URLError | |
| import re | |
| from datetime import datetime | |
# --- UI text and configuration constants -----------------------------------

# BibTeX snippet surfaced in the "Citation" accordion.
CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
    title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
    author={OpenCompass Contributors},
    howpublished = {\url{https://github.com/open-compass/opencompass}},
    year={2023}
}"""
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# Pointer to the dataset holding raw per-sample predictions.
Predictions_BUTTON_LABEL = "All model predictions are listed here. Access this URL for more details."
Predictions_BUTTON_TEXT = "https://huggingface.co/datasets/opencompass/compass_academic_predictions"

# Widen the Gradio container on large screens.
head_style = """
<style>
@media (min-width: 1536px)
{
    .gradio-container {
        min-width: var(--size-full) !important;
    }
}
</style>
"""

# Base URL of the OSS bucket serving the leaderboard JSON files.
DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/dev-assets/hf-research/"

MAIN_LEADERBOARD_DESCRIPTION = """## Compass Academic Leaderboard (Full Version)
The CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
- The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
- Currently, the evaluation primarily targets chat models, with updates featuring the latest community models at irregular intervals.
- Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)🏆.
"""

Initial_title = 'Compass Academic Leaderboard'

# Choices offered by the size / type checkbox filters.
MODEL_SIZE = ['<10B', '10B-70B', '>70B', 'Unknown']
MODEL_TYPE = ['API', 'OpenSource']
def findfile():
    """Fetch model metadata and aggregated results from the OSS bucket.

    Returns:
        tuple: ``(model_info, results)``, both parsed from remote JSON —
            ``model_info`` is a list of per-model metadata dicts and
            ``results`` maps dataset name -> {model abbr -> score}.

    Raises:
        urllib.error.URLError: if either remote file cannot be fetched.
    """
    def _fetch_json(name):
        # Use a context manager so the HTTP response is always closed;
        # the original left both responses open (resource leak).
        with urlopen(f"{DATA_URL_BASE}{name}.json") as response:
            return json.loads(response.read().decode('utf-8'))

    model_info = _fetch_json('model-meta-info')
    results = _fetch_json('hf-academic')
    return model_info, results
# Fetched once at import time so UI callbacks can close over the data.
# NOTE(review): a network failure here aborts app startup — confirm whether
# a retry/fallback is wanted.
model_info, results = findfile()
def findfile_predictions():
    """Load cached per-sample model predictions from the local data file.

    Returns:
        dict: dataset name -> {model abbr -> {'predictions': [...]}}.

    Raises:
        FileNotFoundError: if ``data/hf-academic-predictions.json`` is absent.
    """
    # The with-block closes the file; the original also called
    # file.close() redundantly inside it. Encoding pinned for portability.
    with open('data/hf-academic-predictions.json', 'r', encoding='utf-8') as fp:
        return json.load(fp)
def make_results_tab(model_info, results):
    """Build the main results DataFrame from model metadata and scores.

    Args:
        model_info: iterable of model metadata dicts (keys used:
            'display_name', 'release_time', 'num_param', 'release_type',
            'abbr').
        results: dict mapping dataset name -> {model abbr -> score or '-'}.

    Returns:
        tuple: ``(df, models_list, datasets_list)`` where ``df`` has one row
            per model with at least one non-'-' score.
    """
    models_list = list(model_info)
    datasets_list = list(results)
    result_list = []
    index = 1
    for model in models_list:
        row = {
            'Index': index,
            'Model Name': model['display_name'],
            'Release Time': model['release_time'],
            'Parameters': model['num_param'],
            'OpenSource': model['release_type'],
        }
        has_any_score = False
        for dataset in datasets_list:
            # .get guards against a model missing from a dataset's results;
            # the original indexed directly and raised KeyError in that case.
            score = results[dataset].get(model['abbr'], '-')
            if score != '-':
                has_any_score = True
            row[dataset] = score
        if has_any_score:
            result_list.append(row)
        # NOTE(review): index advances even for skipped models, leaving gaps
        # in 'Index' — preserved from the original behavior.
        index += 1
    df = pd.DataFrame(result_list)
    return df, models_list, datasets_list
def calculate_column_widths(df):
    """Estimate a per-column pixel width for a ``gr.DataFrame``.

    Width is driven by the longer of the header text and the widest cell,
    then clamped to the [160, 400] range.

    Args:
        df: the DataFrame to size.

    Returns:
        list[int]: one width per column, in ``df.columns`` order.
    """
    column_widths = []
    for column in df.columns:
        header_length = len(str(column))
        if len(df) == 0:
            # .max() on an empty column yields NaN; the original then relied
            # on NaN-comparison quirks in max()/min(). Guard explicitly.
            max_content_length = 0
        else:
            max_content_length = int(df[column].astype(str).map(len).max())
        width = max(header_length * 10, max_content_length * 8) + 20
        width = max(160, min(400, width))
        column_widths.append(width)
    return column_widths
def show_results_tab(df):
    """Render the 'Results' tab: search box, size/type filters, and table.

    Args:
        df: initial results DataFrame shown before any filtering.
    """

    def filter_df(model_name, size_ranges, model_types):
        # Rebuild from the module-level data so every filter starts fresh.
        newdf, _models, _datasets = make_results_tab(model_info, results)

        # --- substring search on the model name (skip the placeholder) ---
        default_val = 'Input the Model Name'
        if model_name != default_val:
            # Names may be wrapped in <a ...>name</a>; strip the markup.
            method_names = [
                x.split('</a>')[0].split('>')[-1].lower()
                for x in newdf['Model Name']
            ]
            keep = [model_name.lower() in name for name in method_names]
            # Boolean-index directly instead of the original's TEMP column
            # plus `== True` comparison.
            newdf = newdf[keep]

        # --- filter by parameter-count bucket ---
        if size_ranges:
            def get_size_in_B(param):
                if param == 'N/A':
                    return None
                try:
                    return float(param.replace('B', ''))
                except (ValueError, AttributeError):
                    # Narrowed from a bare except: only parse failures.
                    return None

            newdf['size_in_B'] = newdf['Parameters'].apply(get_size_in_B)
            mask = pd.Series(False, index=newdf.index)
            for size_range in size_ranges:
                if size_range == '<10B':
                    mask |= (newdf['size_in_B'] < 10) & (newdf['size_in_B'].notna())
                elif size_range == '10B-70B':
                    mask |= (newdf['size_in_B'] >= 10) & (newdf['size_in_B'] < 70)
                elif size_range == '>70B':
                    mask |= newdf['size_in_B'] >= 70
                elif size_range == 'Unknown':
                    mask |= newdf['size_in_B'].isna()
            newdf = newdf[mask]
            newdf.drop('size_in_B', axis=1, inplace=True)

        # --- filter by release type (API vs open source) ---
        if model_types:
            type_mask = pd.Series(False, index=newdf.index)
            for model_type in model_types:
                if model_type in ('API', 'OpenSource'):
                    type_mask |= newdf['OpenSource'] == model_type
            newdf = newdf[type_mask]
        return newdf

    with gr.Row():
        with gr.Column():
            model_name = gr.Textbox(
                value='Input the Model Name',
                label='Search Model Name',
                interactive=True,
            )
        with gr.Column():
            size_filter = gr.CheckboxGroup(
                choices=MODEL_SIZE,
                value=MODEL_SIZE,
                label='Model Size',
                interactive=True,
            )
        with gr.Column():
            type_filter = gr.CheckboxGroup(
                choices=MODEL_TYPE,
                value=MODEL_TYPE,
                label='Model Type',
                interactive=True,
            )
    with gr.Column():
        table = gr.DataFrame(
            value=df,
            interactive=False,
            wrap=False,
            column_widths=calculate_column_widths(df),
        )
    # Any change to the search box or either filter re-runs filter_df and
    # refreshes the table in place.
    model_name.submit(
        fn=filter_df,
        inputs=[model_name, size_filter, type_filter],
        outputs=table,
    )
    size_filter.change(
        fn=filter_df,
        inputs=[model_name, size_filter, type_filter],
        outputs=table,
    )
    type_filter.change(
        fn=filter_df,
        inputs=[model_name, size_filter, type_filter],
        outputs=table,
    )
    with gr.Row():
        with gr.Accordion("Storage of Model Predictions", open=True):
            predictions_box = gr.Textbox(
                value=Predictions_BUTTON_TEXT,
                label=Predictions_BUTTON_LABEL,
                elem_id='predictions-button',
                lines=2,                # a little height for readability
                max_lines=4,
                show_copy_button=True,  # one-click copy of the URL
            )
    with gr.Row():
        with gr.Accordion("Citation", open=True):
            citation_box = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button',
                lines=6,                # room for the BibTeX entry
                max_lines=8,
                show_copy_button=True,  # one-click copy of the citation
            )
# Placeholder table shown when the requested (model, dataset) pair has no
# stored predictions; rendered via pd.DataFrame(ERROR_DF).
ERROR_DF = {
    "Type": ['NoneType'],
    "Details": ["Do not find the combination predictions of the two options above."]
}
def show_predictions_tab(model_list, dataset_list, predictions):
    """Render the 'Predictions' tab: model/dataset dropdowns and a table.

    Args:
        model_list: list of model metadata dicts (only 'abbr' is used).
        dataset_list: list of dataset names.
        predictions: dict dataset -> {model abbr -> {'predictions': [...]}}.
    """

    def get_pre_df(model_name, dataset_name):
        # Fall back to a one-row error table when the pair is not stored.
        # (Membership tests directly on the dicts, not on .keys().)
        if dataset_name not in predictions or model_name not in predictions[dataset_name]:
            return pd.DataFrame(ERROR_DF)
        this_predictions = predictions[dataset_name][model_name]['predictions']
        # Stringify nested fields so pandas renders them as plain text.
        # NOTE(review): this mutates the stored prediction dicts in place —
        # preserved from the original behavior.
        for pred in this_predictions:
            pred['origin_prompt'] = str(pred['origin_prompt'])
            pred['gold'] = str(pred['gold'])
        return pd.DataFrame(this_predictions)

    model_list = [m['abbr'] for m in model_list]
    initial_predictions = get_pre_df('MiniMax-Text-01', 'IFEval')
    with gr.Row():
        with gr.Column():
            model_drop = gr.Dropdown(
                label="Model Name",
                choices=model_list,
                interactive=True,
            )
        with gr.Column():
            dataset_drop = gr.Dropdown(
                label="Dataset Name",
                choices=dataset_list,
                interactive=True,
            )
    with gr.Column():
        table = gr.DataFrame(
            value=initial_predictions,
            interactive=False,
            wrap=False,
            max_height=1000,
            column_widths=calculate_column_widths(initial_predictions),
        )
    # Either dropdown change refreshes the predictions table.
    model_drop.change(
        fn=get_pre_df,
        inputs=[model_drop, dataset_drop],
        outputs=table,
    )
    dataset_drop.change(
        fn=get_pre_df,
        inputs=[model_drop, dataset_drop],
        outputs=table,
    )
    with gr.Row():
        with gr.Accordion("Citation", open=False):
            citation_box = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id='citation-button',
                lines=6,                # room for the BibTeX entry
                max_lines=8,
                show_copy_button=True,  # one-click copy of the citation
            )
def create_interface():
    """Assemble the Gradio Blocks app (description plus the Results tab).

    Returns:
        gr.Blocks: the configured, not-yet-launched demo.
    """
    df, model_list, dataset_list = make_results_tab(model_info, results)
    # NOTE(review): this reads data/hf-academic-predictions.json even though
    # the Predictions tab below is disabled — if the file is missing, startup
    # fails here. Confirm whether the load can be deferred or made optional.
    predictions = findfile_predictions()
    with gr.Blocks() as demo:
        gr.Markdown(MAIN_LEADERBOARD_DESCRIPTION)
        with gr.Tabs(elem_classes='tab-buttons') as tabs:
            with gr.TabItem('Results', elem_id='main', id=0):
                show_results_tab(df)
            # Predictions tab intentionally disabled for now:
            # with gr.TabItem('Predictions', elem_id='notmain', id=1):
            #     show_predictions_tab(model_list, dataset_list, predictions)
    return demo
# Script entry point: build the UI and serve it on all interfaces.
# (Removed leftover debug residue: a commented-out duplicate findfile()
# call and a commented breakpoint().)
if __name__ == '__main__':
    demo = create_interface()
    demo.queue()
    demo.launch(server_name='0.0.0.0')