Raw Runs

RunAdaptation method
math:subject=precalculus,level=1,use_official_examples=True,use_chain_of_thought=False,model=together_yalmgeneration
math:subject=precalculus,level=1,use_official_examples=True,use_chain_of_thought=False,model=writer_palmyra-instruct-30generation
math:subject=precalculus,level=1,use_official_examples=True,use_chain_of_thought=False,model=writer_palmyra-xgeneration
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=AlephAlpha_luminous-base,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=AlephAlpha_luminous-extended,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=AlephAlpha_luminous-supreme,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=ai21_j1-grande,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=ai21_j1-grande-v2-beta,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=ai21_j1-jumbo,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=ai21_j1-large,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=ai21_j2-grande,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=ai21_j2-jumbo,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=ai21_j2-large,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=anthropic_stanford-online-all-v4-s3,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=cohere_command-medium-beta,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=cohere_command-xlarge-beta,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=cohere_large-20220720,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=cohere_medium-20220720,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=cohere_medium-20221108,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=cohere_small-20220720,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=cohere_xlarge-20220609,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=cohere_xlarge-20221108,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=microsoft_TNLGv2_530B,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=microsoft_TNLGv2_7B,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_ada,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_babbage,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_curie,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_davinci,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_gpt-3.5-turbo-0301,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_text-ada-001,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_text-babbage-001,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_text-curie-001,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_text-davinci-002,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=openai_text-davinci-003,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_bloom,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_glm,data_augmentation=canonical,stop=hashmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_gpt-j-6b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_gpt-neox-20b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_opt-175b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_opt-66b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_redpajama-incite-base-3b-v1,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_t0pp,data_augmentation=canonical,stop=hashmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_t5-11b,data_augmentation=canonical,stop=hashmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_ul2,data_augmentation=canonical,stop=hash,global_prefix=nlgmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=together_yalm,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=writer_palmyra-instruct-30,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_joint,model=writer_palmyra-x,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=abstract_algebra,method=multiple_choice_separate_calibrated,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=abstract_algebra,method=multiple_choice_separate_calibrated,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=abstract_algebra,method=multiple_choice_separate_calibrated,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=abstract_algebra,method=multiple_choice_separate_calibrated,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=abstract_algebra,method=multiple_choice_separate_calibrated,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=abstract_algebra,method=multiple_choice_separate_calibrated,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=abstract_algebra,method=multiple_choice_separate_original,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=abstract_algebra,method=multiple_choice_separate_original,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=abstract_algebra,method=multiple_choice_separate_original,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=abstract_algebra,method=multiple_choice_separate_original,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=abstract_algebra,method=multiple_choice_separate_original,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=abstract_algebra,method=multiple_choice_separate_original,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=AlephAlpha_luminous-base,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=AlephAlpha_luminous-extended,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=AlephAlpha_luminous-supreme,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=ai21_j1-grande,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=ai21_j1-grande-v2-beta,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=ai21_j1-jumbo,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=ai21_j1-large,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=ai21_j2-grande,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=ai21_j2-jumbo,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=ai21_j2-large,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=anthropic_stanford-online-all-v4-s3,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=cohere_command-medium-beta,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=cohere_command-xlarge-beta,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=cohere_large-20220720,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=cohere_medium-20220720,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=cohere_medium-20221108,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=cohere_small-20220720,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=cohere_xlarge-20220609,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=cohere_xlarge-20221108,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=microsoft_TNLGv2_530B,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=microsoft_TNLGv2_7B,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_ada,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_babbage,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_curie,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_davinci,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_gpt-3.5-turbo-0301,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_text-ada-001,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_text-babbage-001,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_text-curie-001,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_text-davinci-002,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=openai_text-davinci-003,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_bloom,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_glm,data_augmentation=canonical,stop=hashmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_gpt-j-6b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_gpt-neox-20b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_opt-175b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_opt-66b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_redpajama-incite-base-3b-v1,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_t0pp,data_augmentation=canonical,stop=hashmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_t5-11b,data_augmentation=canonical,stop=hashmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_ul2,data_augmentation=canonical,stop=hash,global_prefix=nlgmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=together_yalm,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=writer_palmyra-instruct-30,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_joint,model=writer_palmyra-x,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=college_chemistry,method=multiple_choice_separate_calibrated,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=college_chemistry,method=multiple_choice_separate_calibrated,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=college_chemistry,method=multiple_choice_separate_calibrated,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=college_chemistry,method=multiple_choice_separate_calibrated,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=college_chemistry,method=multiple_choice_separate_calibrated,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=college_chemistry,method=multiple_choice_separate_calibrated,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=college_chemistry,method=multiple_choice_separate_original,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=college_chemistry,method=multiple_choice_separate_original,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=college_chemistry,method=multiple_choice_separate_original,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=college_chemistry,method=multiple_choice_separate_original,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=college_chemistry,method=multiple_choice_separate_original,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=college_chemistry,method=multiple_choice_separate_original,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=computer_security,method=multiple_choice_joint,model=AlephAlpha_luminous-base,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=AlephAlpha_luminous-extended,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=AlephAlpha_luminous-supreme,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=ai21_j1-grande,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=ai21_j1-grande-v2-beta,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=ai21_j1-jumbo,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=ai21_j1-large,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=ai21_j2-grande,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=ai21_j2-jumbo,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=ai21_j2-large,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=anthropic_stanford-online-all-v4-s3,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=cohere_command-medium-beta,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=cohere_command-xlarge-beta,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=cohere_large-20220720,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=cohere_medium-20220720,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=cohere_medium-20221108,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=cohere_small-20220720,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=cohere_xlarge-20220609,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=cohere_xlarge-20221108,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=microsoft_TNLGv2_530B,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=microsoft_TNLGv2_7B,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_ada,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_babbage,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_curie,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_davinci,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_gpt-3.5-turbo-0301,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_text-ada-001,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_text-babbage-001,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_text-curie-001,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_text-davinci-002,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=openai_text-davinci-003,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_bloom,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_glm,data_augmentation=canonical,stop=hashmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_gpt-j-6b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_gpt-neox-20b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_opt-175b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_opt-66b,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_redpajama-incite-base-3b-v1,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_t0pp,data_augmentation=canonical,stop=hashmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_t5-11b,data_augmentation=canonical,stop=hashmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_ul2,data_augmentation=canonical,stop=hash,global_prefix=nlgmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=together_yalm,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=writer_palmyra-instruct-30,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_joint,model=writer_palmyra-x,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=computer_security,method=multiple_choice_separate_calibrated,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=computer_security,method=multiple_choice_separate_calibrated,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=computer_security,method=multiple_choice_separate_calibrated,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=computer_security,method=multiple_choice_separate_calibrated,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=computer_security,method=multiple_choice_separate_calibrated,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=computer_security,method=multiple_choice_separate_calibrated,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
mmlu:subject=computer_security,method=multiple_choice_separate_original,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=computer_security,method=multiple_choice_separate_original,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=computer_security,method=multiple_choice_separate_original,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=computer_security,method=multiple_choice_separate_original,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=computer_security,method=multiple_choice_separate_original,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=computer_security,method=multiple_choice_separate_original,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_separate_original
mmlu:subject=econometrics,method=multiple_choice_joint,model=AlephAlpha_luminous-base,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=econometrics,method=multiple_choice_joint,model=AlephAlpha_luminous-extended,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=econometrics,method=multiple_choice_joint,model=AlephAlpha_luminous-supreme,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=econometrics,method=multiple_choice_joint,model=ai21_j1-grande,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=econometrics,method=multiple_choice_joint,model=ai21_j1-grande-v2-beta,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=econometrics,method=multiple_choice_joint,model=ai21_j1-jumbo,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=econometrics,method=multiple_choice_joint,model=ai21_j1-large,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=econometrics,method=multiple_choice_joint,model=ai21_j2-grande,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=econometrics,method=multiple_choice_joint,model=ai21_j2-jumbo,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=econometrics,method=multiple_choice_joint,model=ai21_j2-large,data_augmentation=canonicalmultiple_choice_joint
mmlu:subject=econometrics,method=multiple_choice_joint,model=anthropic_stanford-online-all-v4-s3,data_augmentation=canonicalmultiple_choice_joint