Raw Runs

RunAdaptation method
imdb:only_contrast=True,model=cohere_large-20220720,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=cohere_medium-20220720,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=cohere_medium-20221108,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=cohere_small-20220720,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=cohere_xlarge-20220609,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=cohere_xlarge-20221108,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=microsoft_TNLGv2_530B,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=microsoft_TNLGv2_7B,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=openai_ada,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=openai_babbage,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=openai_curie,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=openai_davinci,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=openai_gpt-3.5-turbo-0301,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=openai_text-ada-001,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=openai_text-babbage-001,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=openai_text-curie-001,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=openai_text-davinci-002,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=openai_text-davinci-003,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=together_bloom,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=together_glm,data_augmentation=contrast_sets,stop=hash,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=together_gpt-j-6b,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=together_gpt-neox-20b,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=together_opt-175b,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=together_opt-66b,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=together_redpajama-incite-base-3b-v1,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=together_t0pp,data_augmentation=contrast_sets,stop=hash,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=together_t5-11b,data_augmentation=contrast_sets,stop=hash,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=together_ul2,data_augmentation=contrast_sets,stop=hash,global_prefix=nlg,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=together_yalm,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=writer_palmyra-instruct-30,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
imdb:only_contrast=True,model=writer_palmyra-x,data_augmentation=contrast_sets,groups=robustness_contrast_setsgeneration
legal_support,method=multiple_choice_joint:model=AlephAlpha_luminous-basemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=AlephAlpha_luminous-extendedmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=AlephAlpha_luminous-suprememultiple_choice_joint
legal_support,method=multiple_choice_joint:model=ai21_j1-grandemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=ai21_j1-grande-v2-betamultiple_choice_joint
legal_support,method=multiple_choice_joint:model=ai21_j1-jumbomultiple_choice_joint
legal_support,method=multiple_choice_joint:model=ai21_j1-largemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=ai21_j2-grandemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=ai21_j2-jumbomultiple_choice_joint
legal_support,method=multiple_choice_joint:model=ai21_j2-largemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=anthropic_stanford-online-all-v4-s3multiple_choice_joint
legal_support,method=multiple_choice_joint:model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=cohere_command-medium-betamultiple_choice_joint
legal_support,method=multiple_choice_joint:model=cohere_command-xlarge-betamultiple_choice_joint
legal_support,method=multiple_choice_joint:model=cohere_large-20220720multiple_choice_joint
legal_support,method=multiple_choice_joint:model=cohere_medium-20220720multiple_choice_joint
legal_support,method=multiple_choice_joint:model=cohere_medium-20221108multiple_choice_joint
legal_support,method=multiple_choice_joint:model=cohere_small-20220720multiple_choice_joint
legal_support,method=multiple_choice_joint:model=cohere_xlarge-20220609multiple_choice_joint
legal_support,method=multiple_choice_joint:model=cohere_xlarge-20221108multiple_choice_joint
legal_support,method=multiple_choice_joint:model=microsoft_TNLGv2_530Bmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=microsoft_TNLGv2_7Bmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_adamultiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_babbagemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_code-cushman-001multiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_code-davinci-002multiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_curiemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_davincimultiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_gpt-3.5-turbo-0301multiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_text-ada-001multiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_text-babbage-001multiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_text-curie-001multiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_text-davinci-002multiple_choice_joint
legal_support,method=multiple_choice_joint:model=openai_text-davinci-003multiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_bloommultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_bloom,groups=ablation_multiple_choicemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_glm,stop=hashmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_gpt-j-6bmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_gpt-neox-20bmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_opt-175bmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_opt-66bmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_redpajama-incite-base-3b-v1multiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_t0pp,stop=hashmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_t5-11b,stop=hashmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_ul2,stop=hash,global_prefix=nlgmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=together_yalmmultiple_choice_joint
legal_support,method=multiple_choice_joint:model=writer_palmyra-instruct-30multiple_choice_joint
legal_support,method=multiple_choice_joint:model=writer_palmyra-xmultiple_choice_joint
legal_support,method=multiple_choice_separate_calibrated:model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
legal_support,method=multiple_choice_separate_calibrated:model=together_bloom,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
legal_support,method=multiple_choice_separate_calibrated:model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
legal_support,method=multiple_choice_separate_calibrated:model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
legal_support,method=multiple_choice_separate_calibrated:model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
legal_support,method=multiple_choice_separate_calibrated:model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
legal_support,method=multiple_choice_separate_original:model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_separate_original
legal_support,method=multiple_choice_separate_original:model=together_bloom,groups=ablation_multiple_choicemultiple_choice_separate_original
legal_support,method=multiple_choice_separate_original:model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_separate_original
legal_support,method=multiple_choice_separate_original:model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_separate_original
legal_support,method=multiple_choice_separate_original:model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_separate_original
legal_support,method=multiple_choice_separate_original:model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_separate_original
lsat_qa:task=all,method=multiple_choice_joint,model=AlephAlpha_luminous-basemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=AlephAlpha_luminous-extendedmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=AlephAlpha_luminous-suprememultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j1-grandemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j1-grande-v2-betamultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j1-jumbomultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j1-largemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j2-grandemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j2-jumbomultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j2-largemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=anthropic_stanford-online-all-v4-s3multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_command-medium-betamultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_command-xlarge-betamultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_large-20220720multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_medium-20220720multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_medium-20221108multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_small-20220720multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_xlarge-20220609multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_xlarge-20221108multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=microsoft_TNLGv2_530Bmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=microsoft_TNLGv2_7Bmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_adamultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_babbagemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_code-cushman-001multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_code-davinci-002multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_curiemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_davincimultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_gpt-3.5-turbo-0301multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_text-ada-001multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_text-babbage-001multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_text-curie-001multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_text-davinci-002multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=openai_text-davinci-003multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_bloommultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_glm,stop=hashmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_gpt-j-6bmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_gpt-neox-20bmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_opt-175bmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_opt-66bmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_redpajama-incite-base-3b-v1multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_t0pp,stop=hashmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_t5-11b,stop=hashmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_ul2,stop=hash,global_prefix=nlgmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=together_yalmmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=writer_palmyra-instruct-30multiple_choice_joint
lsat_qa:task=all,method=multiple_choice_joint,model=writer_palmyra-xmultiple_choice_joint
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_separate_calibrated
lsat_qa:task=all,method=multiple_choice_separate_original,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choicemultiple_choice_separate_original
lsat_qa:task=all,method=multiple_choice_separate_original,model=together_bloom,groups=ablation_multiple_choicemultiple_choice_separate_original
lsat_qa:task=all,method=multiple_choice_separate_original,model=together_gpt-j-6b,groups=ablation_multiple_choicemultiple_choice_separate_original
lsat_qa:task=all,method=multiple_choice_separate_original,model=together_gpt-neox-20b,groups=ablation_multiple_choicemultiple_choice_separate_original
lsat_qa:task=all,method=multiple_choice_separate_original,model=together_opt-175b,groups=ablation_multiple_choicemultiple_choice_separate_original
lsat_qa:task=all,method=multiple_choice_separate_original,model=together_opt-66b,groups=ablation_multiple_choicemultiple_choice_separate_original
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=AlephAlpha_luminous-basegeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=AlephAlpha_luminous-extendedgeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=AlephAlpha_luminous-supremegeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j1-grandegeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j1-grande-v2-betageneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j1-jumbogeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j1-largegeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j2-grandegeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j2-jumbogeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j2-largegeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=anthropic_stanford-online-all-v4-s3generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_command-medium-betageneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_command-xlarge-betageneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_large-20220720generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_medium-20220720generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_medium-20221108generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_small-20220720generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_xlarge-20220609generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_xlarge-20221108generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_TNLGv2_530Bgeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_TNLGv2_7Bgeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_adageneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_babbagegeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_code-cushman-001generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_code-davinci-002generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_curiegeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_davincigeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_gpt-3.5-turbo-0301generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_text-ada-001generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_text-babbage-001generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_text-curie-001generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_text-davinci-002generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_text-davinci-003generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_bloomgeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_glm,stop=hashgeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_gpt-j-6bgeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_gpt-neox-20bgeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_opt-175bgeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_opt-66bgeneration
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_redpajama-incite-base-3b-v1generation
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_t0pp,stop=hashgeneration