blimp:phenomenon=quantifiers,method=multiple_choice_joint,model=together_bloom,groups=ablation_multiple_choice | multiple_choice_joint |
blimp:phenomenon=quantifiers,method=multiple_choice_joint,model=together_gpt-j-6b,groups=ablation_multiple_choice | multiple_choice_joint |
blimp:phenomenon=quantifiers,method=multiple_choice_joint,model=together_gpt-neox-20b,groups=ablation_multiple_choice | multiple_choice_joint |
blimp:phenomenon=quantifiers,method=multiple_choice_joint,model=together_opt-175b,groups=ablation_multiple_choice | multiple_choice_joint |
blimp:phenomenon=quantifiers,method=multiple_choice_joint,model=together_opt-66b,groups=ablation_multiple_choice | multiple_choice_joint |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_calibrated,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_calibrated,model=together_bloom,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_calibrated,model=together_gpt-j-6b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_calibrated,model=together_gpt-neox-20b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_calibrated,model=together_opt-175b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_calibrated,model=together_opt-66b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=ai21_j1-grande | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=ai21_j1-grande-v2-beta | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=ai21_j1-jumbo | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=ai21_j1-large | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=ai21_j2-grande | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=ai21_j2-jumbo | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=ai21_j2-large | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=anthropic_stanford-online-all-v4-s3 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choice | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=cohere_command-medium-beta | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=cohere_command-xlarge-beta | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=cohere_large-20220720 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=cohere_medium-20220720 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=cohere_medium-20221108 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=cohere_small-20220720 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=cohere_xlarge-20220609 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=cohere_xlarge-20221108 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=microsoft_TNLGv2_530B | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=microsoft_TNLGv2_7B | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=openai_ada | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=openai_babbage | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=openai_curie | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=openai_davinci | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=openai_text-ada-001 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=openai_text-babbage-001 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=openai_text-curie-001 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=openai_text-davinci-002 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=openai_text-davinci-003 | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=together_bloom | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=together_bloom,groups=ablation_multiple_choice | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=together_gpt-j-6b | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=together_gpt-j-6b,groups=ablation_multiple_choice | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=together_gpt-neox-20b | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=together_gpt-neox-20b,groups=ablation_multiple_choice | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=together_opt-175b | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=together_opt-175b,groups=ablation_multiple_choice | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=together_opt-66b | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=together_opt-66b,groups=ablation_multiple_choice | multiple_choice_separate_original |
blimp:phenomenon=quantifiers,method=multiple_choice_separate_original,model=together_redpajama-incite-base-3b-v1 | multiple_choice_separate_original |
bold:subject=all,model=AlephAlpha_luminous-base | generation |
bold:subject=all,model=AlephAlpha_luminous-extended | generation |
bold:subject=all,model=AlephAlpha_luminous-supreme | generation |
bold:subject=all,model=ai21_j1-grande | generation |
bold:subject=all,model=ai21_j1-grande-v2-beta | generation |
bold:subject=all,model=ai21_j1-jumbo | generation |
bold:subject=all,model=ai21_j1-large | generation |
bold:subject=all,model=ai21_j2-grande | generation |
bold:subject=all,model=ai21_j2-jumbo | generation |
bold:subject=all,model=ai21_j2-large | generation |
bold:subject=all,model=anthropic_stanford-online-all-v4-s3 | generation |
bold:subject=all,model=cohere_command-medium-beta | generation |
bold:subject=all,model=cohere_command-xlarge-beta | generation |
bold:subject=all,model=cohere_large-20220720 | generation |
bold:subject=all,model=cohere_medium-20220720 | generation |
bold:subject=all,model=cohere_medium-20221108 | generation |
bold:subject=all,model=cohere_small-20220720 | generation |
bold:subject=all,model=cohere_xlarge-20220609 | generation |
bold:subject=all,model=cohere_xlarge-20221108 | generation |
bold:subject=all,model=microsoft_TNLGv2_530B | generation |
bold:subject=all,model=microsoft_TNLGv2_7B | generation |
bold:subject=all,model=openai_ada | generation |
bold:subject=all,model=openai_babbage | generation |
bold:subject=all,model=openai_curie | generation |
bold:subject=all,model=openai_davinci | generation |
bold:subject=all,model=openai_gpt-3.5-turbo-0301 | generation |
bold:subject=all,model=openai_text-ada-001 | generation |
bold:subject=all,model=openai_text-babbage-001 | generation |
bold:subject=all,model=openai_text-curie-001 | generation |
bold:subject=all,model=openai_text-davinci-002 | generation |
bold:subject=all,model=openai_text-davinci-003 | generation |
bold:subject=all,model=together_bloom | generation |
bold:subject=all,model=together_glm,stop=hash | generation |
bold:subject=all,model=together_gpt-j-6b | generation |
bold:subject=all,model=together_gpt-neox-20b | generation |
bold:subject=all,model=together_opt-175b | generation |
bold:subject=all,model=together_opt-66b | generation |
bold:subject=all,model=together_redpajama-incite-base-3b-v1 | generation |
bold:subject=all,model=together_t0pp,stop=hash | generation |
bold:subject=all,model=together_t5-11b,stop=hash | generation |
bold:subject=all,model=together_ul2,stop=hash,global_prefix=nlg | generation |
bold:subject=all,model=together_yalm | generation |
bold:subject=all,model=writer_palmyra-instruct-30 | generation |
bold:subject=all,model=writer_palmyra-x | generation |
boolq:model=AlephAlpha_luminous-base,data_augmentation=canonical | generation |
boolq:model=AlephAlpha_luminous-extended,data_augmentation=canonical | generation |
boolq:model=AlephAlpha_luminous-supreme,data_augmentation=canonical | generation |
boolq:model=ai21_j1-grande,data_augmentation=canonical | generation |
boolq:model=ai21_j1-grande-v2-beta,data_augmentation=canonical | generation |
boolq:model=ai21_j1-jumbo,data_augmentation=canonical | generation |
boolq:model=ai21_j1-large,data_augmentation=canonical | generation |
boolq:model=ai21_j2-grande,data_augmentation=canonical | generation |
boolq:model=ai21_j2-jumbo,data_augmentation=canonical | generation |
boolq:model=ai21_j2-large,data_augmentation=canonical | generation |
boolq:model=anthropic_stanford-online-all-v4-s3,data_augmentation=canonical | generation |
boolq:model=cohere_command-medium-beta,data_augmentation=canonical | generation |
boolq:model=cohere_command-xlarge-beta,data_augmentation=canonical | generation |
boolq:model=cohere_large-20220720,data_augmentation=canonical | generation |
boolq:model=cohere_medium-20220720,data_augmentation=canonical | generation |
boolq:model=cohere_medium-20221108,data_augmentation=canonical | generation |
boolq:model=cohere_small-20220720,data_augmentation=canonical | generation |
boolq:model=cohere_xlarge-20220609,data_augmentation=canonical | generation |
boolq:model=cohere_xlarge-20221108,data_augmentation=canonical | generation |
boolq:model=microsoft_TNLGv2_530B,data_augmentation=canonical | generation |
boolq:model=microsoft_TNLGv2_7B,data_augmentation=canonical | generation |
boolq:model=openai_ada,data_augmentation=canonical | generation |
boolq:model=openai_babbage,data_augmentation=canonical | generation |
boolq:model=openai_curie,data_augmentation=canonical | generation |
boolq:model=openai_davinci,data_augmentation=canonical | generation |
boolq:model=openai_gpt-3.5-turbo-0301,data_augmentation=canonical | generation |
boolq:model=openai_text-ada-001,data_augmentation=canonical | generation |
boolq:model=openai_text-babbage-001,data_augmentation=canonical | generation |
boolq:model=openai_text-curie-001,data_augmentation=canonical | generation |
boolq:model=openai_text-davinci-002,data_augmentation=canonical | generation |
boolq:model=openai_text-davinci-003,data_augmentation=canonical | generation |
boolq:model=together_bloom,data_augmentation=canonical | generation |
boolq:model=together_glm,data_augmentation=canonical,stop=hash | generation |
boolq:model=together_gpt-j-6b,data_augmentation=canonical | generation |
boolq:model=together_gpt-neox-20b,data_augmentation=canonical | generation |
boolq:model=together_opt-175b,data_augmentation=canonical | generation |
boolq:model=together_opt-66b,data_augmentation=canonical | generation |
boolq:model=together_redpajama-incite-base-3b-v1,data_augmentation=canonical | generation |
boolq:model=together_t0pp,data_augmentation=canonical,stop=hash | generation |
boolq:model=together_t5-11b,data_augmentation=canonical,stop=hash | generation |
boolq:model=together_ul2,data_augmentation=canonical,stop=hash,global_prefix=nlg | generation |
boolq:model=together_yalm,data_augmentation=canonical | generation |
boolq:model=writer_palmyra-instruct-30,data_augmentation=canonical | generation |
boolq:model=writer_palmyra-x,data_augmentation=canonical | generation |
boolq:only_contrast=True,model=AlephAlpha_luminous-base,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=AlephAlpha_luminous-extended,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=AlephAlpha_luminous-supreme,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=ai21_j1-grande,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=ai21_j1-grande-v2-beta,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=ai21_j1-jumbo,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=ai21_j1-large,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=ai21_j2-grande,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=ai21_j2-jumbo,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=ai21_j2-large,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=anthropic_stanford-online-all-v4-s3,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=cohere_command-medium-beta,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=cohere_command-xlarge-beta,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=cohere_large-20220720,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=cohere_medium-20220720,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=cohere_medium-20221108,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=cohere_small-20220720,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=cohere_xlarge-20220609,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=cohere_xlarge-20221108,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=microsoft_TNLGv2_530B,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=microsoft_TNLGv2_7B,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=openai_ada,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=openai_babbage,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=openai_curie,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=openai_davinci,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=openai_gpt-3.5-turbo-0301,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=openai_text-ada-001,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=openai_text-babbage-001,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=openai_text-curie-001,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=openai_text-davinci-002,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=openai_text-davinci-003,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=together_bloom,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=together_glm,data_augmentation=contrast_sets,stop=hash,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=together_gpt-j-6b,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=together_gpt-neox-20b,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=together_opt-175b,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=together_opt-66b,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=together_redpajama-incite-base-3b-v1,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=together_t0pp,data_augmentation=contrast_sets,stop=hash,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=together_t5-11b,data_augmentation=contrast_sets,stop=hash,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=together_ul2,data_augmentation=contrast_sets,stop=hash,global_prefix=nlg,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=together_yalm,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=writer_palmyra-instruct-30,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
boolq:only_contrast=True,model=writer_palmyra-x,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
civil_comments:demographic=LGBTQ,model=AlephAlpha_luminous-base,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=AlephAlpha_luminous-extended,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=AlephAlpha_luminous-supreme,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=ai21_j1-grande,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=ai21_j1-grande-v2-beta,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=ai21_j1-jumbo,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=ai21_j1-large,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=ai21_j2-grande,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=ai21_j2-jumbo,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=ai21_j2-large,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=anthropic_stanford-online-all-v4-s3,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=cohere_command-medium-beta,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=cohere_command-xlarge-beta,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=cohere_large-20220720,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=cohere_medium-20220720,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=cohere_medium-20221108,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=cohere_small-20220720,data_augmentation=canonical | generation |
civil_comments:demographic=LGBTQ,model=cohere_xlarge-20220609,data_augmentation=canonical | generation |