imdb:only_contrast=True,model=cohere_large-20220720,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=cohere_medium-20220720,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=cohere_medium-20221108,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=cohere_small-20220720,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=cohere_xlarge-20220609,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=cohere_xlarge-20221108,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=microsoft_TNLGv2_530B,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=microsoft_TNLGv2_7B,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=openai_ada,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=openai_babbage,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=openai_curie,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=openai_davinci,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=openai_gpt-3.5-turbo-0301,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=openai_text-ada-001,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=openai_text-babbage-001,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=openai_text-curie-001,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=openai_text-davinci-002,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=openai_text-davinci-003,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=together_bloom,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=together_glm,data_augmentation=contrast_sets,stop=hash,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=together_gpt-j-6b,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=together_gpt-neox-20b,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=together_opt-175b,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=together_opt-66b,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=together_redpajama-incite-base-3b-v1,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=together_t0pp,data_augmentation=contrast_sets,stop=hash,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=together_t5-11b,data_augmentation=contrast_sets,stop=hash,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=together_ul2,data_augmentation=contrast_sets,stop=hash,global_prefix=nlg,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=together_yalm,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=writer_palmyra-instruct-30,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
imdb:only_contrast=True,model=writer_palmyra-x,data_augmentation=contrast_sets,groups=robustness_contrast_sets | generation |
legal_support,method=multiple_choice_joint:model=AlephAlpha_luminous-base | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=AlephAlpha_luminous-extended | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=AlephAlpha_luminous-supreme | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=ai21_j1-grande | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=ai21_j1-grande-v2-beta | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=ai21_j1-jumbo | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=ai21_j1-large | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=ai21_j2-grande | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=ai21_j2-jumbo | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=ai21_j2-large | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=anthropic_stanford-online-all-v4-s3 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choice | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=cohere_command-medium-beta | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=cohere_command-xlarge-beta | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=cohere_large-20220720 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=cohere_medium-20220720 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=cohere_medium-20221108 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=cohere_small-20220720 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=cohere_xlarge-20220609 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=cohere_xlarge-20221108 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=microsoft_TNLGv2_530B | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=microsoft_TNLGv2_7B | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_ada | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_babbage | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_code-cushman-001 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_code-davinci-002 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_curie | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_davinci | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_gpt-3.5-turbo-0301 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_text-ada-001 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_text-babbage-001 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_text-curie-001 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_text-davinci-002 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=openai_text-davinci-003 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_bloom | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_bloom,groups=ablation_multiple_choice | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_glm,stop=hash | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_gpt-j-6b | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_gpt-j-6b,groups=ablation_multiple_choice | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_gpt-neox-20b | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_gpt-neox-20b,groups=ablation_multiple_choice | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_opt-175b | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_opt-175b,groups=ablation_multiple_choice | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_opt-66b | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_opt-66b,groups=ablation_multiple_choice | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_redpajama-incite-base-3b-v1 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_t0pp,stop=hash | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_t5-11b,stop=hash | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_ul2,stop=hash,global_prefix=nlg | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=together_yalm | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=writer_palmyra-instruct-30 | multiple_choice_joint |
legal_support,method=multiple_choice_joint:model=writer_palmyra-x | multiple_choice_joint |
legal_support,method=multiple_choice_separate_calibrated:model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
legal_support,method=multiple_choice_separate_calibrated:model=together_bloom,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
legal_support,method=multiple_choice_separate_calibrated:model=together_gpt-j-6b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
legal_support,method=multiple_choice_separate_calibrated:model=together_gpt-neox-20b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
legal_support,method=multiple_choice_separate_calibrated:model=together_opt-175b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
legal_support,method=multiple_choice_separate_calibrated:model=together_opt-66b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
legal_support,method=multiple_choice_separate_original:model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choice | multiple_choice_separate_original |
legal_support,method=multiple_choice_separate_original:model=together_bloom,groups=ablation_multiple_choice | multiple_choice_separate_original |
legal_support,method=multiple_choice_separate_original:model=together_gpt-j-6b,groups=ablation_multiple_choice | multiple_choice_separate_original |
legal_support,method=multiple_choice_separate_original:model=together_gpt-neox-20b,groups=ablation_multiple_choice | multiple_choice_separate_original |
legal_support,method=multiple_choice_separate_original:model=together_opt-175b,groups=ablation_multiple_choice | multiple_choice_separate_original |
legal_support,method=multiple_choice_separate_original:model=together_opt-66b,groups=ablation_multiple_choice | multiple_choice_separate_original |
lsat_qa:task=all,method=multiple_choice_joint,model=AlephAlpha_luminous-base | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=AlephAlpha_luminous-extended | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=AlephAlpha_luminous-supreme | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j1-grande | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j1-grande-v2-beta | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j1-jumbo | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j1-large | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j2-grande | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j2-jumbo | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=ai21_j2-large | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=anthropic_stanford-online-all-v4-s3 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choice | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_command-medium-beta | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_command-xlarge-beta | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_large-20220720 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_medium-20220720 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_medium-20221108 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_small-20220720 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_xlarge-20220609 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=cohere_xlarge-20221108 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=microsoft_TNLGv2_530B | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=microsoft_TNLGv2_7B | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_ada | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_babbage | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_code-cushman-001 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_code-davinci-002 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_curie | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_davinci | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_gpt-3.5-turbo-0301 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_text-ada-001 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_text-babbage-001 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_text-curie-001 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_text-davinci-002 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=openai_text-davinci-003 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_bloom | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_bloom,groups=ablation_multiple_choice | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_glm,stop=hash | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_gpt-j-6b | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_gpt-j-6b,groups=ablation_multiple_choice | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_gpt-neox-20b | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_gpt-neox-20b,groups=ablation_multiple_choice | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_opt-175b | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_opt-175b,groups=ablation_multiple_choice | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_opt-66b | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_opt-66b,groups=ablation_multiple_choice | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_redpajama-incite-base-3b-v1 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_t0pp,stop=hash | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_t5-11b,stop=hash | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_ul2,stop=hash,global_prefix=nlg | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=together_yalm | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=writer_palmyra-instruct-30 | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_joint,model=writer_palmyra-x | multiple_choice_joint |
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=together_bloom,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=together_gpt-j-6b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=together_gpt-neox-20b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=together_opt-175b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
lsat_qa:task=all,method=multiple_choice_separate_calibrated,model=together_opt-66b,groups=ablation_multiple_choice | multiple_choice_separate_calibrated |
lsat_qa:task=all,method=multiple_choice_separate_original,model=anthropic_stanford-online-all-v4-s3,groups=ablation_multiple_choice | multiple_choice_separate_original |
lsat_qa:task=all,method=multiple_choice_separate_original,model=together_bloom,groups=ablation_multiple_choice | multiple_choice_separate_original |
lsat_qa:task=all,method=multiple_choice_separate_original,model=together_gpt-j-6b,groups=ablation_multiple_choice | multiple_choice_separate_original |
lsat_qa:task=all,method=multiple_choice_separate_original,model=together_gpt-neox-20b,groups=ablation_multiple_choice | multiple_choice_separate_original |
lsat_qa:task=all,method=multiple_choice_separate_original,model=together_opt-175b,groups=ablation_multiple_choice | multiple_choice_separate_original |
lsat_qa:task=all,method=multiple_choice_separate_original,model=together_opt-66b,groups=ablation_multiple_choice | multiple_choice_separate_original |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=AlephAlpha_luminous-base | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=AlephAlpha_luminous-extended | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=AlephAlpha_luminous-supreme | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j1-grande | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j1-grande-v2-beta | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j1-jumbo | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j1-large | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j2-grande | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j2-jumbo | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=ai21_j2-large | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=anthropic_stanford-online-all-v4-s3 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_command-medium-beta | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_command-xlarge-beta | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_large-20220720 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_medium-20220720 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_medium-20221108 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_small-20220720 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_xlarge-20220609 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=cohere_xlarge-20221108 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_TNLGv2_530B | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=microsoft_TNLGv2_7B | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_ada | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_babbage | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_code-cushman-001 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_code-davinci-002 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_curie | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_davinci | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_gpt-3.5-turbo-0301 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_text-ada-001 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_text-babbage-001 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_text-curie-001 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_text-davinci-002 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=openai_text-davinci-003 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_bloom | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_glm,stop=hash | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_gpt-j-6b | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_gpt-neox-20b | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_opt-175b | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_opt-66b | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_redpajama-incite-base-3b-v1 | generation |
math:subject=algebra,level=1,use_official_examples=False,use_chain_of_thought=True,model=together_t0pp,stop=hash | generation |