diff --git a/.github/workflows/eval-functions.yml b/.github/workflows/eval-functions.yml index 796b66c..4fe99a2 100644 --- a/.github/workflows/eval-functions.yml +++ b/.github/workflows/eval-functions.yml @@ -104,12 +104,12 @@ jobs: mkdir -p eval-results EVAL_ERRORS=0 - echo "Running explainText evaluation..." - genkit eval:flow explainText \ - --input datasets/explain-chinese.json \ - --evaluators=custom/chineseTextPresent,custom/validPinyinFormat,custom/outputStructureValid,custom/grammarExplanationQuality \ - --batchSize 10 \ - --output eval-results/explain-chinese-results.json || { echo "⚠️ explainText evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } + # echo "Running explainText evaluation..." + # genkit eval:flow explainText \ + # --input datasets/explain-chinese.json \ + # --evaluators=custom/chineseTextPresent,custom/validPinyinFormat,custom/outputStructureValid,custom/grammarExplanationQuality \ + # --batchSize 10 \ + # --output eval-results/explain-chinese-results.json || { echo "⚠️ explainText evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } echo "Running explainEnglish evaluation..." genkit eval:flow explainEnglish \ @@ -118,26 +118,26 @@ jobs: --batchSize 10 \ --output eval-results/explain-english-results.json || { echo "⚠️ explainEnglish evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } - echo "Running generateChineseSentences evaluation..." - genkit eval:flow generateChineseSentences \ - --input datasets/generate-chinese-sentences.json \ - --evaluators=custom/chineseTextPresent,custom/validPinyinFormat,custom/outputStructureValid,custom/sentenceGenerationQuality \ - --batchSize 10 \ - --output eval-results/generate-sentences-results.json || { echo "⚠️ generateChineseSentences evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } - - echo "Running analyzeCollocation evaluation..." - genkit eval:flow analyzeCollocation \ - --input datasets/analyze-collocation.json \ - --evaluators=custom/chineseTextPresent,custom/englishTranslationPresent,custom/outputStructureValid \ - --batchSize 10 \ - --output eval-results/collocation-results.json || { echo "⚠️ analyzeCollocation evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } - - echo "Running explainWordInContext evaluation..." - genkit eval:flow explainWordInContext \ - --input datasets/explain-word-in-context.json \ - --evaluators=custom/chineseTextPresent,custom/englishTranslationPresent,custom/outputStructureValid \ - --batchSize 10 \ - --output eval-results/word-context-results.json || { echo "⚠️ explainWordInContext evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } + # echo "Running generateChineseSentences evaluation..." + # genkit eval:flow generateChineseSentences \ + # --input datasets/generate-chinese-sentences.json \ + # --evaluators=custom/chineseTextPresent,custom/validPinyinFormat,custom/outputStructureValid,custom/sentenceGenerationQuality \ + # --batchSize 10 \ + # --output eval-results/generate-sentences-results.json || { echo "⚠️ generateChineseSentences evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } + + # echo "Running analyzeCollocation evaluation..." + # genkit eval:flow analyzeCollocation \ + # --input datasets/analyze-collocation.json \ + # --evaluators=custom/chineseTextPresent,custom/englishTranslationPresent,custom/outputStructureValid \ + # --batchSize 10 \ + # --output eval-results/collocation-results.json || { echo "⚠️ analyzeCollocation evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } + + # echo "Running explainWordInContext evaluation..." + # genkit eval:flow explainWordInContext \ + # --input datasets/explain-word-in-context.json \ + # --evaluators=custom/chineseTextPresent,custom/englishTranslationPresent,custom/outputStructureValid \ + # --batchSize 10 \ + # --output eval-results/word-context-results.json || { echo "⚠️ explainWordInContext evaluation had errors"; EVAL_ERRORS=$((EVAL_ERRORS+1)); } if [ $EVAL_ERRORS -gt 0 ]; then echo "⚠️ $EVAL_ERRORS evaluation(s) had errors - check results for details" diff --git a/functions/prompts/explain-english.prompt b/functions/prompts/explain-english.prompt index f92836b..6dac2f8 100644 --- a/functions/prompts/explain-english.prompt +++ b/functions/prompts/explain-english.prompt @@ -7,7 +7,16 @@ output: schema: EnglishExplanationSchema --- {{role "system"}} -You are a helpful Chinese teacher for speakers of English who want to learn Chinese. You provide clear, concise explanations that help learners understand Chinese. +You are an expert Chinese language tutor. You have thoroughly studied the Chinese Grammar Wiki and HSK Standard Course textbooks, and you use their terminology and teaching approaches. + +Prioritize accuracy over comprehensiveness — only explain what you are confident about. +Keep your explanations focused and practical for a language learner. + +CRITICAL REQUIREMENTS: +1. Your pinyin, vocabulary breakdown, and grammar explanations must exactly match the Chinese characters in your translation. Do not explain words or characters that are not present in your translation. +2. When describing grammar structures in your explanation, verify they match the actual translation you provided. Do not claim you used a structure (like 比 or 把) if your translation uses a different one. +3. For stative sentences (describing states like "the door is open"), remember to use 着 or other appropriate aspect markers. + {{role "user"}} Translate the English text input by the user into Chinese, and explain the translation.