Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
3d9c823
Add prompt variants feature to Prompt Node
ianarawjo Mar 13, 2025
d672345
fix countQueries backwards compatibility; add alertmodal for deleting…
ianarawjo Mar 14, 2025
bdf0b6a
Autoresize textarea when switching prompt variants. Ensure auto-templ…
ianarawjo Mar 14, 2025
cb9f3bf
cleanup unused imports
ianarawjo Mar 14, 2025
66d067b
Add 'human verification' icons beside eval scores in Response Inspector
ianarawjo Mar 15, 2025
7f77b73
Eval score assessments save. Boolean assessments flip when the eval f…
ianarawjo Mar 15, 2025
b357b5b
Merge branch 'main' into eval-the-evals
ianarawjo Mar 15, 2025
9350218
wip transfer
ianarawjo Mar 15, 2025
ca72eed
wip transfer
ianarawjo Mar 15, 2025
4ca8bd9
wip transfer
ianarawjo Mar 15, 2025
248cb9c
wip transfer
ianarawjo Mar 15, 2025
9044bb4
wip transfer
ianarawjo Mar 15, 2025
09123ee
wip transfer
ianarawjo Mar 15, 2025
4a5e952
Add new Stepper modal WIP
ianarawjo Mar 16, 2025
7d6d13c
wip
ianarawjo Mar 16, 2025
0724efb
wip
ianarawjo Mar 16, 2025
04fe78e
wip
ianarawjo Mar 16, 2025
411de58
wip expand EvalGen into its own folder
ianarawjo Mar 17, 2025
b6746e7
wip
ianarawjo Mar 18, 2025
10cc210
wip make executor use custom provider from global CF settings
ianarawjo Mar 19, 2025
6b2b3cf
wip getting executor to work
ianarawjo Mar 19, 2025
bc453a9
wip
ianarawjo Mar 22, 2025
2a0d6c4
Fixed bug in executor (whew)
ianarawjo Mar 26, 2025
8910d78
wip
ianarawjo Mar 28, 2025
1d207f1
Began refactoring for executor to use perCriteriaGrades. Changed 'ali…
ianarawjo Mar 30, 2025
8cbabc7
merge
ianarawjo May 12, 2025
16fbaa6
cleanup
ianarawjo May 12, 2025
ed86c99
wip
ianarawjo May 13, 2025
65c24e1
wip
ianarawjo May 13, 2025
ef3045b
Bug and typing fixing
ianarawjo May 13, 2025
79111f0
Add ability to change x-axis var in plot
ianarawjo May 14, 2025
9b6ad33
Bug fix plotting acc
ianarawjo May 14, 2025
e470a72
Bug fixes and inject feedback into eval criteria gen context
ianarawjo May 14, 2025
c81ac65
cleanup
ianarawjo May 14, 2025
140366d
Plot acc bug fix
ianarawjo May 14, 2025
8c2e808
Remove old evalgen files
ianarawjo May 14, 2025
95553f7
Update readme
ianarawjo May 14, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
435 changes: 435 additions & 0 deletions chainforge/react-server/src/EvalGen/EvalGenWizard.tsx

Large diffs are not rendered by default.

169 changes: 169 additions & 0 deletions chainforge/react-server/src/EvalGen/FeedbackStep.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import React, { useCallback, useEffect, useMemo, useState } from "react";
import { Dict, LLMResponse, RatingDict } from "../backend/typing";
import {
Button,
Center,
Flex,
Stack,
Text,
Textarea,
Title,
Tooltip,
} from "@mantine/core";
import GradingView from "./GradingView";
import { IconThumbDown, IconThumbUp } from "@tabler/icons-react";
import { getRatingKeyForResponse } from "../ResponseRatingToolbar";
import useStore from "../store";
import { deepcopy } from "../backend/utils";
import StorageCache from "../backend/cache";

/**
 * Props accepted by the FeedbackStep component.
 */
interface FeedbackStepProps {
  /** The LLM responses the user pages through and grades, one at a time. */
  responses: LLMResponse[];

  /**
   * Zero-argument callback supplied by the parent.
   * NOTE(review): presumably advances the enclosing wizard to its next step;
   * it is never invoked by FeedbackStep in this file, so confirm against the parent.
   */
  onNext: () => void;

  /**
   * Zero-argument callback supplied by the parent.
   * NOTE(review): presumably returns the enclosing wizard to its previous step;
   * it is never invoked by FeedbackStep in this file, so confirm against the parent.
   */
  onPrevious: () => void;

  /**
   * React state setter for a function-valued piece of parent state.
   * NOTE(review): presumably lets a step register a hook to run when the
   * parent's "next" action fires; FeedbackStep does not call it in this file.
   */
  setOnNextCallback: React.Dispatch<React.SetStateAction<() => unknown>>;
}

/**
 * Step in which the user grades model outputs one at a time.
 *
 * For the response currently shown, the user can mark it good/bad and type a
 * free-text reason. Both values are written to the global store and to
 * StorageCache under keys produced by `getRatingKeyForResponse(uid, label)`
 * with the labels "grade" and "note".
 *
 * Only `responses` is read here; the remaining members of FeedbackStepProps
 * are accepted for interface compatibility but are not used by this component.
 */
const FeedbackStep: React.FC<FeedbackStepProps> = ({ responses }) => {
  // Index into `responses` of the item currently on screen.
  const [shownResponseIdx, setShownResponseIdx] = useState(0);

  // Derived from the index rather than mirrored in separate state, so it can
  // never disagree with `responses` (undefined when the list is empty or the
  // index is momentarily out of range after `responses` shrinks).
  const shownResponse: LLMResponse | undefined = responses[shownResponseIdx];

  // Global state
  const storeState = useStore<Dict<RatingDict>>((store) => store.state);
  const setStoreState = useStore((store) => store.setState);

  // Grade stored for the shown response (slot 0 of its rating dict).
  // Normalised to `null` whenever nothing is stored, so that `grade === null`
  // is the single "not graded yet" check used below.
  const grade = useMemo(() => {
    if (!shownResponse) return null;
    const key = getRatingKeyForResponse(shownResponse.uid, "grade");
    return storeState[key]?.[0] ?? null;
  }, [shownResponse, storeState]);

  // Note stored for the shown response. Always a string, so the Textarea
  // below stays a controlled input.
  const annotation = useMemo(() => {
    if (!shownResponse) return "";
    const key = getRatingKeyForResponse(shownResponse.uid, "note");
    return storeState[key]?.[0]?.toString() ?? "";
  }, [shownResponse, storeState]);

  // Set the rating in the global store (which feeds the memos above) and
  // persist the same value to StorageCache.
  const setRating = useCallback(
    (
      uid: string | undefined,
      label: string,
      payload: boolean | string | null,
    ) => {
      if (!uid) return;
      const key = getRatingKeyForResponse(uid, label);
      setStoreState(key, { 0: payload }); // TODO: This will erase any feedback given on n>1 responses in the input.
      StorageCache.store(key, { 0: payload });
    },
    [setStoreState],
  );
  const setGrade = (val: boolean | null) =>
    setRating(shownResponse?.uid, "grade", val);
  const setAnnotation = (val: string) =>
    setRating(shownResponse?.uid, "note", val);

  // Whenever a new list of responses arrives, start again from the first one.
  useEffect(() => {
    setShownResponseIdx(0);
  }, [responses]);

  // Move forward one response; stays put on the last one (or on an empty list).
  const nextResponse = useCallback(() => {
    setShownResponseIdx((idx) => (idx < responses.length - 1 ? idx + 1 : idx));
  }, [responses.length]);

  // Move back one response; stays put on the first one.
  const prevResponse = useCallback(() => {
    setShownResponseIdx((idx) => (idx > 0 ? idx - 1 : idx));
  }, []);

  // A response may be submitted once it has a grade; a "bad" grade
  // additionally requires a non-empty explanation.
  const canSubmit = grade !== null && !(grade === false && !annotation);

  return (
    <Stack spacing="sm" mb={200}>
      <Title order={3}>Provide Feedback on Some Model Outputs</Title>

      <GradingView
        shownResponse={shownResponse}
        shownResponseIdx={shownResponseIdx}
        responseCount={responses.length}
        gotoNextResponse={nextResponse}
        gotoPrevResponse={prevResponse}
      />

      <Flex justify="center" gap="50px">
        <Tooltip label="This response is bad!" withinPortal withArrow>
          <Button
            color={grade === true ? "gray" : "red"}
            variant={grade !== false ? "outline" : "filled"}
            onClick={() => {
              // Clicking an already-selected grade clears it.
              setGrade(grade !== false ? false : null);
            }}
          >
            <IconThumbDown />
            &nbsp;Bad!
          </Button>
        </Tooltip>
        <Tooltip label="This response is good!" withinPortal withArrow>
          <Button
            color={grade === false ? "gray" : "green"}
            variant={grade !== true ? "outline" : "filled"}
            onClick={() => {
              // Clicking an already-selected grade clears it.
              setGrade(grade !== true ? true : null);
            }}
          >
            <IconThumbUp />
            &nbsp;Good!
          </Button>
        </Tooltip>
      </Flex>
      <Center mb={100}>
        <Stack spacing="xs" w="80%">
          <Text>What&apos;s the reason for your grade? Explain why:</Text>
          <Flex align="center" justify="space-around" gap="lg">
            <Textarea
              value={annotation}
              onChange={(e) => setAnnotation(e.currentTarget.value)}
              disabled={grade === null}
              autoFocus
              w="100%"
              onKeyDown={(e) => {
                if (e.key === "Enter") {
                  // Enter acts as a shortcut for the submit button, so it must
                  // obey the same rule; otherwise a "bad" grade with no
                  // explanation could be skipped past from the keyboard.
                  e.preventDefault();
                  if (canSubmit) nextResponse();
                }
              }}
            />
            <Button
              onClick={nextResponse}
              color="dark"
              disabled={!canSubmit}
              h={54}
            >
              Submit and Next
            </Button>
          </Flex>
        </Stack>
      </Center>
    </Stack>
  );
};

export default FeedbackStep;
Loading