Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/common/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Copyright (c) MLCommons and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
Expand Down Expand Up @@ -42,4 +46,5 @@
"ssl_org_pem_file_path": os.environ["SSL_ORG_PEM_FILE"],
"trial_jwtexp": 900,
"frontend_ip": os.environ["FRONTEND_IP"],
"runpod_api_key": os.environ.get("RUNPOD_API_KEY", ""),
}
117 changes: 76 additions & 41 deletions api/controllers/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,9 +348,12 @@ def do_upload_via_train_files(credentials, tid, model_name):
current_upload = json.loads(upload.file.read().decode("utf-8"))
upload.file.seek(0)
payload = {
"id_json": current_upload,
"bucket_name": task.s3_bucket,
"key": name,
"input": {
"id_json": current_upload,
"bucket_name": task.s3_bucket,
"key": name,
"model_id": model[1], # Add model_id for backend processing
}
}
s3_client.upload_fileobj(
upload.file,
Expand All @@ -359,19 +362,24 @@ def do_upload_via_train_files(credentials, tid, model_name):
)

light_model_endpoint = task.lambda_model
r = requests.post(light_model_endpoint, json=payload)

try:
score = r.json()["score"]
except Exception as ex:
logger.exception(ex)
subject = f"Model {model_name} failed training as {r.json()['detail']}"
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {config['runpod_api_key']}",
}

r = requests.post(light_model_endpoint, json=payload, headers=headers)

if r.status_code != 200:
logger.error(
f"RunPod request failed with status {r.status_code}: {r.text}"
)
Email().send(
contact=user.email,
cc_contact="dynabench-site@mlcommons.org",
template_name="model_train_failed.txt",
msg_dict={"name": model_name},
subject=subject,
msg_dict={"name": model_name, "model_id": model[1]},
subject=f"Model {model_name} submission failed",
)
for idx2, (rem_name, rem_upload) in enumerate(train_items[idx + 1 :]):
s3_client.upload_fileobj(
Expand All @@ -381,47 +389,74 @@ def do_upload_via_train_files(credentials, tid, model_name):
)
bottle.abort(400)

score = None # Will be set by the async evaluation

did = dm.getByName(name).id
r_realid = rm.getByTid(tid)[0].rid
if isinstance(task_config.get("perf_metric"), list):
metric = task_config.get("perf_metric")[0].get("type")
elif isinstance(task_config.get("perf_metric"), dict):
metric = task_config.get("perf_metric").get("type")
new_score = {
metric: score,
"perf": score,
"perf_std": 0.0,
"perf_by_tag": [
{
"tag": str(name),
"pretty_perf": f"{score} %",
"perf": score,
"perf_std": 0.0,
"perf_dict": {metric: score},
}
],
}

new_score_string = json.dumps(new_score)
if score is not None:
new_score = {
metric: score,
"perf": score,
"perf_std": 0.0,
"perf_by_tag": [
{
"tag": str(name),
"pretty_perf": f"{score} %",
"perf": score,
"perf_std": 0.0,
"perf_dict": {metric: score},
}
],
}

new_score_string = json.dumps(new_score)

sm.create(
model_id=model[1],
r_realid=r_realid,
did=did,
pretty_perf=f"{score} %",
perf=score,
metadata_json=new_score_string,
)

sm.create(
model_id=model[1],
r_realid=r_realid,
did=did,
pretty_perf=f"{score} %",
perf=score,
metadata_json=new_score_string,
if any(upload.content_type != "text/plain" for upload in train_files.values()):
Email().send(
contact=user.email,
cc_contact="dynabench-site@mlcommons.org",
template_name="model_train_successful.txt",
msg_dict={"name": model_name, "model_id": model[1]},
subject=f"Model {model_name} submitted for evaluation.",
)

Email().send(
contact=user.email,
cc_contact="dynabench-site@mlcommons.org",
template_name="model_train_successful.txt",
msg_dict={"name": model_name, "model_id": model[1]},
subject=f"Model {model_name} training succeeded.",
)
return util.json_encode(
{
"success": "ok",
"model_id": model[1],
"message": "Model submitted for evaluation. You will receive an email when evaluation is complete.",
}
)
else:
Email().send(
contact=user.email,
cc_contact="dynabench-site@mlcommons.org",
template_name="model_train_successful.txt",
msg_dict={"name": model_name, "model_id": model[1]},
subject=f"Model {model_name} evaluation completed.",
)

return util.json_encode({"success": "ok", "model_id": model[1]})
return util.json_encode(
{
"success": "ok",
"model_id": model[1],
"message": "Model evaluation completed successfully.",
}
)


@bottle.post("/models/upload_predictions/<tid:int>/<model_name>")
Expand Down
54 changes: 50 additions & 4 deletions backend/app/domain/services/base/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ def add_scores_and_update_model(
self.email_helper.send(
contact=user.email,
cc_contact=self.email_sender,
template_name="model_inference_failed.txt",
template_name="model_evaluation_failed.txt",
msg_dict={"name": model["name"], "message": message},
subject=f"Model {model['name']} evaluation failed.",
)
Expand All @@ -418,11 +418,57 @@ def add_scores_and_update_model(
round_info = self.round_repository.get_round_info_by_round_and_task(
model["tid"], round_id
)

# Get task configuration to determine score handling
task_config = self.task_repository.get_config_file_by_task_id(
model["tid"]
)[0]
task_config = yaml.safe_load(task_config)

metadata_json = dict(scores)

# Determine the main performance metric based on task configuration
perf_metric = task_config.get("perf_metric", {})
if isinstance(perf_metric, list):
main_metric = perf_metric[0].get("type", "score")
elif isinstance(perf_metric, dict):
main_metric = perf_metric.get("type", "score")
else:
main_metric = "score" # Default fallback

# Extract the score value - handle different formats
score_value = None

# First, try to extract from nested results (for RunPod format)
if "results" in metadata_json and "score" in metadata_json["results"]:
score_value = metadata_json["results"]["score"]
elif "score" in metadata_json:
score_value = metadata_json["score"]
elif main_metric in metadata_json:
score_value = metadata_json[main_metric]
elif "Standard_CER_15_WORSE" in metadata_json:
# Backward compatibility for speech tasks
score_value = metadata_json["Standard_CER_15_WORSE"]
main_metric = "Standard_CER_15_WORSE"
else:
raise ValueError(f"No score found in metadata: {metadata_json}")

# Format the score appropriately based on metric type
if main_metric == "Standard_CER_15_WORSE":
# Speech recognition - percentage format
pretty_perf = f"{100 * score_value:.2f}%"
else:
# Other tasks - decimal format
pretty_perf = f"{score_value:.4f}"

# Store the main metric type in metadata for reference
metadata_json["main_metric"] = main_metric
metadata_json["task_perf_metric"] = perf_metric

# Build the score structure with proper metric information
new_score = {
"perf": metadata_json["Standard_CER_15_WORSE"],
"pretty_perf": f"{100*metadata_json['Standard_CER_15_WORSE']:.2f}%",
"perf": score_value,
"pretty_perf": pretty_perf,
"mid": model_id,
"r_realid": round_info.id,
"did": datasets[0]["id"],
Expand All @@ -437,7 +483,7 @@ def add_scores_and_update_model(
cc_contact=self.email_sender,
template_name="model_evaluation_sucessful.txt",
msg_dict={"name": model["name"], "model_id": model["id"]},
subject=f"Model {model['name']} evaluation succeeded.",
subject=f"Model {model['name']} evaluation completed successfully.",
)
print(
f"sent email evaluation sucessful to {user.email} model {model['name']} "
Expand Down
Loading