diff --git a/Dockerfile b/Dockerfile index e3e66ed..ed40aee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -132,7 +132,6 @@ COPY --from=builder /usr/src /antlr_build/ COPY qcon qcon/ COPY api api/ COPY pandoc pandoc/ -COPY restapi restapi/ ENTRYPOINT ["docker-entrypoint.sh"] diff --git a/api/apps.py b/api/apps.py index 2a93f58..48d213e 100644 --- a/api/apps.py +++ b/api/apps.py @@ -1,5 +1,6 @@ from django.apps import AppConfig from django.conf import settings +from django.db import connection import sys import logging logger = logging.getLogger(__name__) @@ -18,6 +19,18 @@ def ready(self): else: logger.info("qconapi has started") + # Ensure database connection is ready before accessing the database + # This prevents the RuntimeWarning about accessing database during app initialization + try: + connection.ensure_connection() + except Exception: + # Database not ready yet, skip initialization + return + + # Skip database operations during migrations + if 'migrate' in sys.argv or 'makemigrations' in sys.argv: + return + from django.contrib.auth.models import User if not User.objects.filter(username=settings.ADMIN_USERNAME).exists(): User.objects.create_superuser( diff --git a/api/consumers.py b/api/consumers.py index 5b3e9dc..3dae884 100644 --- a/api/consumers.py +++ b/api/consumers.py @@ -3,27 +3,21 @@ from django.core.files.base import ContentFile import base64 from os.path import normpath -from .models import Question, Section, QuestionLibrary, \ - Image, MultipleChoice, MultipleChoiceAnswer, TrueFalse, Fib, MultipleSelect, MultipleSelectAnswer, \ - Matching, MatchingAnswer, MatchingChoice, Ordering, WrittenResponse -import re +from .models import QuestionLibrary import logging newlogger = logging.getLogger(__name__) from .logging.logging_adapter import FilenameLoggingAdapter # from .logging.contextfilter import QuestionlibraryFilenameFilter # logger.addFilter(QuestionlibraryFilenameFilter()) -from .logging.ErrorTypes import EMFImageError -from .process.process_helper 
import add_error_message, html_to_plain, trim_text -from .serializers import JsonResponseSerializer -from .process.process import Process +from .pipelines.response_payload import build_response_payload, build_status_payload +from .pipelines.ws_pipeline import Process -from .process.extract_images import ImageExtractError -from .process.formatter import FormatterError -from .process.sectioner import SectionerError -from .process.splitter import SplitterError -from .process.endanswers import EndAnswerError -from .process.parser import ParserError -from .tasks import MarkDownConversionError +from .formats.docx.extract_images import ImageExtractError +from .formats.docx.formatter import FormatterError +from .formats.docx.sectioner import SectionerError +from .formats.docx.splitter import SplitterError +from .formats.docx.endanswers import EndAnswerError +from .formats.docx.parser import ParserError # class FilenameLoggingAdapter(logging.LoggerAdapter): @@ -49,47 +43,6 @@ def disconnect(self, close_code): newlogger.info("Closing Connection") # self.channel_layer.group_discard(self.sessionid, self.channel_name) - # Replace image marker with actual img element and return a boolean - def replace_image(self, obj, key, process, logger): - regex = r"(?<=<<<<)\d+(?=>>>>)" - obj_text = getattr(obj, key) - is_image = None - if obj_text: - is_image = re.search(regex, obj_text) - - if is_image != None: - obj_name = obj._meta.model.__name__ - if obj_name == "Question": - logger.debug(f'Adding Image(s) to Question #{obj.number_provided}') - elif obj_name == "Section": - logger.debug(f'Adding Image(s) to Section "{obj.title}"') - else: - logger.debug(f'Adding Image(s) to a {obj_name}') - - image_ids = list(set(re.findall(regex, obj_text))) - for image_id in image_ids: - image = process.questionlibrary.get_image(int(image_id)) - img_src = image.image - placeholder = "<<<<" + image_id + ">>>>" - - if re.match(r"\' - add_error_message(obj, error_message) - raise EMFImageError(obj.error) 
- except Exception as e: - logger.error(e) - - obj_text = re.sub(placeholder, lambda x: image.image, obj_text) - - setattr(obj, key, obj_text) - obj.save() - return True - return False - - - def receive_json(self, content, **kwargs): ########################################### @@ -125,9 +78,17 @@ def receive_json(self, content, **kwargs): logger.info("File Saved") except Exception as e: logger.error("Not a valid .docx File: {e}") - self.send(text_data=json.dumps(process.sendformat("Error", "Not a valid .docx File", ""))) + error_payload = build_status_payload( + "Error", + "Not a valid .docx File", + "", + process=None, + questionlibrary=None, + ) + self.send(text_data=json.dumps(error_payload)) # close connection - self.send(text_data=json.dumps(process.sendformat("Close", "", ""))) + close_payload = build_status_payload("Close", "", "", process=None, questionlibrary=None) + self.send(text_data=json.dumps(close_payload)) return ########################################### @@ -140,10 +101,23 @@ def receive_json(self, content, **kwargs): logger.info("Pandoc DONE") except Exception as e: logger.error(str(e)) - self.send( - text_data=json.dumps(process.sendformat("Error", "File unreadable", ""))) + error_payload = build_status_payload( + "Error", + "File unreadable", + "", + process=process, + questionlibrary=process.questionlibrary, + ) + self.send(text_data=json.dumps(error_payload)) # close connection - self.send(text_data=json.dumps(process.sendformat("Close", "", ""))) + close_payload = build_status_payload( + "Close", + "", + "", + process=process, + questionlibrary=process.questionlibrary, + ) + self.send(text_data=json.dumps(close_payload)) # return # except Exception as e: # self.send(text_data=json.dumps(process.sendformat("Close", "", ""))) @@ -199,9 +173,23 @@ def receive_json(self, content, **kwargs): logger.info("Formatter DONE") except FormatterError as e: logger.error("FormatterError: " + str(e)) - 
self.send(text_data=json.dumps(process.sendformat("Error", "No contents found in the body of the file", ""))) + error_payload = build_status_payload( + "Error", + "No contents found in the body of the file", + "", + process=process, + questionlibrary=process.questionlibrary, + ) + self.send(text_data=json.dumps(error_payload)) # close connection - self.send(text_data=json.dumps(process.sendformat("Close", "", ""))) + close_payload = build_status_payload( + "Close", + "", + "", + process=process, + questionlibrary=process.questionlibrary, + ) + self.send(text_data=json.dumps(close_payload)) return else: self.send(text_data=json.dumps(process.sendformat("Busy", "Content Body detected", ""))) @@ -215,9 +203,23 @@ def receive_json(self, content, **kwargs): logger.info("Sectioner DONE") except SectionerError as e: logger.error("SectionerError: " + str(e)) - self.send(text_data=json.dumps(process.sendformat("Error", "Sections can not be identified", ""))) + error_payload = build_status_payload( + "Error", + "Sections can not be identified", + "", + process=process, + questionlibrary=process.questionlibrary, + ) + self.send(text_data=json.dumps(error_payload)) # close connection - self.send(text_data=json.dumps(process.sendformat("Close", "", ""))) + close_payload = build_status_payload( + "Close", + "", + "", + process=process, + questionlibrary=process.questionlibrary, + ) + self.send(text_data=json.dumps(close_payload)) return else: self.send(text_data=json.dumps(process.sendformat("Busy", "Section found: " + str(process.subsection_count), ""))) @@ -231,9 +233,23 @@ def receive_json(self, content, **kwargs): logger.info("Splitter DONE") except Exception as e: logger.error("SplitterError: " + str(e)) - self.send(text_data=json.dumps(process.sendformat("Error", "Splitter failed", ""))) + error_payload = build_status_payload( + "Error", + "Splitter failed", + "", + process=process, + questionlibrary=process.questionlibrary, + ) + 
self.send(text_data=json.dumps(error_payload)) # close connection - self.send(text_data=json.dumps(process.sendformat("Close", "", ""))) + close_payload = build_status_payload( + "Close", + "", + "", + process=process, + questionlibrary=process.questionlibrary, + ) + self.send(text_data=json.dumps(close_payload)) return else: self.send(text_data=json.dumps(process.sendformat("Busy", "Question found: " + str(process.questions_expected), ""))) @@ -260,171 +276,39 @@ def receive_json(self, content, **kwargs): logger.info("Parser DONE") except Exception as e: logger.error("ParserError: " + str(e)) - self.send(text_data=json.dumps(process.sendformat("Error", "Parser failed", ""))) + error_payload = build_status_payload( + "Error", + "Parser failed", + "", + process=process, + questionlibrary=process.questionlibrary, + ) + self.send(text_data=json.dumps(error_payload)) # close connection - self.send(text_data=json.dumps(process.sendformat("Close", "", ""))) + close_payload = build_status_payload( + "Close", + "", + "", + process=process, + questionlibrary=process.questionlibrary, + ) + self.send(text_data=json.dumps(close_payload)) else: self.send(text_data=json.dumps(process.sendformat("Busy", "Parser complete", ""))) -########################################### - # Loop All Sections and Questions to count error, add/replace images, and add question.title -########################################### - logger.debug("Start Adding Images back ...") - try: - # select all sections for this QL - sections = process.questionlibrary.get_sections() - for section in sections: - - # DO NOT DELETE: replace images in section.text - section_replace_image = self.replace_image(section, "text", process, logger) - - # select all questions for this QL - questions = Question.objects.filter(section=section) - - for question in questions: - is_table = False - img_replaced = False - -########################################### - # count all question level errors 
-########################################### - # logger.debug("count all question level errors ...") - if question.info is not None: - process.question_info_count += 1 - - if question.warning is not None: - process.question_warning_count += 1 - - if question.error is not None: - process.question_error_count += 1 - - - is_table = re.search(r"", question.text) or is_table - -########################################### - # replace Image placeholder for questions -########################################### - - # replace image in question.text if exist - img_replaced = self.replace_image(question, 'text', process, logger) or img_replaced - - match(question.questiontype): - case 'MC': - #Check MC - MC_answer_objects = MultipleChoiceAnswer.objects.filter(multiple_choice__question=question) - for answer in MC_answer_objects: - img_replaced = self.replace_image(answer, 'answer', process, logger) or img_replaced - is_table = re.search(r"", answer.answer) or is_table - if answer.answer_feedback is not None: - img_replaced = self.replace_image(answer, 'answer_feedback', process, logger) or img_replaced - is_table = re.search(r"", answer.answer_feedback) or is_table - case 'TF': - #Check TF - TF_object = TrueFalse.objects.filter(question=question) - for tf in TF_object: - if tf.true_feedback is not None: - img_replaced = self.replace_image(tf, 'true_feedback', process, logger) or img_replaced - is_table = re.search(r"", tf.true_feedback) or is_table - if tf.false_feedback is not None: - img_replaced = self.replace_image(tf, 'false_feedback', process, logger) or img_replaced - is_table = re.search(r"", tf.false_feedback) or is_table - case 'FIB' | 'FMB': - #Check FIB - FIB_object = Fib.objects.filter(question=question) - for fib_question in FIB_object: - img_replaced = self.replace_image(fib_question, 'text', process, logger) or img_replaced - is_table = re.search(r"", fib_question.text) or is_table - case 'MS' | 'MR': - #Check MS - MS_answer_objects = 
MultipleSelectAnswer.objects.filter(multiple_select__question=question) - for answer in MS_answer_objects: - img_replaced = self.replace_image(answer, 'answer', process, logger) or img_replaced - is_table = re.search(r"", answer.answer) or is_table - if answer.answer_feedback is not None: - img_replaced = self.replace_image(answer, 'answer_feedback', process, logger) or img_replaced - is_table = re.search(r"", answer.answer_feedback) or is_table - case 'ORD': - #Check ORD - ORD_objects = Ordering.objects.filter(question=question) - for ordering in ORD_objects: - if ordering.text is not None: - img_replaced = self.replace_image(ordering, 'text', process, logger) or img_replaced - is_table = re.search(r"", ordering.text) or is_table - if ordering.ord_feedback is not None: - img_replaced = self.replace_image(ordering, 'ord_feedback', process, logger) or img_replaced - is_table = re.search(r"", ordering.ord_feedback) or is_table - case 'MAT' | 'MT': - #Check MAT answer - MAT_answer_objects = MatchingAnswer.objects.filter(matching_choice__matching__question=question) - for mat_answer in MAT_answer_objects: - if mat_answer.answer_text is not None: - img_replaced = self.replace_image(mat_answer, 'answer_text', process, logger) or img_replaced - is_table = re.search(r"", mat_answer.answer_text) or is_table - #Check MAT choice - MAT_choice_objects = MatchingChoice.objects.filter(matching__question=question) - for mat_choice in MAT_choice_objects: - if mat_choice.choice_text is not None: - img_replaced = self.replace_image(mat_choice, 'choice_text', process, logger) or img_replaced - is_table = re.search(r"", mat_choice.choice_text) or is_table - case 'WR' | 'E': - #Check WR - WR_objects = WrittenResponse.objects.filter(question=question) - for wr in WR_objects: - if wr.initial_text is not None: - img_replaced = self.replace_image(wr, 'initial_text', process, logger) or img_replaced - is_table = re.search(r"", wr.initial_text) or is_table - if wr.answer_key is not None: - 
img_replaced = self.replace_image(wr, 'answer_key', process, logger) or img_replaced - is_table = re.search(r"", wr.answer_key) or is_table - - -########################################### - # Add question.title -########################################### - prefix = '' - - if is_table: - prefix = '[TABLE]' + prefix - if img_replaced: - prefix = '[IMG]' + prefix - - # Save question.title - if question.title is None: - title_text = question.text - title_text = title_text.replace('\n', ' ') - title_text = re.sub(r"", "[IMG]", title_text) - title_text = re.sub(r"", "[TABLE]", title_text) - title_text = re.sub(r"<<<<\d+>>>>", "[IMG]", title_text) - - if question.questiontype == 'FIB' or question.questiontype == 'FMB': - title_text = re.sub(r"\[(.*?)\]", "_______", title_text) - - title_text = html_to_plain(title_text) - title_text = trim_text(title_text) - - if prefix != '': - prefix = prefix + ' ' - title_text = re.sub(r"\s*\[IMG\]", "", title_text).strip() - title_text = re.sub(r"\s*\[TABLE\]", "", title_text).strip() - - title_text = prefix + title_text - question.title = title_text[0:127] - question.save() - - except Exception as e: - logger.error(e) - - logger.debug("Adding Images back DONE") - - - -########################################### # serialize and send response ########################################### logger.info("Process End") - serialized_ql = JsonResponseSerializer(process.questionlibrary) - self.send(text_data=json.dumps(process.sendformat("Done", "", serialized_ql.data))) + json_data = build_response_payload(process.questionlibrary, preview=True) + done_payload = build_status_payload( + "Done", + "", + json_data, + process=process, + questionlibrary=process.questionlibrary, + ) + self.send(text_data=json.dumps(done_payload)) ######################### Close Connection self.send(text_data=json.dumps(process.sendformat("Close", "", ""))) diff --git a/api/formats/__init__.py b/api/formats/__init__.py new file mode 100644 index 0000000..dc75237 --- 
/dev/null +++ b/api/formats/__init__.py @@ -0,0 +1 @@ +# Package for supported content formats. diff --git a/api/formats/docx/__init__.py b/api/formats/docx/__init__.py new file mode 100644 index 0000000..cda4da3 --- /dev/null +++ b/api/formats/docx/__init__.py @@ -0,0 +1 @@ +# DOCX format handlers. diff --git a/api/process/convert_txt.py b/api/formats/docx/convert_txt.py similarity index 100% rename from api/process/convert_txt.py rename to api/formats/docx/convert_txt.py diff --git a/api/process/endanswers.py b/api/formats/docx/endanswers.py similarity index 97% rename from api/process/endanswers.py rename to api/formats/docx/endanswers.py index 7ceedd8..0082ea3 100644 --- a/api/process/endanswers.py +++ b/api/formats/docx/endanswers.py @@ -1,7 +1,7 @@ import os import subprocess import xml.etree.ElementTree as ET -from ..models import EndAnswer +from ...models import EndAnswer import re def get_endanswers(questionlibrary): diff --git a/api/process/extract_images.py b/api/formats/docx/extract_images.py similarity index 97% rename from api/process/extract_images.py rename to api/formats/docx/extract_images.py index d184946..e8a298e 100644 --- a/api/process/extract_images.py +++ b/api/formats/docx/extract_images.py @@ -1,5 +1,5 @@ import re -from ..models import Image +from ...models import Image def extract_images(questionlibrary): try: diff --git a/api/process/fix_numbering.py b/api/formats/docx/fix_numbering.py similarity index 100% rename from api/process/fix_numbering.py rename to api/formats/docx/fix_numbering.py diff --git a/api/process/formatter.py b/api/formats/docx/formatter.py similarity index 64% rename from api/process/formatter.py rename to api/formats/docx/formatter.py index 5a1af05..a451d52 100644 --- a/api/process/formatter.py +++ b/api/formats/docx/formatter.py @@ -32,9 +32,32 @@ def run_formatter(questionlibrary): maincontenttitle = root.find('maincontent_title') logger.debug("checking maincontent title") if maincontenttitle is not None: - 
main_title = (maincontenttitle.text).strip() - if main_title: - questionlibrary.main_title = (trim_text(main_title)).lstrip('# ') + raw_main = (maincontenttitle.text or "").strip() + if raw_main: + # Use the first H1 line as the title; remaining lines become root-level text + main_lines = raw_main.splitlines() + title_index = None + for idx, line in enumerate(main_lines): + if line.lstrip().startswith('#'): + title_index = idx + break + + if title_index is not None: + main_title = main_lines[title_index].strip() + main_title = (trim_text(main_title)).lstrip('# ').strip() + main_text_lines = main_lines[title_index + 1:] + else: + # Fallback: treat the first line as title if no H1 is found + main_title = (trim_text(main_lines[0])).lstrip('# ').strip() + main_text_lines = main_lines[1:] + + main_text = "\n".join(main_text_lines).strip() + + if main_title: + questionlibrary.main_title = main_title + if main_text: + # Preserve raw markdown for root-level text + questionlibrary.main_text = main_text questionlibrary.save() # ==================================== BODY diff --git a/api/process/parser.py b/api/formats/docx/parser.py similarity index 98% rename from api/process/parser.py rename to api/formats/docx/parser.py index 4761d3b..659eece 100644 --- a/api/process/parser.py +++ b/api/formats/docx/parser.py @@ -1,6 +1,6 @@ import os import xml.etree.ElementTree as ET -from ..models import EndAnswer, Section, Question +from ...models import EndAnswer, Section, Question from django.conf import settings import logging diff --git a/api/process/process_helper.py b/api/formats/docx/process_helper.py similarity index 100% rename from api/process/process_helper.py rename to api/formats/docx/process_helper.py diff --git a/api/process/sectioner.py b/api/formats/docx/sectioner.py similarity index 93% rename from api/process/sectioner.py rename to api/formats/docx/sectioner.py index 05bbb84..7d3d06e 100644 --- a/api/process/sectioner.py +++ b/api/formats/docx/sectioner.py @@ -3,7 
+3,7 @@ import xml.etree.ElementTree as ET # from .process_helper import markdown_to_plain, trim_text, markdown_to_html from api.tasks import markdown_to_plain, trim_text, markdown_to_html -from ..models import Section +from ...models import Section import logging newlogger = logging.getLogger(__name__) @@ -60,6 +60,9 @@ def run_sectioner(questionlibrary): sectionobject.raw_content = maincontent.text sectionobject.is_main_content = True sectionobject.title = questionlibrary.main_title + if questionlibrary.main_text: + sectionobject.text = markdown_to_html(questionlibrary.main_text) + sectionobject.is_text_displayed = True sectiontext = section.find('sectiontext') if sectiontext is not None: diff --git a/api/process/splitter.py b/api/formats/docx/splitter.py similarity index 99% rename from api/process/splitter.py rename to api/formats/docx/splitter.py index 391c371..caf227f 100644 --- a/api/process/splitter.py +++ b/api/formats/docx/splitter.py @@ -1,8 +1,8 @@ import os import subprocess import xml.etree.ElementTree as ET -from ..models import Section -from ..models import Question +from ...models import Section +from ...models import Question # from .process_helper import trim_text from api.tasks import trim_text import logging diff --git a/api/formats/scorm/__init__.py b/api/formats/scorm/__init__.py new file mode 100644 index 0000000..9d3416e --- /dev/null +++ b/api/formats/scorm/__init__.py @@ -0,0 +1 @@ +# SCORM format handlers. diff --git a/api/formats/scorm/manifest.py b/api/formats/scorm/manifest.py new file mode 100644 index 0000000..46f6374 --- /dev/null +++ b/api/formats/scorm/manifest.py @@ -0,0 +1,79 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +import xml.etree.cElementTree as ET + +NS_D2L = "http://desire2learn.com/xsd/d2lcp_v2p0" +NS_IMS = "http://www.imsglobal.org/xsd/imscp_v1p1" + + +class ManifestEntity(object): + resources = [] + + def __init__(self): + del self.resources[:] + + def add_resource(self, manifest_resource_entity): + self.resources.append(manifest_resource_entity) + + +class ManifestResourceEntity(object): + def __init__(self, identifier, resource_type, material_type, href, title = '', link_target = ''): + self.identifier = identifier + self.resource_type = resource_type + self.material_type = material_type + self.href = href + self.title = title + self.link_target = link_target + + +def build_manifest_tree(manifest_entity: ManifestEntity, identifier: str = "MANIFEST_1") -> ET.ElementTree: + """ + Build an imsmanifest.xml tree using shared namespaces/constants. + """ + root = ET.Element( + "manifest", + {"xmlns:d2l_2p0": NS_D2L, "xmlns": NS_IMS, "identifier": identifier}, + ) + resources_el = ET.SubElement(root, "resources") + for resource in manifest_entity.resources: + ET.SubElement( + resources_el, + "resource", + { + "identifier": resource.identifier, + "type": resource.resource_type, + "d2l_2p0:material_type": resource.material_type, + "href": resource.href, + "d2l_2p0:link_target": resource.link_target, + "title": resource.title, + }, + ) + return ET.ElementTree(root) + + +def parse_manifest_tree(tree: ET.ElementTree) -> dict: + """ + Parse an imsmanifest.xml ElementTree into a simple dict structure + consistent with XmlReader.parse_manifest output. 
+ """ + root = tree.getroot() + manifest_data = { + "identifier": root.get("identifier", ""), + "resources": [], + } + resources_el = root.find("resources") + if resources_el is not None: + for resource_el in resources_el.findall("resource"): + manifest_data["resources"].append( + { + "identifier": resource_el.get("identifier", ""), + "type": resource_el.get("type", ""), + "material_type": resource_el.get(f"{{{NS_D2L}}}material_type", ""), + "href": resource_el.get("href", ""), + "link_target": resource_el.get(f"{{{NS_D2L}}}link_target", ""), + "title": resource_el.get("title", ""), + } + ) + return manifest_data \ No newline at end of file diff --git a/api/formats/scorm/manifest_builder.py b/api/formats/scorm/manifest_builder.py new file mode 100644 index 0000000..7db3106 --- /dev/null +++ b/api/formats/scorm/manifest_builder.py @@ -0,0 +1,29 @@ +import xml.etree.cElementTree as ET + + +def build_manifest(manifest_entity): + root = ET.Element( + "manifest", + { + "xmlns:d2l_2p0": "http://desire2learn.com/xsd/d2lcp_v2p0", + "xmlns": "http://www.imsglobal.org/xsd/imscp_v1p1", + "identifier": "MANIFEST_1", + }, + ) + doc = ET.SubElement(root, "resources") + + for resource in manifest_entity.resources: + ET.SubElement( + doc, + "resource", + { + "identifier": resource.identifier, + "type": resource.resource_type, + "d2l_2p0:material_type": resource.material_type, + "href": resource.href, + "d2l_2p0:link_target": resource.link_target, + "title": resource.title, + }, + ) + + return ET.ElementTree(root) diff --git a/api/formats/scorm/scorm_extractor.py b/api/formats/scorm/scorm_extractor.py new file mode 100644 index 0000000..fa7151d --- /dev/null +++ b/api/formats/scorm/scorm_extractor.py @@ -0,0 +1,255 @@ +from api.models import ( + QuestionLibrary, + Section, + Question, + MultipleChoice, + MultipleChoiceAnswer, + TrueFalse, + Fib, + MultipleSelect, + MultipleSelectAnswer, + Matching, + MatchingChoice, + MatchingAnswer, + Ordering, + WrittenResponse, +) + +from 
.scorm_unzipper import extract_scorm_zip +from .scorm_parser import ScormParser + + +class ScormExtractor: + """ + Import SCORM XML data into Django models. + """ + + def __init__(self, scorm_zip_path, extract_to_path=None): + self.scorm_zip_path = scorm_zip_path + self.extracted_path = extract_scorm_zip(scorm_zip_path, extract_to_path) + self.parser = ScormParser(self.extracted_path) + + def parse_manifest(self): + return self.parser.parse_manifest() + + def parse_questiondb(self): + return self.parser.parse_questiondb() + + def populate_django_models(self, question_library=None): + """ + Populate Django models from parsed SCORM XML data. + + Args: + question_library: Optional existing QuestionLibrary instance to use. + If None, a new one will be created. + + Returns: + QuestionLibrary: The QuestionLibrary instance with all sections and questions + """ + question_library_data = self.parse_questiondb() + + main_title = "" + if question_library_data["sections"]: + main_title = question_library_data["sections"][0].get("title", "") + + if question_library is None: + question_library = QuestionLibrary.objects.create( + main_title=main_title, + shuffle=False, + ) + else: + question_library.main_title = main_title + question_library.save() + + section_order = 1 + question_index = 1 + for section_data in question_library_data["sections"]: + has_nested_sections = len(section_data.get("sections", [])) > 0 + has_direct_questions = len(section_data.get("questions", [])) > 0 + has_text = section_data.get("text", "").strip() != "" + should_set_main_text = ( + has_text + # and section_data.get("is_text_displayed", False) + and not question_library.main_text + ) + if should_set_main_text: + question_library.main_text = section_data.get("text", "") + question_library.save(update_fields=["main_text"]) + + if has_direct_questions or has_text: + section = Section.objects.create( + question_library=question_library, + is_main_content=True, + order=section_order, + 
title=section_data.get("title", ""), + is_title_displayed=section_data.get("is_title_displayed", True), + text=section_data.get("text", ""), + is_text_displayed=section_data.get("is_text_displayed", False), + shuffle=section_data.get("shuffle", False), + ) + + for question_data in section_data.get("questions", []): + self._create_question_model(section, question_data, question_index) + question_index += 1 + + for nested_section_data in section_data.get("sections", []): + nested_section = Section.objects.create( + question_library=question_library, + is_main_content=False, + order=section_order + 1, + title=nested_section_data.get("title", ""), + is_title_displayed=nested_section_data.get("is_title_displayed", True), + text=nested_section_data.get("text", ""), + is_text_displayed=nested_section_data.get("is_text_displayed", False), + shuffle=nested_section_data.get("shuffle", False), + ) + + for question_data in nested_section_data.get("questions", []): + self._create_question_model(nested_section, question_data, question_index) + question_index += 1 + + section_order += 1 + + section_order += 1 + elif has_nested_sections: + for nested_section_data in section_data.get("sections", []): + nested_section = Section.objects.create( + question_library=question_library, + is_main_content=False, + order=section_order, + title=nested_section_data.get("title", ""), + is_title_displayed=nested_section_data.get("is_title_displayed", True), + text=nested_section_data.get("text", ""), + is_text_displayed=nested_section_data.get("is_text_displayed", False), + shuffle=section_data.get("shuffle", False), + ) + + for question_data in nested_section_data.get("questions", []): + self._create_question_model(nested_section, question_data, question_index) + question_index += 1 + + section_order += 1 + + return question_library + + def _create_question_model(self, section, question_data, index): + question = Question.objects.create( + section=section, + index=index, + 
title=question_data.get("title", ""), + questiontype=question_data.get("question_type_code", ""), + text=question_data.get("text", ""), + points=question_data.get("points", 1.0), + hint=question_data.get("hint"), + feedback=question_data.get("feedback"), + ) + + question_type_code = question_data.get("question_type_code", "") + specific_data = question_data.get("question_specific_data", {}) + + if question_type_code == "MC": + self._create_multiple_choice_model(question, specific_data) + elif question_type_code == "TF": + self._create_true_false_model(question, specific_data) + elif question_type_code == "FIB": + self._create_fib_model(question, specific_data) + elif question_type_code == "MS": + self._create_multiple_select_model(question, specific_data) + elif question_type_code == "MAT": + self._create_matching_model(question, specific_data) + elif question_type_code == "ORD": + self._create_ordering_model(question, specific_data) + elif question_type_code == "WR": + self._create_written_response_model(question, specific_data) + + return question + + def _create_multiple_choice_model(self, question, mc_data): + mc = MultipleChoice.objects.create( + question=question, + randomize=mc_data.get("randomize", False), + enumeration=mc_data.get("enumeration", 4), + ) + + for answer_data in mc_data.get("answers", []): + MultipleChoiceAnswer.objects.create( + multiple_choice=mc, + order=answer_data.get("order", 1), + answer=answer_data.get("answer", ""), + answer_feedback=answer_data.get("answer_feedback"), + weight=answer_data.get("weight", 0.0), + ) + + def _create_true_false_model(self, question, tf_data): + TrueFalse.objects.create( + question=question, + true_weight=tf_data.get("true_weight", 0.0), + true_feedback=tf_data.get("true_feedback"), + false_weight=tf_data.get("false_weight", 0.0), + false_feedback=tf_data.get("false_feedback"), + enumeration=tf_data.get("enumeration", 4), + ) + + def _create_fib_model(self, question, fib_data): + for fib_item in 
fib_data.get("fibs", []): + Fib.objects.create( + question=question, + type=fib_item.get("type", "fibquestion"), + text=fib_item.get("text", ""), + order=fib_item.get("order", 1), + size=fib_item.get("size"), + ) + + def _create_multiple_select_model(self, question, ms_data): + ms = MultipleSelect.objects.create( + question=question, + randomize=ms_data.get("randomize", False), + enumeration=ms_data.get("enumeration", 4), + style=ms_data.get("style", 2), + grading_type=ms_data.get("grading_type", 2), + ) + + for answer_data in ms_data.get("answers", []): + MultipleSelectAnswer.objects.create( + multiple_select=ms, + order=answer_data.get("order", 1), + answer=answer_data.get("answer", ""), + answer_feedback=answer_data.get("answer_feedback"), + is_correct=answer_data.get("is_correct", False), + ) + + def _create_matching_model(self, question, mat_data): + matching = Matching.objects.create( + question=question, + grading_type=mat_data.get("grading_type", 0), + ) + + for choice_data in mat_data.get("choices", []): + matching_choice = MatchingChoice.objects.create( + matching=matching, + choice_text=choice_data.get("choice_text", ""), + ) + + for answer_data in choice_data.get("matching_answers", []): + MatchingAnswer.objects.create( + matching_choice=matching_choice, + answer_text=answer_data.get("answer_text", ""), + ) + + def _create_ordering_model(self, question, ord_data): + for item_data in ord_data.get("items", []): + Ordering.objects.create( + question=question, + text=item_data.get("text", ""), + order=item_data.get("order", 1), + ord_feedback=item_data.get("ord_feedback"), + ) + + def _create_written_response_model(self, question, wr_data): + WrittenResponse.objects.create( + question=question, + enable_student_editor=wr_data.get("enable_student_editor", False), + initial_text=wr_data.get("initial_text"), + answer_key=wr_data.get("answer_key", ""), + enable_attachments=wr_data.get("enable_attachments", False), + ) diff --git 
a/api/formats/scorm/scorm_formatter.py b/api/formats/scorm/scorm_formatter.py new file mode 100644 index 0000000..0b06f69 --- /dev/null +++ b/api/formats/scorm/scorm_formatter.py @@ -0,0 +1,415 @@ +import re +from bs4 import BeautifulSoup + + +class ScormFormatter: + """ + Format question library models into markdown and DOCX. + """ + + def _html_to_markdown(self, html_text): + """ + Convert HTML text with base64 images to markdown format. + Preserves ALL tags as HTML. Converts MathML to TeX when possible. + """ + if not html_text: + return "" + + img_pattern = r"]*?>" + + html_images = {} + image_counter = 0 + + def preserve_img_tag(match): + nonlocal image_counter + full_img_tag = match.group(0) + placeholder = f"__HTML_IMAGE_{image_counter}__" + html_images[placeholder] = full_img_tag + image_counter += 1 + return placeholder + + math_blocks = {} + math_counter = 0 + math_pattern = r"" + + def preserve_math(match): + nonlocal math_counter + full_math = match.group(0) + placeholder = f"__MATH_BLOCK_{math_counter}__" + tex_match = re.search( + r']*encoding=["\']application/x-tex["\'][^>]*>(.*?)', + full_math, + flags=re.IGNORECASE | re.DOTALL, + ) + tex = tex_match.group(1) if tex_match else None + math_blocks[placeholder] = {"tex": tex, "raw": full_math} + math_counter += 1 + return placeholder + + result = re.sub(img_pattern, preserve_img_tag, html_text) + result = re.sub(math_pattern, preserve_math, result, flags=re.IGNORECASE) + + result = re.sub(r"

", "\n", result, flags=re.IGNORECASE) + result = re.sub(r"]*>", "\n", result, flags=re.IGNORECASE) + + try: + soup = BeautifulSoup(result, "html.parser") + for br in soup.find_all("br"): + br.replace_with("[[[BR]]]") + text = soup.get_text(separator=" ", strip=False) + text = text.replace("[[[BR]]]", "\n") + except Exception: + text = re.sub(r"<(?!/?__HTML_IMAGE_)[^>]+>", "", result) + + for placeholder, math_info in math_blocks.items(): + replacement = None + if math_info.get("tex"): + tex = math_info["tex"].strip() + replacement = f"$$ {tex} $$" + else: + replacement = math_info.get("raw", "") + text = text.replace(placeholder, replacement) + for placeholder, html_img in html_images.items(): + text = text.replace(placeholder, html_img) + + text = text.replace("\r", "") + text = re.sub(r"\n{3,}", "\n\n", text) + normalized_lines = [] + for line in text.split("\n"): + stripped = line.strip() + if stripped == "": + normalized_lines.append("") + continue + if ( + re.search(r"]*>", stripped, flags=re.IGNORECASE) + or re.search(r"\s+<", "><", result) + result = re.sub(r"\s+", " ", result) + result = result.strip() + return result + except Exception: + cleaned = re.sub(r">\s+<", "><", html_text) + cleaned = re.sub(r"\s+", " ", cleaned).strip() + return cleaned + + def format_to_markdown(self, question_library): + """ + Format parsed questions from Django models into markdown/text format. 
+ """ + lines = [] + + if question_library.main_title: + main_title = question_library.main_title + try: + soup = BeautifulSoup(main_title, "html.parser") + main_title = soup.get_text(separator=" ", strip=True) + except Exception: + main_title = re.sub(r"\s+", " ", main_title).strip() + lines.append(f"# {main_title}") + lines.append("") + + if getattr(question_library, "main_text", None): + main_text = self._html_to_markdown(question_library.main_text) + lines.append(main_text) + lines.append("") + + sections = question_library.get_sections() + for section in sections: + if not section.is_main_content: + if section.title and section.is_title_displayed: + section_title_display = section.title + try: + soup = BeautifulSoup(section_title_display, "html.parser") + section_title_display = soup.get_text(separator=" ", strip=True) + except Exception: + section_title_display = re.sub(r"\s+", " ", section_title_display).strip() + lines.append("") + lines.append("
") + lines.append("#section") + lines.append(f"## {section_title_display}") + + should_display_text = False + if section.is_main_content: + should_display_text = section.text and section.is_text_displayed + else: + should_display_text = bool(section.text) + + if should_display_text: + section_text = self._html_to_markdown(section.text) + lines.append(section_text) + + questions = section.get_questions() + for idx, question in enumerate(questions): + question_markdown = self._format_question_to_markdown(question) + lines.append(question_markdown) + + if not section.is_main_content and idx == len(questions) - 1: + lines.append("") + lines.append("
") + lines.append("/section") + + if not section.is_main_content and len(questions) == 0: + lines.append("") + lines.append("
") + lines.append("/section") + + result = "\n".join(lines) + if result and not result.endswith("\n"): + result += "\n" + return result + + def _format_question_to_markdown(self, question): + """ + Format a single question to markdown format matching raw_content format. + """ + lines = [] + + if question.questiontype: + lines.append("") + lines.append("
") + lines.append(f"Type: {question.questiontype}") + if question.title: + lines.append(f"Title: {question.title}") + if question.points: + normalized_points = str(float(question.points)).rstrip("0").rstrip(".") + lines.append(f"Points: {normalized_points}") + + randomize_value = None + if question.questiontype == "MC": + mc = question.get_multiple_choice() + if mc and mc.randomize is not None: + randomize_value = mc.randomize + elif question.questiontype == "MS": + ms = question.get_multiple_select() + if ms and ms.randomize is not None: + randomize_value = ms.randomize + if randomize_value is True: + lines.append("Randomize: yes") + + if question.text and question.questiontype != "FIB": + question_text = self._html_to_markdown(question.text) + plain_text = re.sub(r"!\[.*?\]\([^)]+\)", "", question_text) + plain_text = re.sub(r"<[^>]+>", "", plain_text) + plain_text = re.sub(r"\s+", " ", plain_text).strip() + + question_number = None + if question.index is not None: + question_number = question.index + elif question.number_provided is not None: + question_number = question.number_provided + + if question_number is not None: + lines.append(f"{question_number}. {question_text}") + else: + lines.append(question_text) + + question_type = question.questiontype + if question_type == "MC": + answer_text = self._format_multiple_choice_markdown(question) + if answer_text: + lines.append(answer_text) + elif question_type == "TF": + answer_text = self._format_true_false_markdown(question) + if answer_text: + lines.append(answer_text) + elif question_type == "FIB": + answer_text = self._format_fib_markdown(question) + if answer_text: + question_number = None + if question.index is not None: + question_number = question.index + elif question.number_provided is not None: + question_number = question.number_provided + + if question_number is not None: + lines.append(f"{question_number}. 
{answer_text}") + else: + lines.append(answer_text) + elif question_type == "MS": + answer_text = self._format_multi_select_markdown(question) + if answer_text: + lines.append(answer_text) + elif question_type == "MAT": + answer_text = self._format_matching_markdown(question) + if answer_text: + lines.append(answer_text) + elif question_type == "ORD": + answer_text = self._format_ordering_markdown(question) + if answer_text: + lines.append(answer_text) + elif question_type == "WR": + answer_text = self._format_written_response_markdown(question) + if answer_text: + lines.append(answer_text) + + if question.hint: + hint_text = self._html_to_markdown(question.hint) + lines.append(f"@Hint: {hint_text}") + + if question.feedback: + feedback_text = self._html_to_markdown(question.feedback) + lines.append(f"@Feedback: {feedback_text}") + + return "\n\n".join(lines) + + def _format_multiple_choice_markdown(self, question): + lines = [] + mc = question.get_multiple_choice() + if mc: + answers = mc.get_multiple_choice_answers() + for idx, answer in enumerate(answers, start=1): + letter = chr(96 + idx) + marker = "*" if answer.weight and answer.weight > 0 else "" + answer_text = self._html_to_markdown(answer.answer) + lines.append(f" {letter}. {marker}{answer_text}") + if answer.answer_feedback: + feedback_text = self._html_to_markdown(answer.answer_feedback) + lines.append(f" @Feedback: {feedback_text}") + return "\n".join(lines) + + def _format_true_false_markdown(self, question): + lines = [] + tf = question.get_true_false() + if tf: + true_marker = "*" if tf.true_weight and tf.true_weight > 0 else "" + false_marker = "*" if tf.false_weight and tf.false_weight > 0 else "" + lines.append(f" a. {true_marker}True") + if tf.true_feedback: + feedback_text = self._html_to_markdown(tf.true_feedback) + lines.append(f" @Feedback: {feedback_text}") + lines.append(f" b. 
{false_marker}False") + if tf.false_feedback: + feedback_text = self._html_to_markdown(tf.false_feedback) + lines.append(f" @Feedback: {feedback_text}") + return "\n".join(lines) + + def _format_fib_markdown(self, question): + lines = [] + fibs = question.get_fibs() + current_text = "" + for fib in fibs: + if fib.type == "fibquestion": + if fib.text: + cleaned_text = self._html_to_markdown(fib.text) + current_text += cleaned_text + elif fib.type == "fibanswer": + if fib.text: + current_text += f" [{fib.text}]" + else: + current_text += " [ ]" + if current_text: + lines.append(current_text) + return "\n".join(lines) + + def _format_multi_select_markdown(self, question): + lines = [] + ms = question.get_multiple_select() + if ms: + answers = ms.get_multiple_select_answers() + for idx, answer in enumerate(answers, start=1): + letter = chr(96 + idx) + marker = "*" if answer.is_correct else "" + answer_text = self._html_to_markdown(answer.answer) + lines.append(f" {letter}. {marker}{answer_text}") + if answer.answer_feedback: + feedback_text = self._html_to_markdown(answer.answer_feedback) + lines.append(f" @Feedback: {feedback_text}") + return "\n".join(lines) + + def _format_matching_markdown(self, question): + lines = [] + matching = question.get_matching() + if matching: + choices = matching.get_matching_choices() + for idx, choice in enumerate(choices, start=1): + letter = chr(96 + idx) + choice_text = self._html_to_markdown(choice.choice_text) + + answers = choice.matching_answers.all() + if answers: + answer = answers[0] + answer_text = self._html_to_markdown(answer.answer_text) + lines.append(f" {letter}. {choice_text} = {answer_text}") + else: + lines.append(f" {letter}. 
{choice_text} =") + return "\n".join(lines) + + def _format_ordering_markdown(self, question): + lines = [] + orderings = question.get_orderings() + for idx, ordering in enumerate(orderings, start=1): + letter = chr(96 + idx) + ordering_text = self._html_to_markdown(ordering.text) + lines.append(f" {letter}. {ordering_text}") + if ordering.ord_feedback: + feedback_text = self._html_to_markdown(ordering.ord_feedback) + lines.append(f" @Feedback: {feedback_text}") + return "\n".join(lines) + + def _format_written_response_markdown(self, question): + lines = [] + wr = question.get_written_response() + if wr and wr.answer_key: + lines.append("") + answer_text = self._html_to_markdown(wr.answer_key) + lines.append("Correct Answer:") + lines.append(f"{answer_text}") + return "\n\n".join(lines) + + def convert_markdown_to_docx(self, markdown_text, output_path): + """ + Convert markdown text to DOCX file using pandoc. + """ + import pypandoc + import tempfile + import os + + with tempfile.NamedTemporaryFile(mode="w", suffix=".md", delete=False, encoding="utf-8") as temp_md: + temp_md.write(markdown_text) + temp_md_path = temp_md.name + + try: + pypandoc.convert_file( + temp_md_path, + format="markdown_github+fancy_lists+emoji+hard_line_breaks+all_symbols_escapable+escaped_line_breaks+pipe_tables+startnum+tex_math_dollars", + to="docx+empty_paragraphs", + outputfile=output_path, + extra_args=[ + "--no-highlight", + "--preserve-tabs", + "--wrap=preserve", + "--indent=false", + "--mathml", + "--ascii", + ], + ) + finally: + if os.path.exists(temp_md_path): + os.unlink(temp_md_path) + + return output_path diff --git a/api/formats/scorm/scorm_parser.py b/api/formats/scorm/scorm_parser.py new file mode 100644 index 0000000..b9f0655 --- /dev/null +++ b/api/formats/scorm/scorm_parser.py @@ -0,0 +1,845 @@ +import os +import re +import base64 +import html +import xml.etree.cElementTree as ET +from os import path +from bs4 import BeautifulSoup + + +class ScormParser: + """ + Parse 
SCORM XML files (questiondb.xml, imsmanifest.xml) into dicts. + """ + + def __init__(self, extracted_path): + self.extracted_path = extracted_path + self.questiondb_xml = None + self.imsmanifest_xml = None + self._parse_xml_files() + + def _parse_xml_files(self): + """Parse questiondb.xml and imsmanifest.xml from extracted files.""" + questiondb_path = path.join(self.extracted_path, "questiondb.xml") + imsmanifest_path = path.join(self.extracted_path, "imsmanifest.xml") + + if not path.exists(questiondb_path): + raise FileNotFoundError(f"questiondb.xml not found in SCORM package: {questiondb_path}") + + if not path.exists(imsmanifest_path): + raise FileNotFoundError(f"imsmanifest.xml not found in SCORM package: {imsmanifest_path}") + + self.questiondb_xml = ET.parse(questiondb_path) + self.imsmanifest_xml = ET.parse(imsmanifest_path) + + def parse_manifest(self): + """ + Parse imsmanifest.xml and extract metadata. + + Returns: + dict: Dictionary containing manifest metadata + """ + root = self.imsmanifest_xml.getroot() + + manifest_data = { + "identifier": root.get("identifier", ""), + "resources": [], + } + + resources_el = root.find("resources") + if resources_el is not None: + for resource_el in resources_el.findall("resource"): + resource_data = { + "identifier": resource_el.get("identifier", ""), + "type": resource_el.get("type", ""), + "material_type": resource_el.get("{http://desire2learn.com/xsd/d2lcp_v2p0}material_type", ""), + "href": resource_el.get("href", ""), + "link_target": resource_el.get("{http://desire2learn.com/xsd/d2lcp_v2p0}link_target", ""), + "title": resource_el.get("title", ""), + } + manifest_data["resources"].append(resource_data) + + return manifest_data + + def parse_questiondb(self): + """ + Parse questiondb.xml and extract question library structure. 
+ + Returns: + dict: Dictionary containing question library data structure + """ + root = self.questiondb_xml.getroot() + objectbank_el = root.find("objectbank") + if objectbank_el is None: + raise ValueError("objectbank element not found in questiondb.xml") + + question_library_data = { + "ident": objectbank_el.get("ident", ""), + "sections": [], + } + + base_sections = objectbank_el.findall("section") + for section_el in base_sections: + section_data = self._parse_section(section_el) + question_library_data["sections"].append(section_data) + + return question_library_data + + def _parse_section(self, section_el): + """ + Parse a section element and extract section data. + """ + section_data = { + "ident": section_el.get("ident", ""), + "title": section_el.get("title", ""), + "shuffle": False, + "is_title_displayed": True, + "is_text_displayed": False, + "text": "", + "questions": [], + } + + selection_ordering = section_el.find("selection_ordering") + if selection_ordering is not None: + order_el = selection_ordering.find("order") + if order_el is not None and order_el.get("order_type") == "Random": + section_data["shuffle"] = True + + presentation_material = section_el.find("presentation_material") + if presentation_material is not None: + text = self._extract_material_text(presentation_material) + section_data["text"] = text + + sectionproc = section_el.find("sectionproc_extension") + if sectionproc is not None: + display_name = sectionproc.find("{http://desire2learn.com/xsd/d2lcp_v2p0}display_section_name") + if display_name is not None: + section_data["is_title_displayed"] = display_name.text.lower() == "yes" + + type_display = sectionproc.find("{http://desire2learn.com/xsd/d2lcp_v2p0}type_display_section") + if type_display is not None: + section_data["is_text_displayed"] = type_display.text == "1" + + nested_sections = section_el.findall("section") + for nested_section_el in nested_sections: + nested_section_data = self._parse_section(nested_section_el) + 
section_data["sections"] = section_data.get("sections", []) + section_data["sections"].append(nested_section_data) + + items = section_el.findall("item") + for item_el in items: + question_data = self._parse_question(item_el) + section_data["questions"].append(question_data) + + return section_data + + def _parse_question(self, item_el): + """ + Parse a question (item) element and extract question data. + """ + question_data = { + "ident": item_el.get("ident", ""), + "label": item_el.get("label", ""), + "title": item_el.get("title", ""), + "question_type": None, + "points": 1.0, + "text": "", + "hint": None, + "feedback": None, + "question_specific_data": {}, + } + + itemmetadata = item_el.find("itemmetadata") + if itemmetadata is not None: + qtidata = itemmetadata.find("qtimetadata") + if qtidata is not None: + for field in qtidata.findall("qti_metadatafield"): + fieldlabel = field.find("fieldlabel") + fieldentry = field.find("fieldentry") + if fieldlabel is not None and fieldentry is not None: + if fieldlabel.text == "qmd_questiontype": + question_data["question_type"] = fieldentry.text + elif fieldlabel.text == "qmd_weighting": + try: + question_data["points"] = float(fieldentry.text) + except (ValueError, TypeError): + pass + + presentation = item_el.find("presentation") + if presentation is not None: + question_text = self._extract_question_text(presentation) + question_data["text"] = question_text + + hint_el = item_el.find("hint") + if hint_el is not None: + question_data["hint"] = self._extract_hint_text(hint_el) + + feedback_els = item_el.findall("itemfeedback") + for feedback_el in feedback_els: + if feedback_el.get("ident") == question_data["label"]: + question_data["feedback"] = self._extract_feedback_text(feedback_el) + + question_type = question_data["question_type"] + if question_type: + if question_type == "Multiple Choice": + question_data["question_specific_data"] = self._parse_multiple_choice(item_el, question_data["label"]) + 
question_data["question_type_code"] = "MC" + elif question_type == "True/False": + question_data["question_specific_data"] = self._parse_true_false(item_el, question_data["label"]) + question_data["question_type_code"] = "TF" + elif question_type == "Fill in the Blanks": + question_data["question_specific_data"] = self._parse_fill_in_the_blanks(item_el, question_data["label"]) + question_data["question_type_code"] = "FIB" + elif question_type == "Multi-Select": + question_data["question_specific_data"] = self._parse_multi_select(item_el, question_data["label"]) + question_data["question_type_code"] = "MS" + elif question_type == "Matching": + question_data["question_specific_data"] = self._parse_matching(item_el, question_data["label"]) + question_data["question_type_code"] = "MAT" + elif question_type == "Ordering": + question_data["question_specific_data"] = self._parse_ordering(item_el, question_data["label"]) + question_data["question_type_code"] = "ORD" + elif question_type == "Long Answer": + question_data["question_specific_data"] = self._parse_written_response(item_el, question_data["label"]) + question_data["question_type_code"] = "WR" + + return question_data + + def _extract_material_text(self, material_el): + """ + Extract text content from material element, handling CDATA and images. + """ + text_parts = [] + + flow_mat = material_el.find("flow_mat") + if flow_mat is not None: + materials = flow_mat.findall(".//material") + for material in materials: + mattext = material.find("mattext") + if mattext is not None: + raw_text = mattext.text if mattext.text else "" + if mattext.tail: + raw_text += mattext.tail + decoded_text = html.unescape(raw_text) + cleaned_text = self._clean_cdata(decoded_text) + cleaned_text = self._inline_scorm_images(cleaned_text) + text_parts.append(cleaned_text) + + return "".join(text_parts) + + def _extract_question_text(self, presentation_el): + """ + Extract question text from presentation element. 
+ """ + text_parts = [] + + flow = presentation_el.find("flow") + if flow is not None: + material = flow.find("material") + if material is not None: + mattext = material.find("mattext") + if mattext is not None: + raw_text = mattext.text if mattext.text else "" + if mattext.tail: + raw_text += mattext.tail + decoded_text = html.unescape(raw_text) + cleaned_text = self._clean_cdata(decoded_text) + cleaned_text = self._inline_scorm_images(cleaned_text) + text_parts.append(cleaned_text) + + return "".join(text_parts) + + def _extract_hint_text(self, hint_el): + """Extract text from hint element.""" + hintmaterial = hint_el.find("hintmaterial") + if hintmaterial is not None: + return self._extract_material_text(hintmaterial) + return None + + def _extract_feedback_text(self, feedback_el): + """ + Extract text from feedback element. + """ + material = feedback_el.find("material") + if material is not None: + mattext = material.find("mattext") + if mattext is not None: + raw_text = mattext.text if mattext.text else "" + decoded_text = html.unescape(raw_text) + cleaned_text = self._clean_cdata(decoded_text) + return self._inline_scorm_images(cleaned_text) + return None + + def _clean_cdata(self, text): + """ + Normalize whitespace from CDATA sections while preserving HTML tags. + """ + if not text: + return "" + + try: + cleaned = re.sub(r"[ \t\n\r]+", " ", text) + cleaned = re.sub(r">\s+<", "><", cleaned) + cleaned = cleaned.strip() + return cleaned + except Exception: + cleaned = re.sub(r"\s+", " ", text).strip() + return cleaned + + def _inline_scorm_images(self, html_text): + """ + Convert SCORM image file paths to base64 data URIs in HTML text. 
+ """ + if not html_text or not self.extracted_path: + return html_text + + img_pattern = r']*?)src=["\']([^"\']+)["\']([^>]*?)>' + + def replace_image(match): + before_src = match.group(1) + img_src = match.group(2) + after_src = match.group(3) + + if img_src.startswith("data:") or "base64" in img_src: + return match.group(0) + + if img_src.startswith("http://") or img_src.startswith("https://"): + return match.group(0) + + try: + img_path = img_src.lstrip("./") + possible_paths = [ + path.join(self.extracted_path, img_path), + path.join(self.extracted_path, "assessment-assets", path.basename(img_path)), + ] + + image_file = None + for possible_path in possible_paths: + if path.exists(possible_path) and path.isfile(possible_path): + image_file = possible_path + break + + if not image_file: + for root, dirs, files in os.walk(self.extracted_path): + if path.basename(img_path) in files: + image_file = path.join(root, path.basename(img_path)) + break + + if image_file and path.exists(image_file): + with open(image_file, "rb") as f: + image_data = f.read() + base64_data = base64.b64encode(image_data).decode("utf-8") + + ext = path.splitext(image_file)[1].lower() + mime_types = { + ".png": "image/png", + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".gif": "image/gif", + ".svg": "image/svg+xml", + ".webp": "image/webp", + } + mime_type = mime_types.get(ext, "image/png") + + base64_src = f"data:{mime_type};base64,{base64_data}" + import logging + logger = logging.getLogger(__name__) + logger.info( + f"Converted SCORM image {path.basename(image_file)} to base64 ({len(base64_data)} chars)" + ) + return f'' + else: + import logging + logger = logging.getLogger(__name__) + logger.warning(f"SCORM image not found: {img_src} (searched in {self.extracted_path})") + return match.group(0) + except Exception: + return match.group(0) + + result = re.sub(img_pattern, replace_image, html_text) + return result + + def _parse_multiple_choice(self, item_el, question_ident): + """ + 
Parse multiple choice question data. + """ + mc_data = { + "randomize": False, + "enumeration": 4, + "answers": [], + } + + presentation = item_el.find("presentation") + if presentation is None: + return mc_data + + flow = presentation.find("flow") + if flow is None: + return mc_data + + response_ext = flow.find("response_extension") + if response_ext is not None: + enumeration_el = response_ext.find("{http://desire2learn.com/xsd/d2lcp_v2p0}enumeration") + if enumeration_el is not None and enumeration_el.text: + try: + mc_data["enumeration"] = int(enumeration_el.text) + except (ValueError, TypeError): + pass + + response_lid = flow.find("response_lid") + if response_lid is not None: + render_choice = response_lid.find("render_choice") + if render_choice is not None: + mc_data["randomize"] = render_choice.get("shuffle", "no").lower() == "yes" + + question_lid = response_lid.get("ident", "") + answer_index = 1 + for flow_label in response_lid.findall(".//flow_label"): + response_label = flow_label.find("response_label") + if response_label is not None: + answer_ident = response_label.get("ident", "") + mattext = response_label.find(".//mattext") + answer_text = "" + if mattext is not None: + raw_text = mattext.text if mattext.text else "" + decoded_text = html.unescape(raw_text) + answer_text = self._clean_cdata(decoded_text) + + weight = 0.0 + answer_feedback = None + resprocessing = item_el.find("resprocessing") + if resprocessing is not None: + for respcondition in resprocessing.findall("respcondition"): + conditionvar = respcondition.find("conditionvar") + if conditionvar is not None: + varequal = conditionvar.find("varequal") + if varequal is not None and varequal.get("respident") == question_lid: + if varequal.text == answer_ident: + setvar = respcondition.find("setvar") + if setvar is not None: + try: + weight = float(setvar.text) + except (ValueError, TypeError): + pass + + displayfeedback = respcondition.find("displayfeedback") + if displayfeedback is not 
None: + feedback_ident = displayfeedback.get("linkrefid", "") + feedback_el = item_el.find( + f".//itemfeedback[@ident='{feedback_ident}']" + ) + if feedback_el is not None: + answer_feedback = self._extract_feedback_text(feedback_el) + + mc_data["answers"].append( + { + "answer": answer_text, + "weight": weight, + "answer_feedback": answer_feedback, + "order": answer_index, + } + ) + answer_index += 1 + + return mc_data + + def _parse_true_false(self, item_el, question_ident): + """ + Parse true/false question data. + """ + tf_data = { + "true_weight": 0.0, + "true_feedback": None, + "false_weight": 0.0, + "false_feedback": None, + "enumeration": 4, + } + + presentation = item_el.find("presentation") + if presentation is None: + return tf_data + + flow = presentation.find("flow") + if flow is None: + return tf_data + + response_ext = flow.find("response_extension") + if response_ext is not None: + enumeration_el = response_ext.find("{http://desire2learn.com/xsd/d2lcp_v2p0}enumeration") + if enumeration_el is not None and enumeration_el.text: + try: + tf_data["enumeration"] = int(enumeration_el.text) + except (ValueError, TypeError): + pass + + response_lid = flow.find("response_lid") + if response_lid is not None: + question_lid = response_lid.get("ident", "") + + render_choice = response_lid.find("render_choice") + true_ident = None + false_ident = None + if render_choice is not None: + response_labels = render_choice.findall(".//response_label") + if len(response_labels) >= 1: + true_ident = response_labels[0].get("ident", "") + if len(response_labels) >= 2: + false_ident = response_labels[1].get("ident", "") + + resprocessing = item_el.find("resprocessing") + + if resprocessing is not None: + for respcondition in resprocessing.findall("respcondition"): + conditionvar = respcondition.find("conditionvar") + if conditionvar is not None: + varequal = conditionvar.find("varequal") + if varequal is not None and varequal.get("respident") == question_lid: + 
answer_ident = varequal.text + + if true_ident and answer_ident == true_ident: + setvar = respcondition.find("setvar") + if setvar is not None: + try: + tf_data["true_weight"] = float(setvar.text) + except (ValueError, TypeError): + pass + + displayfeedback = respcondition.find("displayfeedback") + if displayfeedback is not None: + feedback_ident = displayfeedback.get("linkrefid", "") + feedback_el = item_el.find( + f".//itemfeedback[@ident='{feedback_ident}']" + ) + if feedback_el is not None: + tf_data["true_feedback"] = self._extract_feedback_text(feedback_el) + + elif false_ident and answer_ident == false_ident: + setvar = respcondition.find("setvar") + if setvar is not None: + try: + tf_data["false_weight"] = float(setvar.text) + except (ValueError, TypeError): + pass + + displayfeedback = respcondition.find("displayfeedback") + if displayfeedback is not None: + feedback_ident = displayfeedback.get("linkrefid", "") + feedback_el = item_el.find( + f".//itemfeedback[@ident='{feedback_ident}']" + ) + if feedback_el is not None: + tf_data["false_feedback"] = self._extract_feedback_text(feedback_el) + + return tf_data + + def _parse_fill_in_the_blanks(self, item_el, question_ident): + """ + Parse fill in the blanks question data. 
+ """ + fib_data = {"fibs": []} + + presentation = item_el.find("presentation") + if presentation is None: + return fib_data + + flow = presentation.find("flow") + if flow is None: + return fib_data + + idx = 1 + for child in flow: + if child.tag == "material": + mattext = child.find("mattext") + text = "" + if mattext is not None: + raw_text = mattext.text if mattext.text else "" + text = html.unescape(raw_text) + + fib_data["fibs"].append({"type": "fibquestion", "text": text, "order": idx}) + + elif child.tag == "response_str": + question_ans = question_ident + str(idx) + "_ANS" + + answers = [] + resprocessing = item_el.find("resprocessing") + if resprocessing is not None: + for respcondition in resprocessing.findall("respcondition"): + conditionvar = respcondition.find("conditionvar") + if conditionvar is not None: + varequal = conditionvar.find("varequal") + if varequal is not None and varequal.get("respident") == question_ans: + answer_text = varequal.text if varequal.text else "" + if answer_text: + answers.append(answer_text) + + fib_data["fibs"].append( + { + "type": "fibanswer", + "text": ",".join(answers) if answers else "", + "order": idx, + "size": 30, + } + ) + idx += 1 + + return fib_data + + def _parse_multi_select(self, item_el, question_ident): + """ + Parse multi-select question data. 
+ """ + ms_data = { + "randomize": False, + "enumeration": 4, + "style": 2, + "grading_type": 2, + "answers": [], + } + + presentation = item_el.find("presentation") + if presentation is None: + return ms_data + + flow = presentation.find("flow") + if flow is None: + return ms_data + + response_ext = flow.find("response_extension") + if response_ext is not None: + enumeration_el = response_ext.find("{http://desire2learn.com/xsd/d2lcp_v2p0}enumeration") + if enumeration_el is not None and enumeration_el.text: + try: + ms_data["enumeration"] = int(enumeration_el.text) + except (ValueError, TypeError): + pass + + grading_type_el = response_ext.find("{http://desire2learn.com/xsd/d2lcp_v2p0}grading_type") + if grading_type_el is not None and grading_type_el.text: + try: + ms_data["grading_type"] = int(grading_type_el.text) + except (ValueError, TypeError): + pass + + response_lid = flow.find("response_lid") + if response_lid is not None: + question_lid = response_lid.get("ident", "") + + render_choice = response_lid.find("render_choice") + if render_choice is not None: + ms_data["randomize"] = render_choice.get("shuffle", "no").lower() == "yes" + + answer_index = 1 + for flow_label in response_lid.findall(".//flow_label"): + response_label = flow_label.find("response_label") + if response_label is not None: + answer_ident = response_label.get("ident", "") + + mattext = response_label.find(".//mattext") + answer_text = "" + if mattext is not None: + raw_text = mattext.text if mattext.text else "" + decoded_text = html.unescape(raw_text) + answer_text = self._clean_cdata(decoded_text) + + is_correct = False + answer_feedback = None + resprocessing = item_el.find("resprocessing") + if resprocessing is not None: + for respcondition in resprocessing.findall("respcondition"): + conditionvar = respcondition.find("conditionvar") + if conditionvar is not None: + varequal = conditionvar.find("varequal") + if varequal is not None and varequal.get("respident") == question_lid: + if 
varequal.text == answer_ident: + setvar = respcondition.find("setvar") + if setvar is not None: + if setvar.get("varname") == "D2L_Correct": + is_correct = True + + displayfeedback = respcondition.find("displayfeedback") + if displayfeedback is not None: + feedback_ident = displayfeedback.get("linkrefid", "") + feedback_el = item_el.find( + f".//itemfeedback[@ident='{feedback_ident}']" + ) + if feedback_el is not None: + answer_feedback = self._extract_feedback_text(feedback_el) + + ms_data["answers"].append( + { + "answer": answer_text, + "is_correct": is_correct, + "answer_feedback": answer_feedback, + "order": answer_index, + } + ) + answer_index += 1 + + return ms_data + + def _parse_matching(self, item_el, question_ident): + """ + Parse matching question data. + """ + mat_data = { + "grading_type": 0, + "choices": [], + } + + presentation = item_el.find("presentation") + if presentation is None: + return mat_data + + flow = presentation.find("flow") + if flow is None: + return mat_data + + response_ext = flow.find("response_extension") + if response_ext is not None: + grading_type_el = response_ext.find("{http://desire2learn.com/xsd/d2lcp_v2p0}grading_type") + if grading_type_el is not None and grading_type_el.text: + try: + mat_data["grading_type"] = int(grading_type_el.text) + except (ValueError, TypeError): + pass + + matching_answers = {} + response_grps = flow.findall("response_grp") + + for response_grp in response_grps: + render_choice = response_grp.find("render_choice") + if render_choice is not None: + for response_label in render_choice.findall(".//response_label"): + answer_ident = response_label.get("ident", "") + mattext = response_label.find(".//mattext") + if mattext is not None: + raw_text = mattext.text if mattext.text else "" + answer_text = self._clean_cdata(raw_text) + if answer_text and answer_ident not in matching_answers: + matching_answers[answer_ident] = answer_text + + for response_grp in response_grps: + choice_ident = 
response_grp.get("respident", "") + + material = response_grp.find("material") + choice_text = "" + if material is not None: + mattext = material.find("mattext") + if mattext is not None: + raw_text = mattext.text if mattext.text else "" + decoded_text = html.unescape(raw_text) + choice_text = self._clean_cdata(decoded_text) + + correct_answer_ident = None + resprocessing = item_el.find("resprocessing") + if resprocessing is not None: + for respcondition in resprocessing.findall("respcondition"): + conditionvar = respcondition.find("conditionvar") + if conditionvar is not None: + varequal = conditionvar.find("varequal") + if varequal is not None and varequal.get("respident") == choice_ident: + setvar = respcondition.find("setvar") + if setvar is not None and setvar.get("varname") == "D2L_Correct": + correct_answer_ident = varequal.text + break + + matching_answers_list = [] + if correct_answer_ident and correct_answer_ident in matching_answers: + matching_answers_list.append({"answer_text": matching_answers[correct_answer_ident]}) + + mat_data["choices"].append( + {"choice_text": choice_text, "matching_answers": matching_answers_list} + ) + + return mat_data + + def _parse_ordering(self, item_el, question_ident): + """ + Parse ordering question data. 
+ """ + ord_data = {"items": []} + + presentation = item_el.find("presentation") + if presentation is None: + return ord_data + + flow = presentation.find("flow") + if flow is None: + return ord_data + + response_grp = flow.find('response_grp[@rcardinality="Ordered"]') + if response_grp is None: + return ord_data + + render_choice = response_grp.find("render_choice") + if render_choice is None: + return ord_data + + order_index = 1 + for response_label in render_choice.findall(".//response_label"): + ident_num = response_label.get("ident", "") + + mattext = response_label.find(".//mattext") + text = "" + if mattext is not None: + raw_text = mattext.text if mattext.text else "" + decoded_text = html.unescape(raw_text) + text = self._clean_cdata(decoded_text) + + ord_feedback = None + question_ident_feedback = question_ident + "_IF" + feedback_ident = question_ident_feedback + str(order_index) + feedback_el = item_el.find(f".//itemfeedback[@ident='{feedback_ident}']") + if feedback_el is not None: + ord_feedback = self._extract_feedback_text(feedback_el) + + ord_data["items"].append( + {"text": text, "order": order_index, "ord_feedback": ord_feedback} + ) + order_index += 1 + + return ord_data + + def _parse_written_response(self, item_el, question_ident): + """ + Parse written response question data. 
+ """ + wr_data = { + "enable_student_editor": False, + "initial_text": None, + "answer_key": "", + "enable_attachments": False, + } + + presentation = item_el.find("presentation") + if presentation is not None: + flow = presentation.find("flow") + if flow is not None: + response_ext = flow.find("response_extension") + if response_ext is not None: + editor_el = response_ext.find("{http://desire2learn.com/xsd/d2lcp_v2p0}has_htmleditor") + if editor_el is not None: + editor_text = editor_el.text if editor_el.text else "" + wr_data["enable_student_editor"] = editor_text.lower() == "yes" + + answer_key_el = item_el.find("answer_key") + if answer_key_el is not None: + answer_key_mat = answer_key_el.find("answer_key_material") + if answer_key_mat is not None: + mattext = answer_key_mat.find(".//mattext") + if mattext is not None: + raw_text = mattext.text if mattext.text else "" + wr_data["answer_key"] = self._clean_cdata(raw_text) + + initial_text_el = item_el.find("initial_text") + if initial_text_el is not None: + initial_text_mat = initial_text_el.find("initial_text_material") + if initial_text_mat is not None: + mattext = initial_text_mat.find(".//mattext") + if mattext is not None: + raw_text = mattext.text if mattext.text else "" + decoded_text = html.unescape(raw_text) + cleaned_text = self._clean_cdata(decoded_text) + wr_data["initial_text"] = cleaned_text if cleaned_text else None + + return wr_data diff --git a/api/formats/scorm/scorm_question_builder.py b/api/formats/scorm/scorm_question_builder.py new file mode 100644 index 0000000..e664648 --- /dev/null +++ b/api/formats/scorm/scorm_question_builder.py @@ -0,0 +1,23 @@ +from .xml_builders import ( + BaseQuestionBuilder, + MultipleChoiceBuilder, + TrueFalseBuilder, + FillInTheBlanksBuilder, + MultiSelectBuilder, + MatchingBuilder, + OrderingBuilder, + WrittenResponseBuilder, +) + + +class ScormQuestionBuilder( + BaseQuestionBuilder, + MultipleChoiceBuilder, + TrueFalseBuilder, + FillInTheBlanksBuilder, + 
def extract_scorm_zip(scorm_zip_path, extract_to_path=None):
    """
    Extract a SCORM ZIP file and return the extraction path.

    Args:
        scorm_zip_path: Path to the SCORM ZIP file
        extract_to_path: Optional path to extract ZIP contents. When omitted,
            ``<MEDIA_ROOT>/scorm_extract_<zip basename without extension>``
            is used.

    Returns:
        str: Path where the ZIP was extracted

    Raises:
        FileNotFoundError: If ``scorm_zip_path`` does not exist.
    """
    if not path.exists(scorm_zip_path):
        raise FileNotFoundError(f"SCORM ZIP file not found: {scorm_zip_path}")

    if extract_to_path is None:
        zip_basename = path.splitext(path.basename(scorm_zip_path))[0]
        extract_to_path = path.join(settings.MEDIA_ROOT, f"scorm_extract_{zip_basename}")

    # exist_ok avoids the check-then-create race: two extractions targeting
    # the same directory no longer risk a FileExistsError between the
    # existence test and makedirs().
    makedirs(extract_to_path, exist_ok=True)

    with ZipFile(scorm_zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_to_path)

    return extract_to_path
def create_section(self, parent_el, section_obj):
    """
    Append a ``<section>`` element for ``section_obj`` under ``parent_el``.

    The element gets a unique ``ident``, the section title as ``title``, an
    optional random ``selection_ordering`` (when ``section_obj.shuffle`` is
    ``True``), its presentation material and its ``sectionproc_extension``.

    Args:
        parent_el: Element the new section is attached to.
        section_obj: Section providing ``title``, ``shuffle`` and ``text``.

    Returns:
        The newly created ``<section>`` element.
    """
    # Local import: the module itself only imports ``UUID`` from ``uuid``.
    from uuid import uuid4

    # The previous suffix, int(UUID(int=0x1234...)), was a constant, so every
    # section created within the same second received the same ident. A
    # random UUID keeps the "SECT_<timestamp><digits>" shape and makes each
    # ident distinct.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    sectionIdent = "SECT_" + timestamp + str(uuid4().int)

    section_el = ET.SubElement(
        parent_el,
        "section",
        {"ident": sectionIdent, "title": self._safe_attr(section_obj.title)},
    )
    if section_obj.shuffle is True:
        self.create_section_shuffle(section_el)

    self.create_presentation_material(section_el, section_obj.text)
    self.create_sectionproc_extension(section_el, section_obj)

    return section_el
class BaseQuestionBuilder:
    """Shared helpers that add the common QTI parts of a question ``<item>``."""

    # NOTE(review): this module binds ``ET`` with
    # ``import xml.etree.cElementTree as ET``; that module was removed in
    # Python 3.9 -- the import should be ``xml.etree.ElementTree``.

    def itemetadata(self, it, question_type, question):
        """
        Append ``itemmetadata/qtimetadata`` to ``it``.

        Three ``qti_metadatafield`` entries are written: computer-scored
        ("yes"), the question type label, and the weighting, i.e.
        ``question.points`` (1 when ``None``) formatted with four decimals.
        """
        qtimetadata = ET.SubElement(ET.SubElement(it, "itemmetadata"), "qtimetadata")

        def add_field(label):
            # One <qti_metadatafield> with its label; returns the empty entry.
            field = ET.SubElement(qtimetadata, "qti_metadatafield")
            ET.SubElement(field, "fieldlabel").text = label
            return ET.SubElement(field, "fieldentry")

        add_field("qmd_computerscored").text = "yes"
        add_field("qmd_questiontype").text = question_type
        weighting_entry = add_field("qmd_weighting")
        weight = 1 if question.points is None else question.points
        weighting_entry.text = f"{weight:.4f}"

    def itemproc_extension(self, it):
        """Append the fixed ``itemproc_extension`` (difficulty 1, not bonus, not mandatory)."""
        extension = ET.SubElement(it, "itemproc_extension")
        for name, value in (("difficulty", "1"), ("isbonus", "no"), ("ismandatory", "no")):
            ET.SubElement(extension, "d2l_2p0:" + name).text = value

    def generate_feedback(self, it, ident, feedback):
        """Append an ``itemfeedback`` element with ``ident`` wrapping ``feedback`` in CDATA."""
        feedback_el = ET.SubElement(it, "itemfeedback", {"ident": ident})
        material = ET.SubElement(feedback_el, "material")
        mattext = ET.SubElement(material, "mattext", {"texttype": "text/html"})
        mattext.append(CDATA(feedback))

    def generate_hint(self, it, hint):
        """Append ``hint/hintmaterial/flow_mat/material/mattext`` wrapping ``hint`` in CDATA."""
        flow_mat = ET.SubElement(ET.SubElement(ET.SubElement(it, "hint"), "hintmaterial"), "flow_mat")
        material = ET.SubElement(flow_mat, "material")
        mattext = ET.SubElement(material, "mattext", {"texttype": "text/html"})
        mattext.append(CDATA(hint))

    def xml_to_string(self, xml):
        """Serialise ``xml`` to a tab-indented, pretty-printed XML string."""
        return parseString(ET.tostring(xml, "utf-8")).toprettyxml(indent="\t")
class MatchingBuilder:
    # Mixin that serialises a matching question into a QTI <item>. It calls
    # self.itemetadata / self.itemproc_extension / self.generate_hint /
    # self.generate_feedback, which are not defined in this class and must be
    # supplied by the class it is combined with.
    #
    # NOTE(review): this module binds ET with
    # `import xml.etree.cElementTree as ET` (removed in Python 3.9) while the
    # method below uses `match` (Python 3.10+); the import should be
    # `xml.etree.ElementTree`.
    def generate_matching(self, it, question_ident, question):
        """
        Populate ``it`` with the presentation, response processing and
        optional hint/feedback of a matching question.

        Args:
            it: The ``<item>`` element to fill in (mutated in place).
            question_ident: Prefix for generated idents; choices become
                ``<question_ident>_C<n>`` and answers ``<question_ident>_A<n>``.
            question: Question object providing ``get_matching()``, ``text``,
                ``hint`` and ``feedback``.
        """
        self.itemetadata(it, "Matching", question)
        self.itemproc_extension(it)
        matching = question.get_matching()
        question_ident_choice = question_ident + "_C"
        question_ident_answer = question_ident + "_A"

        it_pre = ET.SubElement(it, "presentation")
        it_pre_flow = ET.SubElement(it_pre, "flow")

        if question.hint:
            self.generate_hint(it, question.hint)

        # Outcome variables referenced by the respconditions generated below.
        it_res = ET.SubElement(it, "resprocessing")
        it_res_out = ET.SubElement(it_res, "outcomes")
        ET.SubElement(it_res_out, "decvar", {"vartype": "Integer", "defaultval": "0", "varname": "D2L_Correct", "minvalue": "0", "maxvalue": "100"})
        ET.SubElement(it_res_out, "decvar", {"vartype": "Integer", "defaultval": "0", "varname": "D2L_Incorrect", "minvalue": "0", "maxvalue": "100"})
        ET.SubElement(it_res_out, "decvar", {"vartype": "Decimal", "defaultval": "0", "varname": "que_score", "minvalue": "0", "maxvalue": "100"})

        it_pre_flow_mat = ET.SubElement(it_pre_flow, "material")
        it_pre_flow_mat_text = ET.SubElement(it_pre_flow_mat, "mattext", {"texttype": "text/html"})
        question_text = question.text
        it_pre_flow_mat_text.append(CDATA(question_text))

        # NOTE(review): the exported grading_type is always "2", while the
        # scoring conditions at the end of this method depend on
        # matching.grading_type -- confirm the two are meant to differ.
        it_pre_flow_res = ET.SubElement(it_pre_flow, "response_extension")
        it_pre_flow_res_grading_type = ET.SubElement(it_pre_flow_res, "d2l_2p0:grading_type")
        it_pre_flow_res_grading_type.text = "2"

        # A single detached render_choice holding every answer label; the same
        # element object is appended to each response_grp further down.
        it_pre_flow_res_grp_ren = ET.Element("render_choice", {"shuffle": "yes"})
        it_pre_flow_res_grp_ren_flow = ET.SubElement(it_pre_flow_res_grp_ren, "flow_label", {"class": "Block"})

        # Scratch parent for one respcondition template per answer. It is
        # never attached to `it`; only deep copies of its children are.
        it_temp = ET.Element("temp")
        matching_answers = matching.get_unique_matching_answers()

        ma_index = 1
        for matching_answer_text in matching_answers:
            matching_answer_index = question_ident_answer + str(ma_index)
            it_grp_ren_flow_lab = ET.SubElement(it_pre_flow_res_grp_ren_flow, "response_label", {"ident": matching_answer_index})
            it_grp_ren_flow_lab_flow = ET.SubElement(it_grp_ren_flow_lab, "flow_mat")
            it_grp_ren_flow_lab_flow_mat = ET.SubElement(it_grp_ren_flow_lab_flow, "material")
            it_grp_ren_flow_lab_flow_mat_text = ET.SubElement(it_grp_ren_flow_lab_flow_mat, "mattext", {"texttype": "text/html"})
            it_grp_ren_flow_lab_flow_mat_text.append(CDATA(matching_answer_text))

            # Template condition "answer == this ident -> add 1"; respident and
            # varname are filled in per choice in the loop below.
            it_respcondition = ET.SubElement(it_temp, "respcondition")
            it_respcondition_conditionvar = ET.SubElement(it_respcondition, "conditionvar")
            it_respcondition_varequal = ET.SubElement(it_respcondition_conditionvar, "varequal")
            it_respcondition_varequal.text = matching_answer_index
            it_respcondition_setvar = ET.SubElement(it_respcondition, "setvar", {"action": "Add"})
            it_respcondition_setvar.text = "1"

            ma_index += 1

        mc_index = 1
        for matching_choice in matching.get_matching_choices():
            matching_choice_index = question_ident_choice + str(mc_index)

            it_pre_flow_res_grp = ET.SubElement(it_pre_flow, "response_grp", {"respident": matching_choice_index, "rcardinality": "Single"})
            it_pre_flow_res_grp_mat = ET.SubElement(it_pre_flow_res_grp, "material")
            it_pre_flow_res_grp_mattext = ET.SubElement(it_pre_flow_res_grp_mat, "mattext", {"texttype": "text/html"})
            it_pre_flow_res_grp_mattext.append(CDATA(matching_choice.choice_text))
            it_pre_flow_res_grp.append(it_pre_flow_res_grp_ren)

            # Re-target every template at the current choice, mark it correct
            # or incorrect, then snapshot it into resprocessing. The deepcopy
            # is required because the template is mutated again for the next
            # choice.
            for respcondition in it_temp:
                conditionvar = respcondition.find("conditionvar")
                varequal = conditionvar.find("varequal")
                varequal.set("respident", matching_choice_index)
                setvar = respcondition.find("setvar")
                # Look the answer label up again through the response_grp that
                # was just added for this choice.
                answer_mattext = it_pre_flow.find(
                    "response_grp[@respident='" + matching_choice_index + "'].//response_label[@ident='" + varequal.text + "'].//mattext"
                )
                # answer_mattext[0] is the child appended above via CDATA(...);
                # presumably its .text is the raw answer text -- TODO confirm
                # against the CDATA helper.
                is_correct = matching_choice.has_matching_answer(answer_mattext[0].text)
                if is_correct is True:
                    setvar.set("varname", "D2L_Correct")
                else:
                    setvar.set("varname", "D2L_Incorrect")
                it_res.append(copy.deepcopy(respcondition))
            mc_index += 1

        # Final scoring condition(s); nothing is emitted for any other
        # grading_type value.
        match matching.grading_type:
            case 0:
                # Unconditional ("other"): que_score is set to D2L_Correct.
                it_respcondition = ET.SubElement(it_res, "respcondition")
                it_respcondition_var = ET.SubElement(it_respcondition, "conditionvar")
                ET.SubElement(it_respcondition_var, "other")
                it_resp_setvar = ET.SubElement(it_respcondition, "setvar", {"varname": "que_score", "action": "Set"})
                it_resp_setvar.text = "D2L_Correct"
            case 1:
                # Two conditions with the same "D2L_Incorrect >= 0" test: the
                # first sets que_score to 0, the copy sets it to 1.
                it_respcondition = ET.SubElement(it_res, "respcondition")
                it_respcondition_var = ET.SubElement(it_respcondition, "conditionvar")
                it_respcondition_var_vargte = ET.SubElement(it_respcondition_var, "vargte", {"respident": "D2L_Incorrect"})
                it_respcondition_var_vargte.text = "0"
                it_resp_setvar = ET.SubElement(it_respcondition, "setvar", {"varname": "que_score", "action": "Set"})
                it_resp_setvar.text = "0"

                it_respcondition2 = copy.deepcopy(it_respcondition)
                it_resp_setvar2 = it_respcondition2.find("setvar")
                it_resp_setvar2.text = "1"
                it_res.append(it_respcondition2)
            case 2:
                # "D2L_Incorrect >= D2L_Correct" sets que_score to 0; the
                # second condition (varlt, no text) sets it to D2L_Correct and
                # then subtracts D2L_Incorrect.
                it_respcondition = ET.SubElement(it_res, "respcondition")
                it_respcondition_var = ET.SubElement(it_respcondition, "conditionvar")
                it_respcondition_var_vargte = ET.SubElement(it_respcondition_var, "vargte", {"respident": "D2L_Incorrect"})
                it_respcondition_var_vargte.text = "D2L_Correct"
                it_resp_setvar = ET.SubElement(it_respcondition, "setvar", {"varname": "que_score", "action": "Set"})
                it_resp_setvar.text = "0"

                it_respcondition2 = ET.SubElement(it_res, "respcondition")
                it_respcondition_var2 = ET.SubElement(it_respcondition2, "conditionvar")
                ET.SubElement(it_respcondition_var2, "varlt", {"respident": "D2L_Incorrect"})
                it_resp_setvar2 = ET.SubElement(it_respcondition2, "setvar", {"varname": "que_score", "action": "Set"})
                it_resp_setvar2.text = "D2L_Correct"
                it_resp_setvar3 = ET.SubElement(it_respcondition2, "setvar", {"varname": "que_score", "action": "Subtract"})
                it_resp_setvar3.text = "D2L_Incorrect"

        if question.feedback:
            self.generate_feedback(it, question_ident, question.feedback)
question.get_multiple_select() + it_pre_flow_mat_text = ET.SubElement(it_pre_flow_mat, "mattext", {"texttype": "text/html"}) + question_text = question.text + it_pre_flow_mat_text.append(CDATA(question_text)) + + it_pre_flow_res = ET.SubElement(it_pre_flow, "response_extension") + it_pre_flow_res_display_style = ET.SubElement(it_pre_flow_res, "d2l_2p0:display_style") + it_pre_flow_res_display_style.text = "2" + it_pre_flow_res_enumeration = ET.SubElement(it_pre_flow_res, "d2l_2p0:enumeration") + it_pre_flow_res_enumeration.text = str(multiple_select.enumeration) if multiple_select.enumeration else "4" + it_pre_flow_res_grading_type = ET.SubElement(it_pre_flow_res, "d2l_2p0:grading_type") + it_pre_flow_res_grading_type.text = "2" + + it_pre_flow_lid = ET.SubElement(it_pre_flow, "response_lid", {"ident": question_lid, "rcardinality": "Multiple"}) + it_pre_flow_lid_render_choice = ET.SubElement( + it_pre_flow_lid, "render_choice", {"shuffle": ("yes" if multiple_select.randomize else "no")} + ) + + if question.hint: + self.generate_hint(it, question.hint) + + it_res = ET.SubElement(it, "resprocessing") + it_out = ET.SubElement(it_res, "outcomes") + ET.SubElement( + it_out, + "decvar", + {"vartype": "Integer", "defaultval": "0", "varname": "que_score", "minvalue": "0", "maxvalue": "100"}, + ) + ET.SubElement(it_out, "decvar", {"vartype": "Integer", "defaultval": "0", "varname": "D2L_Correct", "minvalue": "0"}) + ET.SubElement(it_out, "decvar", {"vartype": "Integer", "defaultval": "0", "varname": "D2L_Incorrect", "minvalue": "0"}) + + if question.feedback: + self.generate_feedback(it, question_ident, question.feedback) + + ms_index = 1 + for ms_answer in multiple_select.get_multiple_select_answers(): + flow = ET.SubElement(it_pre_flow_lid_render_choice, "flow_label", {"class": "Block"}) + response_label = ET.SubElement(flow, "response_label", {"ident": question_ident_answer + str(ms_index)}) + flow_mat = ET.SubElement(response_label, "flow_mat") + material = 
class MultipleChoiceBuilder:
    """Mixin that serialises a multiple-choice question into a QTI ``<item>``."""

    def generate_multiple_choice(self, it, question_ident, question):
        """
        Populate ``it`` with a multiple-choice question.

        Writes the metadata, the presentation (question text, display
        settings, one ``response_label`` per answer), a ``respcondition`` per
        answer carrying its weight ("0.0000" when the weight is falsy), and
        the optional hint, question feedback and per-answer feedback.

        Args:
            it: The ``<item>`` element to fill in (mutated in place).
            question_ident: Prefix for the generated ``_LID``, ``_A<n>`` and
                ``_IF<n>`` idents.
            question: Question object providing ``get_multiple_choice()``,
                ``text``, ``hint`` and ``feedback``.
        """
        self.itemetadata(it, "Multiple Choice", question)
        self.itemproc_extension(it)
        lid_ident = question_ident + "_LID"
        answer_prefix = question_ident + "_A"
        feedback_prefix = question_ident + "_IF"

        flow_el = ET.SubElement(ET.SubElement(it, "presentation"), "flow")
        stem_material = ET.SubElement(flow_el, "material")

        multiple_choice = question.get_multiple_choice()
        stem_mattext = ET.SubElement(stem_material, "mattext", {"texttype": "text/html"})
        stem_mattext.append(CDATA(question.text))

        extension = ET.SubElement(flow_el, "response_extension")
        ET.SubElement(extension, "d2l_2p0:display_style").text = "2"
        enumeration_el = ET.SubElement(extension, "d2l_2p0:enumeration")
        if multiple_choice.enumeration:
            enumeration_el.text = str(multiple_choice.enumeration)
        else:
            # Falsy enumeration falls back to style "4".
            enumeration_el.text = "4"
        ET.SubElement(extension, "d2l_2p0:grading_type").text = "0"

        response_lid = ET.SubElement(
            flow_el, "response_lid", {"ident": lid_ident, "rcardinality": "Multiple"}
        )
        shuffle = "yes" if multiple_choice.randomize else "no"
        render_choice = ET.SubElement(response_lid, "render_choice", {"shuffle": shuffle})

        if question.hint:
            self.generate_hint(it, question.hint)

        resprocessing = ET.SubElement(it, "resprocessing")

        if question.feedback:
            self.generate_feedback(it, question_ident, question.feedback)

        answers = multiple_choice.get_multiple_choice_answers()
        for position, mc_answer in enumerate(answers, start=1):
            answer_ident = answer_prefix + str(position)
            feedback_ident = feedback_prefix + str(position)

            # Presentation side: the selectable answer label.
            flow_label = ET.SubElement(render_choice, "flow_label", {"class": "Block"})
            response_label = ET.SubElement(flow_label, "response_label", {"ident": answer_ident})
            label_material = ET.SubElement(ET.SubElement(response_label, "flow_mat"), "material")
            label_mattext = ET.SubElement(label_material, "mattext", {"texttype": "text/html"})
            label_mattext.append(CDATA(mc_answer.answer))

            # Scoring side: choosing this answer sets the score to its weight.
            condition = ET.SubElement(
                resprocessing, "respcondition", {"title": "Response Condition" + str(position)}
            )
            conditionvar = ET.SubElement(condition, "conditionvar")
            varequal = ET.SubElement(conditionvar, "varequal", {"respident": lid_ident})
            varequal.text = answer_ident
            score = ET.SubElement(condition, "setvar", {"action": "Set"})
            score.text = str(mc_answer.weight) if mc_answer.weight else "0.0000"
            # The feedback link is written for every answer, whether or not a
            # matching itemfeedback element is generated below.
            ET.SubElement(
                condition,
                "displayfeedback",
                {"feedbacktype": "Response", "linkrefid": feedback_ident},
            )

            if mc_answer.answer_feedback:
                self.generate_feedback(it, feedback_ident, mc_answer.answer_feedback)
ET.SubElement(it_out, "decvar", {"maxvalue": "100", "minvalue": "0", "varname": "D2L_Correct", "defaultval": "0", "vartype": "Integer"}) + ET.SubElement(it_out, "decvar", {"minvalue": "0", "varname": "D2L_Incorrect", "defaultval": "0", "vartype": "Integer"}) + ET.SubElement(it_out, "decvar", {"minvalue": "0", "varname": "que_score", "defaultval": "0", "vartype": "Integer"}) + + it_res_con_other = ET.SubElement(it_res, "respcondition") + it_res_con_other_var = ET.SubElement(it_res_con_other, "conditionvar") + ET.SubElement(it_res_con_other_var, "other") + it_res_con_other_setvar = ET.SubElement(it_res_con_other, "setvar", {"varname": "que_score", "action": "Set"}) + it_res_con_other_setvar.text = "D2L_Correct" + + if question.feedback: + self.generate_feedback(it, question_ident, question.feedback) + + ord_index = 1 + for ord in question.get_orderings(): + ident_num = question_o + str(ord_index) + it_pre_flow_res_grp_render_flow_res = ET.SubElement( + it_pre_flow_res_grp_render_flow, "response_label", {"ident": ident_num} + ) + it_pre_flow_res_grp_render_flow_res_flow = ET.SubElement(it_pre_flow_res_grp_render_flow_res, "flow_mat") + it_pre_flow_res_grp_render_flow_res_flow_mat = ET.SubElement( + it_pre_flow_res_grp_render_flow_res_flow, "material" + ) + it_pre_flow_res_grp_render_flow_res_flow_mat_text = ET.SubElement( + it_pre_flow_res_grp_render_flow_res_flow_mat, "mattext", {"texttype": "text/html"} + ) + question_text = ord.text + it_pre_flow_res_grp_render_flow_res_flow_mat_text.append(CDATA(question_text)) + + it_res_con_correct = ET.SubElement(it_res, "respcondition", {"title": "Correct Condition"}) + it_res_con_correct_var = ET.SubElement(it_res_con_correct, "conditionvar") + it_res_con_correct_var_equal = ET.SubElement(it_res_con_correct_var, "varequal", {"respident": ident_num}) + it_res_con_correct_var_equal.text = str(ord_index) + it_res_con_correct_setvar = ET.SubElement(it_res_con_correct, "setvar", {"varname": "D2L_Correct", "action": "Add"}) + 
it_res_con_correct_setvar.text = str(1) + + it_res_con_incorrect = ET.SubElement(it_res, "respcondition", {"title": "Incorrect Condition"}) + it_res_con_incorrect_var = ET.SubElement(it_res_con_incorrect, "conditionvar") + it_res_con_incorrect_var_not = ET.SubElement(it_res_con_incorrect_var, "not") + it_res_con_incorrect_var_not_equal = ET.SubElement( + it_res_con_incorrect_var_not, "varequal", {"respident": ident_num} + ) + it_res_con_incorrect_var_not_equal.text = str(ord_index) + it_res_con_incorrect_setvar = ET.SubElement( + it_res_con_incorrect, "setvar", {"varname": "D2L_Incorrect", "action": "Add"} + ) + it_res_con_incorrect_setvar.text = str(1) + + if ord.ord_feedback: + self.generate_feedback(it, question_ident_feedback + str(ord_index), ord.ord_feedback) + ord_index += 1 diff --git a/api/formats/scorm/xml_builders/true_false.py b/api/formats/scorm/xml_builders/true_false.py new file mode 100644 index 0000000..3f9be58 --- /dev/null +++ b/api/formats/scorm/xml_builders/true_false.py @@ -0,0 +1,72 @@ +import xml.etree.cElementTree as ET + +from ..xmlcdata import CDATA + + +class TrueFalseBuilder: + def generate_true_false(self, it, question_ident, question): + self.itemetadata(it, "True/False", question) + self.itemproc_extension(it) + + question_lid = question_ident + "_LID" + question_ident_answer = question_ident + "_A" + question_ident_feedback = question_ident + "_IF" + + it_pre = ET.SubElement(it, "presentation") + it_pre_flow = ET.SubElement(it_pre, "flow") + it_pre_flow_mat = ET.SubElement(it_pre_flow, "material") + + true_false = question.get_true_false() + it_pre_flow_mat_text = ET.SubElement(it_pre_flow_mat, "mattext", {"texttype": "text/html"}) + question_text = question.text + it_pre_flow_mat_text.append(CDATA(question_text)) + + it_pre_flow_res = ET.SubElement(it_pre_flow, "response_extension") + it_pre_flow_res_display_style = ET.SubElement(it_pre_flow_res, "d2l_2p0:display_style") + it_pre_flow_res_display_style.text = "2" + 
it_pre_flow_res_enumeration = ET.SubElement(it_pre_flow_res, "d2l_2p0:enumeration") + it_pre_flow_res_enumeration.text = str(true_false.enumeration) if true_false.enumeration else "4" + it_pre_flow_res_grading_type = ET.SubElement(it_pre_flow_res, "d2l_2p0:grading_type") + it_pre_flow_res_grading_type.text = "0" + + it_pre_flow_lid = ET.SubElement(it_pre_flow, "response_lid", {"ident": question_lid, "rcardinality": "Single"}) + it_pre_flow_lid_render_choice = ET.SubElement(it_pre_flow_lid, "render_choice", {"shuffle": "no"}) + + it_res = ET.SubElement(it, "resprocessing") + + if question.feedback: + self.generate_feedback(it, question_ident, question.feedback) + + tf_index = 0 + answer_text = ["True", "False"] + while tf_index < 2: + flow = ET.SubElement(it_pre_flow_lid_render_choice, "flow_label", {"class": "Block"}) + response_label = ET.SubElement(flow, "response_label", {"ident": question_ident_answer + str(tf_index)}) + flow_mat = ET.SubElement(response_label, "flow_mat") + material = ET.SubElement(flow_mat, "material") + mattext = ET.SubElement(material, "mattext", {"texttype": "text/plain"}) + mattext.text = answer_text[tf_index] + + it_res_con = ET.SubElement(it_res, "respcondition", {"title": "Response Condition" + str(tf_index)}) + it_res_con_var = ET.SubElement(it_res_con, "conditionvar") + it_res_con_var_equal = ET.SubElement(it_res_con_var, "varequal", {"respident": question_lid}) + it_res_con_var_equal.text = question_ident_answer + str(tf_index) + it_res_set_var = ET.SubElement(it_res_con, "setvar", {"action": "Set"}) + + if tf_index == 0: + current_weight = true_false.true_weight + current_feedback = true_false.true_feedback + else: + current_weight = true_false.false_weight + current_feedback = true_false.false_feedback + + it_res_set_var.text = str(current_weight) if current_weight else "0.0000" + ET.SubElement( + it_res_con, + "displayfeedback", + {"feedbacktype": "Response", "linkrefid": question_ident_feedback + str(tf_index)}, + ) + + if 
current_feedback: + self.generate_feedback(it, question_ident_feedback + str(tf_index), current_feedback) + tf_index += 1 diff --git a/api/formats/scorm/xml_builders/written_response.py b/api/formats/scorm/xml_builders/written_response.py new file mode 100644 index 0000000..64dbda6 --- /dev/null +++ b/api/formats/scorm/xml_builders/written_response.py @@ -0,0 +1,56 @@ +import xml.etree.cElementTree as ET + +from ..xmlcdata import CDATA + + +class WrittenResponseBuilder: + def generate_written_response(self, it, question_ident, question): + self.itemetadata(it, "Long Answer", question) + self.itemproc_extension(it) + + question_ident_str = question_ident + "_STR" + question_ident_la = question_ident + "_LA" + + it_pre = ET.SubElement(it, "presentation") + it_pre_flow = ET.SubElement(it_pre, "flow") + + written_response = question.get_written_response() + + it_pre_flow_mat = ET.SubElement(it_pre_flow, "material") + it_pre_flow_mat_text = ET.SubElement(it_pre_flow_mat, "mattext", {"texttype": "text/html"}) + question_text = question.text + it_pre_flow_mat_text.append(CDATA(question_text)) + + it_pre_flow_mat_res_ext = ET.SubElement(it_pre_flow, "response_extension") + it_pre_flow_mat_res_ext_sign = ET.SubElement(it_pre_flow_mat_res_ext, "d2l_2p0:has_signed_comments") + it_pre_flow_mat_res_ext_sign.append(CDATA("no")) + it_pre_flow_mat_res_ext_editor = ET.SubElement(it_pre_flow_mat_res_ext, "d2l_2p0:has_htmleditor") + it_pre_flow_mat_res_ext_editor.append(CDATA("no")) + + it_pre_flow_mat_res_str = ET.SubElement( + it_pre_flow, "response_str", {"rcardinality": "Multiple", "ident": question_ident_str} + ) + it_pre_flow_mat_res_str_render = ET.SubElement( + it_pre_flow_mat_res_str, "render_fib", {"fibtype": "String", "prompt": "Box", "columns": "100", "rows": "15"} + ) + it_pre_flow_mat_res_str_render_label = ET.SubElement( + it_pre_flow_mat_res_str_render, "response_label", {"ident": question_ident_la} + ) + it_pre_flow_mat_res_str_render_label_mat = 
ET.SubElement(it_pre_flow_mat_res_str_render_label, "material") + ET.SubElement(it_pre_flow_mat_res_str_render_label_mat, "mattext", {"texttype": "text/html"}) + + if question.hint: + self.generate_hint(it, question.hint) + if question.feedback: + self.generate_feedback(it, question_ident, question.feedback) + it_init_text = ET.SubElement(it, "initial_text") + it_init_text_mat = ET.SubElement(it_init_text, "initial_text_material") + it_init_text_mat_flow = ET.SubElement(it_init_text_mat, "flow_mat") + it_init_text_mat_flow_mat = ET.SubElement(it_init_text_mat_flow, "material") + ET.SubElement(it_init_text_mat_flow_mat, "mattext", {"texttype": "text/html"}) + it_ans = ET.SubElement(it, "answer_key") + it_ans_mat = ET.SubElement(it_ans, "answer_key_material") + it_ans_mat_flow = ET.SubElement(it_ans_mat, "flow_mat") + it_ans_mat_flow_mat = ET.SubElement(it_ans_mat_flow, "material") + it_ans_mat_flow_mat_text = ET.SubElement(it_ans_mat_flow_mat, "mattext", {"texttype": "text/html"}) + it_ans_mat_flow_mat_text.append(CDATA(written_response.answer_key)) diff --git a/api/scorm/xmlcdata.py b/api/formats/scorm/xmlcdata.py similarity index 81% rename from api/scorm/xmlcdata.py rename to api/formats/scorm/xmlcdata.py index 2971d53..c75bdfc 100644 --- a/api/scorm/xmlcdata.py +++ b/api/formats/scorm/xmlcdata.py @@ -16,8 +16,7 @@ def CDATA(text=None): def _serialize_xml2(write, elem, encoding, qnames, namespaces, orig=ET._serialize_xml): if elem.tag == '![CDATA[': - write("\n<%s%s]]>\n" % \ - (elem.tag, elem.text.encode(encoding, "xmlcharrefreplace"))) + write("\n<%s%s]]>\n" % (elem.tag, elem.text.encode(encoding, "xmlcharrefreplace"))) return return orig(write, elem, encoding, qnames, namespaces) @@ -36,8 +35,6 @@ def _serialize_xml3(write, elem, qnames, namespaces, return orig(write, elem, qnames, namespaces) if six.PY3: - ET._serialize_xml = \ - ET._serialize['xml'] = _serialize_xml3 + ET._serialize_xml = ET._serialize["xml"] = _serialize_xml3 elif six.PY2: - 
ET._serialize_xml = \ - ET._serialize['xml'] = _serialize_xml2 \ No newline at end of file + ET._serialize_xml = ET._serialize["xml"] = _serialize_xml2 \ No newline at end of file diff --git a/api/models.py b/api/models.py index 2b41e69..6ac7d31 100644 --- a/api/models.py +++ b/api/models.py @@ -6,8 +6,9 @@ # import pypandoc from datetime import datetime -from .scorm.XmlWriter import XmlWriter -from .scorm.manifest import ManifestEntity, ManifestResourceEntity +from .formats.scorm.scorm_writer import ScormWriter +from .formats.scorm.manifest_builder import build_manifest +from .formats.scorm.manifest import ManifestEntity, ManifestResourceEntity from xml.dom.minidom import parseString import xml.etree.cElementTree as ET @@ -28,6 +29,7 @@ import logging +import traceback newlogger = logging.getLogger(__name__) from .logging.logging_adapter import FilenameLoggingAdapter @@ -49,6 +51,7 @@ class QuestionLibrary(models.Model): image_path = models.FilePathField(path=None, match=None, recursive=False, max_length=None) shuffle = models.BooleanField(blank=True, null=True) main_title = models.TextField(blank=True, null=True) + main_text = models.TextField(blank=True, null=True) filtered_main_title = models.TextField(blank=True, null=True) end_answers_raw = models.TextField(blank=True, null=True) formatter_error = models.TextField(blank=True, null=True) @@ -109,11 +112,11 @@ def create_xml_files(self): logger = FilenameLoggingAdapter(newlogger, {'filename': str(self.id)}) try: ql_obj = QuestionLibrary.objects.filter(id=self.id).first() - parsed_xml = XmlWriter(ql_obj) + parsed_xml = ScormWriter(ql_obj) manifest_entity = ManifestEntity() manifest_resource_entity = ManifestResourceEntity('res_question_library', 'webcontent', 'd2lquestionlibrary', 'questiondb.xml', 'Question Library') manifest_entity.add_resource(manifest_resource_entity) - manifest = parsed_xml.create_manifest(manifest_entity, self.folder_path) + manifest = build_manifest(manifest_entity) parsed_imsmanifest = 
ET.tostring(manifest.getroot(), encoding='utf-8', xml_declaration=True).decode() parsed_imsmanifest = parseString(parsed_imsmanifest) parsed_imsmanifest = parsed_imsmanifest.toprettyxml(indent="\t") @@ -121,18 +124,28 @@ def create_xml_files(self): self.save() logger.info("imsmanifest String Created") except Exception as e: - logger.error("imsmanifest String Failed") - self.error = "imsmanifest String Failed" + logger.error(f"imsmanifest String Failed: {e}") + self.error = f"imsmanifest String Failed: {e}\n{traceback.format_exc()}" self.save() + return try: + if "parsed_xml" not in locals(): + raise RuntimeError("ScormWriter failed; questiondb_string not generated.") questiondb_string = parsed_xml.questiondb_string media_folder = self.media_folder if self.media_folder != None else f'./assessment-assets/{self.filtered_main_title}/' img_elements = re.findall(r"\", questiondb_string, re.MULTILINE) for idx, img in enumerate(img_elements): img_src = re.findall(r"src=\"(.*?)\"", img, re.MULTILINE) + if not img_src: + continue + if ";base64," not in img_src[0]: + # Skip non-base64 images (external paths or placeholders) + continue base64_img = img_src[0].split(';base64,') + if len(base64_img) < 2: + continue img_string = base64_img[1] img_ext = base64_img[0].split("/")[1] image_data = base64.b64decode(img_string) @@ -161,10 +174,17 @@ def create_xml_files(self): logger.info("QuestionDB String Created") except Exception as e: - logger.error("QuestionDB String Failed") - - self.error = "QuestionDB String Failed" + logger.error(f"QuestionDB String Failed: {e}") + self.error = f"QuestionDB String Failed: {e}\n{traceback.format_exc()}" self.save() + return + + if not self.questiondb_string: + if not self.error: + self.error = "XML files Failed: questiondb_string is empty or missing." 
+ self.save() + logger.error("XML files Failed: questiondb_string is empty or missing.") + return try: questiondb_file = ContentFile(self.questiondb_string, name="questiondb.xml") @@ -175,8 +195,8 @@ def create_xml_files(self): # print(datetime.now().strftime("%H:%M:%S"), "imsmanifest.xml and questiondb.xml created!") except Exception as e: - logger.error("XML files Failed") - self.error = "XML files Failed" + logger.error(f"XML files Failed: {e}") + self.error = f"XML files Failed: {e}\n{traceback.format_exc()}" self.save() def zip_files(self): @@ -196,9 +216,8 @@ def zip_files(self): logger.info("ZIP file Created") except Exception as e: - logger.error("ZIP file Failed") - - self.error = "ZIP file Failed" + logger.error(f"ZIP file Failed: {e}") + self.error = f"ZIP file Failed: {e}" self.save() def create_zip_file_package(self): @@ -212,8 +231,8 @@ def create_zip_file_package(self): self.save() logger.info("ZIP file with JSON package Created") except Exception as e: - logger.error("ZIP file with JSON package Failed") - self.error = "ZIP file Failed" + logger.error(f"ZIP file with JSON package Failed: {e}") + self.error = f"ZIP file Failed: {e}" self.save() def cleanup(self): diff --git a/api/pipelines/__init__.py b/api/pipelines/__init__.py new file mode 100644 index 0000000..865de7c --- /dev/null +++ b/api/pipelines/__init__.py @@ -0,0 +1 @@ +# Pipeline orchestration layer. 
diff --git a/api/pipelines/docx_to_json.py b/api/pipelines/docx_to_json.py new file mode 100644 index 0000000..ee80b9e --- /dev/null +++ b/api/pipelines/docx_to_json.py @@ -0,0 +1,36 @@ +import logging +from api.pipelines.ws_pipeline import Process, run_pipeline +from api.pipelines.response_payload import build_response_payload + +logger = logging.getLogger(__name__) + + +class DocxToJsonError(Exception): + def __init__(self, message, process=None): + super().__init__(message) + self.process = process + + +def build_docx_to_json(questionlibrary): + """ + Run the DOCX pipeline and return the QuestionLibrary instance. + """ + pipeline = Process(questionlibrary) + try: + run_pipeline(pipeline) + except Exception as exc: + raise DocxToJsonError(str(exc), process=pipeline) + return pipeline.questionlibrary + + +def docx_to_json(questionlibrary, logger_instance=None): + """ + High-level function to convert DOCX to JSON. + Returns the JSON payload and QuestionLibrary instance. + """ + log = logger_instance or logger + log.info(f"[{questionlibrary.id}] DOCX to JSON conversion started") + ql_instance = build_docx_to_json(questionlibrary) + json_data = build_response_payload(ql_instance) + log.info(f"[{ql_instance.id}] DOCX to JSON conversion completed") + return json_data, ql_instance diff --git a/api/pipelines/json_to_docx.py b/api/pipelines/json_to_docx.py new file mode 100644 index 0000000..0a69311 --- /dev/null +++ b/api/pipelines/json_to_docx.py @@ -0,0 +1,239 @@ +import base64 +import glob +import os +import re +import uuid +import subprocess +import logging +from os import path + +from django.conf import settings +from django.core.files import File +from django.http import FileResponse + +from api.serializers import QuestionLibraryPackageSerializer +from api.formats.scorm.scorm_formatter import ScormFormatter + +logger = logging.getLogger(__name__) + +class JsonToDocxError(Exception): + def __init__(self, errors): + super().__init__("JSON to DOCX validation failed") 
+ self.errors = errors + + +def build_docx_from_json(json_data, logger_instance=None): + """ + High-level function to convert JSON to DOCX file. + Returns a FileResponse and QuestionLibrary instance. + """ + log = logger_instance or logger + + payload = json_data.get("data", json_data) + ql_serializer = QuestionLibraryPackageSerializer(data=payload) + if not ql_serializer.is_valid(): + raise JsonToDocxError(ql_serializer.errors) + + ql_instance = ql_serializer.save() + ql_instance.filter_main_title() + ql_instance.folder_path = settings.MEDIA_ROOT + str(ql_instance.id) + ql_instance.image_path = ql_instance.folder_path + settings.MEDIA_URL + ql_instance.create_directory() + ql_instance.save() + + formatter = ScormFormatter() + markdown_text = formatter.format_to_markdown(ql_instance) + + image_counter = 0 + base64_pattern = r']*?)src=["\'](data:image/([^;]+);base64,([^"\']+))["\']([^>]*?)>' + + def replace_base64_with_file(match): + nonlocal image_counter + before_src = match.group(1) + image_type = match.group(3) + base64_data = match.group(4) + after_src = match.group(5) + + try: + image_data = base64.b64decode(base64_data) + ext_map = { + "png": "png", + "jpeg": "jpg", + "jpg": "jpg", + "gif": "gif", + "svg+xml": "svg", + "webp": "webp", + } + ext = ext_map.get(image_type.lower(), "png") + image_filename = f"image_{image_counter}_{uuid.uuid4().hex[:8]}.{ext}" + image_path = path.join(ql_instance.folder_path, image_filename) + + with open(image_path, "wb") as img_file: + img_file.write(image_data) + + image_counter += 1 + log.info( + f"Extracted base64 image to file: {image_filename} ({len(image_data)} bytes)" + ) + + alt_match = re.search(r'alt=["\']([^"\']*)["\']', before_src + after_src) + alt_text = alt_match.group(1) if alt_match else "image" + markdown_image = f"![{alt_text}]({image_filename})" + log.debug(f"Replacing base64 img tag with markdown: {markdown_image}") + return markdown_image + except Exception as e: + log.error(f"Error extracting base64 
image: {str(e)}") + return match.group(0) + + markdown_text = re.sub(base64_pattern, replace_base64_with_file, markdown_text) + log.info(f"Extracted {image_counter} base64 images to files") + + if ql_instance.main_title: + filename = ql_instance.main_title.strip() + filename = re.sub(r'[<>:"/\\|?*]', "", filename) + filename = re.sub(r"\s+", "_", filename) + filename = filename[:100] + if not filename: + filename = ql_instance.filtered_main_title + else: + filename = ql_instance.filtered_main_title + + docx_filename = f"{filename}.docx" + docx_path = path.join(ql_instance.folder_path, docx_filename) + + current_file_dir = os.path.dirname(os.path.abspath(__file__)) + base_dir = os.path.dirname(os.path.dirname(current_file_dir)) + mdblockquote_path = os.path.abspath( + os.path.join(base_dir, "pandoc", "pandoc-filters", "mdblockquote.lua") + ) + emptypara_path = os.path.abspath( + os.path.join(base_dir, "pandoc", "pandoc-filters", "emptypara.lua") + ) + log.debug( + f"Lua filter paths: mdblockquote={mdblockquote_path}, emptypara={emptypara_path}" + ) + + temp_md_path = path.join(ql_instance.folder_path, "temp_markdown.md") + with open(temp_md_path, "w", encoding="utf-8") as f: + f.write(markdown_text) + + file_refs = re.findall(r'!\[.*?\]\((image_\d+_[^)]+)\)', markdown_text) + log.info(f"Found {len(file_refs)} image file references in markdown") + image_files = glob.glob(path.join(ql_instance.folder_path, "image_*.*")) + image_info = [] + total_image_size = 0 + for img_file in image_files: + if path.exists(img_file): + img_size = path.getsize(img_file) + total_image_size += img_size + img_size_mb = img_size / (1024 * 1024) + image_info.append( + f"{path.basename(img_file)} ({img_size_mb:.2f} MB, {img_size} bytes)" + ) + if len(image_files) > 0: + log.info(f"Found {len(image_files)} image files in folder:") + for info in image_info: + log.info(f" - {info}") + log.info( + f"Total image size: {total_image_size / (1024 * 1024):.2f} MB ({total_image_size} bytes)" + ) + 
log.info(f"Markdown file created at: {temp_md_path}") + + original_cwd = os.getcwd() + try: + os.chdir(ql_instance.folder_path) + temp_md_rel_path = "temp_markdown.md" + docx_output_name = os.path.basename(docx_path) + log.info( + f"Converting markdown with image file references to DOCX (working dir: {os.getcwd()})" + ) + existing_images = glob.glob("image_*.*") + log.info(f"Images in working directory before Pandoc: {existing_images}") + with open(temp_md_rel_path, "r", encoding="utf-8") as f: + md_content = f.read() + image_refs_in_md = re.findall(r'!\[.*?\]\((image_\d+_[^)]+)\)', md_content) + log.info(f"Image references found in markdown file: {image_refs_in_md}") + pandoc_cmd = [ + "pandoc", + temp_md_rel_path, + "-f", + "markdown_github+fancy_lists+emoji+hard_line_breaks+all_symbols_escapable+escaped_line_breaks+pipe_tables+startnum+tex_math_dollars", + "-t", + "docx+empty_paragraphs", + "-o", + docx_output_name, + "--no-highlight", + "--preserve-tabs", + "--wrap=preserve", + "--indent=false", + "--mathml", + "--ascii", + "--lua-filter=" + mdblockquote_path, + "--lua-filter=" + emptypara_path, + ] + log.info(f"Running pandoc command: {' '.join(pandoc_cmd)}") + result = subprocess.run( + pandoc_cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if result.returncode != 0: + log.error(f"Pandoc failed (exit {result.returncode}): {result.stderr}") + raise Exception(f"Pandoc failed: {result.stderr}") + if result.stderr: + log.warning(f"Pandoc warnings: {result.stderr}") + log.info("Pandoc markdown to DOCX conversion completed") + finally: + os.chdir(original_cwd) + + try: + if path.exists(temp_md_path): + from os import remove + + remove(temp_md_path) + + image_files = ( + glob.glob(path.join(ql_instance.folder_path, "image_*.png")) + + glob.glob(path.join(ql_instance.folder_path, "image_*.jpg")) + + glob.glob(path.join(ql_instance.folder_path, "image_*.jpeg")) + + glob.glob(path.join(ql_instance.folder_path, "image_*.gif")) + + 
glob.glob(path.join(ql_instance.folder_path, "image_*.svg")) + + glob.glob(path.join(ql_instance.folder_path, "image_*.webp")) + ) + for img_file in image_files: + try: + if path.exists(img_file): + os.remove(img_file) + except Exception as e: + log.warning( + f"Could not remove temporary image file {img_file}: {str(e)}" + ) + except Exception: + pass + + with open(docx_path, "rb") as f: + ql_instance.temp_file.save(docx_filename, File(f), save=True) + + file_response = FileResponse(ql_instance.temp_file) + file_response["Content-Disposition"] = f'attachment; filename="{docx_filename}"' + + docx_size_bytes = path.getsize(docx_path) + docx_size_mb = docx_size_bytes / (1024 * 1024) + log.info( + f"[{ql_instance.id}] JSON to DOCX conversion completed - DOCX size: {docx_size_mb:.2f} MB ({docx_size_bytes} bytes)" + ) + + return file_response, ql_instance + + +def json_to_docx(json_data, logger_instance=None): + """ + High-level function to convert JSON to DOCX file. + Returns a FileResponse and QuestionLibrary instance. + """ + log = logger_instance or logger + log.info("JSON to DOCX conversion started") + file_response, ql_instance = build_docx_from_json(json_data, log) + log.info(f"[{ql_instance.id}] JSON to DOCX conversion completed") + return file_response, ql_instance diff --git a/api/pipelines/json_to_scorm.py b/api/pipelines/json_to_scorm.py new file mode 100644 index 0000000..9cdbf28 --- /dev/null +++ b/api/pipelines/json_to_scorm.py @@ -0,0 +1,66 @@ +from django.conf import settings +from django.http import FileResponse +from api.serializers import QuestionLibraryPackageSerializer +import logging + +logger = logging.getLogger(__name__) + + +class JsonToScormError(Exception): + def __init__(self, errors): + super().__init__("JSON to SCORM validation failed") + self.errors = errors + + +def build_scorm_from_json(json_data): + """ + Build SCORM ZIP from JSON data. + Returns the QuestionLibrary instance with zip_file created. 
+ """ + payload = json_data.get("data", json_data) + ql_serializer = QuestionLibraryPackageSerializer(data=payload) + if not ql_serializer.is_valid(): + raise JsonToScormError(ql_serializer.errors) + + ql_instance = ql_serializer.save() + ql_instance.filter_main_title() + ql_instance.folder_path = settings.MEDIA_ROOT + str(ql_instance.id) + ql_instance.image_path = ql_instance.folder_path + settings.MEDIA_URL + ql_instance.create_directory() + ql_instance.save() + + ql_instance.create_xml_files() + missing_files = [] + if not ql_instance.imsmanifest_file: + missing_files.append("imsmanifest_file") + if not ql_instance.questiondb_file: + missing_files.append("questiondb_file") + if missing_files: + detail = ql_instance.error or "XML generation failed." + raise JsonToScormError({"xml_files": [detail], "missing_files": missing_files}) + + ql_instance.zip_files() + + if not ql_instance.zip_file: + detail = ql_instance.error or "Zip file was not created." + raise JsonToScormError({"zip_file": [detail]}) + + return ql_instance + + +def json_to_scorm(json_data, logger_instance=None): + """ + High-level function to convert JSON to SCORM ZIP file. + Returns a FileResponse and QuestionLibrary instance. 
+ """ + log = logger_instance or logger + log.info("JSON to SCORM conversion started") + ql_instance = build_scorm_from_json(json_data) + + file_name = f"{ql_instance.filtered_main_title}.zip" + file_response = FileResponse(ql_instance.zip_file) + file_response['Content-Disposition'] = f'attachment; filename="{file_name}"' + + log.info(f"[{ql_instance.id}] JSON to SCORM conversion completed") + + return file_response, ql_instance diff --git a/api/pipelines/response_payload.py b/api/pipelines/response_payload.py new file mode 100644 index 0000000..014cf9e --- /dev/null +++ b/api/pipelines/response_payload.py @@ -0,0 +1,141 @@ +import copy +import re +import socket + +from django.conf import settings + +from api.serializers import JsonResponseSerializer, count_errors +from api.formats.docx.process_helper import html_to_plain, trim_text + + +def build_response_payload(questionlibrary, preview=False): + count_errors(questionlibrary) + serializer = JsonResponseSerializer(questionlibrary) + json_data = serializer.data + json_data["total_question_errors"] = str(questionlibrary.total_question_errors or 0) + json_data["total_document_errors"] = str(questionlibrary.total_document_errors or 0) + + questionlibrary.json_data = json_data + questionlibrary.save(update_fields=["json_data"]) + + if preview: + return _apply_preview_transform(copy.deepcopy(json_data), questionlibrary) + + return json_data + + +def build_status_payload(status, statustext, data="", process=None, questionlibrary=None): + if process: + payload = process.sendformat(status, statustext, data) + else: + payload = { + "hostname": socket.gethostname(), + "version": settings.APP_VERSION, + "status": status, + "statustext": statustext, + "images_count": "0", + "section_count": "0", + "questions_count": "0", + "endanswer_count": "0", + "question_info_count": "0", + "question_warning_count": "0", + "question_error_count": "0", + "data": data, + } + + if questionlibrary: + total_question_errors = 
getattr(questionlibrary, "total_question_errors", 0) or 0 + total_document_errors = getattr(questionlibrary, "total_document_errors", 0) or 0 + payload["total_question_errors"] = str(total_question_errors) + payload["total_document_errors"] = str(total_document_errors) + + return payload + + +def _apply_preview_transform(json_data, questionlibrary): + def replace_placeholders(text): + if not text: + return text + + pattern = r"<<<<(\d+)>>>>" + + def replace_match(match): + image_id = match.group(1) + try: + image = questionlibrary.get_image(int(image_id)) + return image.image or match.group(0) + except Exception: + return match.group(0) + + return re.sub(pattern, replace_match, text) + + def build_title_from_text(text): + if not text: + return None + + has_table = re.search(r"", text) + has_img = re.search(r"]+>", text) + + title_text = text.replace("\n", " ") + title_text = re.sub(r"", "[IMG]", title_text) + title_text = re.sub(r"", "[TABLE]", title_text) + title_text = re.sub(r"<<<<\d+>>>>", "[IMG]", title_text) + + title_text = html_to_plain(title_text) + title_text = trim_text(title_text) + + prefix = "" + if has_table: + prefix = "[TABLE]" + prefix + if has_img: + prefix = "[IMG]" + prefix + + if prefix: + prefix = prefix + " " + title_text = re.sub(r"\s*\[IMG\]", "", title_text).strip() + title_text = re.sub(r"\s*\[TABLE\]", "", title_text).strip() + + title_text = prefix + title_text + return title_text[:127] + + for section in json_data.get("sections", []): + section["text"] = replace_placeholders(section.get("text")) + + for question in section.get("questions", []): + question["text"] = replace_placeholders(question.get("text")) + + if not question.get("title"): + question["title"] = build_title_from_text(question.get("text")) + + for mc in question.get("multiple_choice") or []: + for answer in mc.get("multiple_choice_answers") or []: + answer["answer"] = replace_placeholders(answer.get("answer")) + answer["answer_feedback"] = 
replace_placeholders(answer.get("answer_feedback")) + + for tf in question.get("true_false") or []: + tf["true_feedback"] = replace_placeholders(tf.get("true_feedback")) + tf["false_feedback"] = replace_placeholders(tf.get("false_feedback")) + + for fib in question.get("fib") or []: + fib["text"] = replace_placeholders(fib.get("text")) + + for ms in question.get("multiple_select") or []: + for answer in ms.get("multiple_select_answers") or []: + answer["answer"] = replace_placeholders(answer.get("answer")) + answer["answer_feedback"] = replace_placeholders(answer.get("answer_feedback")) + + for ordering in question.get("ordering") or []: + ordering["text"] = replace_placeholders(ordering.get("text")) + ordering["ord_feedback"] = replace_placeholders(ordering.get("ord_feedback")) + + for matching in question.get("matching") or []: + for choice in matching.get("matching_choices") or []: + choice["choice_text"] = replace_placeholders(choice.get("choice_text")) + for answer in choice.get("matching_answers") or []: + answer["answer_text"] = replace_placeholders(answer.get("answer_text")) + + for wr in question.get("written_response") or []: + wr["initial_text"] = replace_placeholders(wr.get("initial_text")) + wr["answer_key"] = replace_placeholders(wr.get("answer_key")) + + return json_data diff --git a/api/pipelines/scorm_to_json.py b/api/pipelines/scorm_to_json.py new file mode 100644 index 0000000..875bc58 --- /dev/null +++ b/api/pipelines/scorm_to_json.py @@ -0,0 +1,50 @@ +from os import path +import logging + +from api.serializers import QuestionLibraryPackageSerializer, count_errors +from api.formats.scorm.scorm_extractor import ScormExtractor + +logger = logging.getLogger(__name__) + + +class ScormToJsonError(Exception): + def __init__(self, message): + super().__init__(message) + + +def build_scorm_to_json(instance): + """ + Run the SCORM extractor and return JSON data + QuestionLibrary instance. 
+ """ + scorm_zip_path = instance.temp_file.path + xml_reader = ScormExtractor( + scorm_zip_path, + extract_to_path=path.join(instance.folder_path, "scorm_extract"), + ) + + question_library = xml_reader.populate_django_models(instance) + ql_serializer = QuestionLibraryPackageSerializer(question_library) + json_data = ql_serializer.data + + count_errors(question_library) + json_data["total_question_errors"] = str(question_library.total_question_errors or 0) + json_data["total_document_errors"] = str(question_library.total_document_errors or 0) + + instance.json_data = json_data + instance.save() + + return json_data, question_library + + +def scorm_to_json(instance, logger_instance=None): + """ + High-level function to convert SCORM ZIP to JSON. + Returns the JSON data and QuestionLibrary instance. + """ + log = logger_instance or logger + log.info(f"[{instance.id}] SCORM to JSON conversion started") + + json_data, question_library = build_scorm_to_json(instance) + log.info(f"[{instance.id}] SCORM to JSON conversion completed") + + return json_data, question_library diff --git a/api/pipelines/ws_pipeline.py b/api/pipelines/ws_pipeline.py new file mode 100644 index 0000000..828f07c --- /dev/null +++ b/api/pipelines/ws_pipeline.py @@ -0,0 +1,142 @@ +from bs4 import BeautifulSoup +from api.formats.docx.extract_images import extract_images +from api.formats.docx.formatter import run_formatter +from api.formats.docx.sectioner import run_sectioner +from api.formats.docx.splitter import Splitter +from api.formats.docx.endanswers import get_endanswers +from api.formats.docx.parser import run_parser +from api.formats.docx.convert_txt import convert_txt +from api.formats.docx.fix_numbering import fix_numbering +import socket +from api.tasks import run_pandoc_task +from django.conf import settings +import logging +from api.logging.logging_adapter import FilenameLoggingAdapter + +from api.logging.ErrorTypes import * + +logger = logging.getLogger(__name__) + + +class Process: + 
def __init__(self, questionlibrary) -> None: + self.questionlibrary = questionlibrary + self.images_extracted = 0 + self.subsection_count = 0 + self.questions_expected = 0 + self.questions_processed = 0 + self.endanswers_count = 0 + self.question_info_count = 0 + self.question_warning_count = 0 + self.question_error_count = 0 + + def run_pandoc(self): + file_logger = FilenameLoggingAdapter( + logger, + { + "filename": self.questionlibrary.temp_file.name, + "user_ip": self.questionlibrary.user_ip, + }, + ) + try: + result = run_pandoc_task.apply_async( + kwargs={"questionlibrary_id": self.questionlibrary.id}, + ignore_result=False, + ) + pandoc_task_result = result.get() + self.questionlibrary.pandoc_output = pandoc_task_result + except Exception as e: + raise Exception(str(e)) + + if self.questionlibrary.pandoc_output is None: + raise MarkDownConversionError("Pandoc output string is empty") + + def convert_txt(self): + convert_txt(self.questionlibrary) + + def fix_numbering(self): + fix_numbering(self.questionlibrary) + + def extract_images(self): + self.images_extracted = extract_images(self.questionlibrary) + + def run_formatter(self): + file_logger = FilenameLoggingAdapter( + logger, + { + "filename": self.questionlibrary.temp_file.name, + "user_ip": self.questionlibrary.user_ip, + }, + ) + file_logger.debug("starting formatter antlr process") + run_formatter(self.questionlibrary) + + # This is to split sections into separate objects + def run_sectioner(self): + file_logger = FilenameLoggingAdapter( + logger, + { + "filename": self.questionlibrary.temp_file.name, + "user_ip": self.questionlibrary.user_ip, + }, + ) + file_logger.debug("starting sectioner antlr process") + self.subsection_count = run_sectioner(self.questionlibrary) + + def run_splitter(self): + file_logger = FilenameLoggingAdapter( + logger, + { + "filename": self.questionlibrary.temp_file.name, + "user_ip": self.questionlibrary.user_ip, + }, + ) + file_logger.debug("starting splitter antlr 
process") + splitter = Splitter(self.questionlibrary) + self.questions_expected = splitter.run_splitter() + + def get_endanswers(self): + self.endanswers_count = get_endanswers(self.questionlibrary) + + def run_parser(self): + file_logger = FilenameLoggingAdapter( + logger, + { + "filename": self.questionlibrary.temp_file.name, + "user_ip": self.questionlibrary.user_ip, + }, + ) + file_logger.debug("starting questionparser antlr process") + run_parser(self.questionlibrary) + + def sendformat(self, status, statustext, data): + return { + "hostname": socket.gethostname(), + "version": settings.APP_VERSION, + "status": status, + "statustext": statustext, + "images_count": str(self.images_extracted), + "section_count": str(self.subsection_count), + "questions_count": str(self.questions_expected), + "endanswer_count": str(self.endanswers_count), + "question_info_count": str(self.question_info_count), + "question_warning_count": str(self.question_warning_count), + "question_error_count": str(self.question_error_count), + "data": data, + } + + +def run_pipeline(pipeline): + pipeline.run_pandoc() + pipeline.extract_images() + pipeline.run_formatter() + pipeline.run_sectioner() + pipeline.run_splitter() + pipeline.get_endanswers() + pipeline.run_parser() + return pipeline + + +def process(questionlibrary): + pipeline = Process(questionlibrary) + return run_pipeline(pipeline) diff --git a/api/process/process.py b/api/process/process.py deleted file mode 100644 index 0f384fc..0000000 --- a/api/process/process.py +++ /dev/null @@ -1,127 +0,0 @@ -from bs4 import BeautifulSoup -from .extract_images import extract_images -from .formatter import run_formatter -from .sectioner import run_sectioner -from .splitter import Splitter -from .endanswers import get_endanswers -from .parser import run_parser -from .convert_txt import convert_txt -from .fix_numbering import fix_numbering -import socket -from api.tasks import run_pandoc_task -from django.conf import settings -import logging 
-newlogger = logging.getLogger(__name__) -# from api.logging.contextfilter import QuestionlibraryFilenameFilter -# logger.addFilter(QuestionlibraryFilenameFilter()) -from api.logging.logging_adapter import FilenameLoggingAdapter - -from api.logging.ErrorTypes import * -import os - -class Process: - def __init__(self, questionlibrary) -> None: - self.questionlibrary = questionlibrary - self.images_extracted = 0 - self.subsection_count = 0 - self.questions_expected = 0 - self.questions_processed = 0 - self.endanswers_count = 0 - self.question_info_count = 0 - self.question_warning_count = 0 - self.question_error_count = 0 - - def run_pandoc(self): - logger = FilenameLoggingAdapter(newlogger, { - 'filename': self.questionlibrary.temp_file.name, - 'user_ip': self.questionlibrary.user_ip - }) - try: - result = run_pandoc_task.apply_async(kwargs={"questionlibrary_id":self.questionlibrary.id}, ignore_result=False) - pandoc_task_result = result.get() - # logger.debug(pandoc_task_result) - self.questionlibrary.pandoc_output = pandoc_task_result - except Exception as e: - raise Exception(str(e)) - - if self.questionlibrary.pandoc_output == None: - raise MarkDownConversionError("Pandoc output string is empty") - - def convert_txt(self): - convert_txt(self.questionlibrary) - - def fix_numbering(self): - # logger = FilenameLoggingAdapter(newlogger, { - # 'filename': self.questionlibrary.temp_file.name, - # 'user_ip': self.questionlibrary.user_ip - # }) - # logger.debug("starting pandoc html to md") - # try: - # result = convert_html_to_md.apply_async(kwargs={"questionlibrary_id":self.questionlibrary.id}, ignore_result=False) - # convert_html_to_md_task_result = result.get() - # logger.debug("pdf to md result") - # logger.debug(convert_html_to_md_task_result) - # self.questionlibrary.txt_output = convert_html_to_md_task_result - # self.questionlibrary.save() - # except Exception as e: - # raise Exception(str(e)) - - fix_numbering(self.questionlibrary) - - def 
extract_images(self): - self.images_extracted = extract_images(self.questionlibrary) - - def run_formatter(self): - logger = FilenameLoggingAdapter(newlogger, { - 'filename': self.questionlibrary.temp_file.name, - 'user_ip': self.questionlibrary.user_ip - }) - logger.debug("starting formatter antlr process") - run_formatter(self.questionlibrary) - - # This is to split sections into separate objects - def run_sectioner(self): - logger = FilenameLoggingAdapter(newlogger, { - 'filename': self.questionlibrary.temp_file.name, - 'user_ip': self.questionlibrary.user_ip - }) - logger.debug("starting sectioner antlr process") - self.subsection_count = run_sectioner(self.questionlibrary) - - def run_splitter(self): - logger = FilenameLoggingAdapter(newlogger, { - 'filename': self.questionlibrary.temp_file.name, - 'user_ip': self.questionlibrary.user_ip - }) - logger.debug("starting splitter antlr process") - splitter = Splitter(self.questionlibrary) - self.questions_expected = splitter.run_splitter() - - def get_endanswers(self): - self.endanswers_count = get_endanswers(self.questionlibrary) - - def run_parser(self): - logger = FilenameLoggingAdapter(newlogger, { - 'filename': self.questionlibrary.temp_file.name, - 'user_ip': self.questionlibrary.user_ip - }) - logger.debug("starting questionparser antlr process") - run_parser(self.questionlibrary) - - def sendformat(self, status, statustext, data): - - return { - 'hostname': socket.gethostname(), - 'version': settings.APP_VERSION, - 'status': status, - 'statustext': statustext, - 'images_count': str(self.images_extracted), - 'section_count': str(self.subsection_count), - 'questions_count': str(self.questions_expected), - 'endanswer_count': str(self.endanswers_count), - 'question_info_count': str(self.question_info_count), - 'question_warning_count': str(self.question_warning_count), - 'question_error_count': str(self.question_error_count), - 'data': data - } -# 
++++++++++++++++++++++++++++++++=================================== diff --git a/api/questions/__init__.py b/api/questions/__init__.py new file mode 100644 index 0000000..39d70bd --- /dev/null +++ b/api/questions/__init__.py @@ -0,0 +1 @@ +# Question domain logic. diff --git a/api/questions/model_builders/__init__.py b/api/questions/model_builders/__init__.py new file mode 100644 index 0000000..0baf102 --- /dev/null +++ b/api/questions/model_builders/__init__.py @@ -0,0 +1 @@ +# Question builders. diff --git a/api/process/questionbuilder/fib.py b/api/questions/model_builders/fib.py similarity index 98% rename from api/process/questionbuilder/fib.py rename to api/questions/model_builders/fib.py index 8bf9574..a7e4d2d 100644 --- a/api/process/questionbuilder/fib.py +++ b/api/questions/model_builders/fib.py @@ -1,6 +1,6 @@ from ...models import Fib import re -from ..process_helper import markdown_to_plain +from api.formats.docx.process_helper import markdown_to_plain def build_inline_FIB(question): question.questiontype = 'FIB' diff --git a/api/process/questionbuilder/matching.py b/api/questions/model_builders/matching.py similarity index 98% rename from api/process/questionbuilder/matching.py rename to api/questions/model_builders/matching.py index fc1e15c..830a182 100644 --- a/api/process/questionbuilder/matching.py +++ b/api/questions/model_builders/matching.py @@ -1,6 +1,6 @@ import re from ...models import Matching, MatchingChoice, MatchingAnswer -from ..process_helper import add_error_message, trim_text, markdown_to_html +from api.formats.docx.process_helper import add_error_message, trim_text, markdown_to_html from api.logging.ErrorTypes import MATNoMatchError, MATMissingChoiceError, MATMissingAnswerError def build_inline_MAT(question, answers): diff --git a/api/process/questionbuilder/multiplechoice.py b/api/questions/model_builders/multiplechoice.py similarity index 96% rename from api/process/questionbuilder/multiplechoice.py rename to 
api/questions/model_builders/multiplechoice.py index c9a4c21..15e5efd 100644 --- a/api/process/questionbuilder/multiplechoice.py +++ b/api/questions/model_builders/multiplechoice.py @@ -1,6 +1,6 @@ import re from ...models import MultipleChoice, MultipleChoiceAnswer -from ..process_helper import add_warning_message, trim_text, trim_md_to_plain, trim_md_to_html +from api.formats.docx.process_helper import add_warning_message, trim_text, trim_md_to_plain, trim_md_to_html from api.logging.WarningTypes import MCEndAnswerExistWarning from celery.utils.log import get_task_logger from api.logging.logging_adapter import FilenameLoggingAdapter diff --git a/api/process/questionbuilder/multipleselect.py b/api/questions/model_builders/multipleselect.py similarity index 92% rename from api/process/questionbuilder/multipleselect.py rename to api/questions/model_builders/multipleselect.py index 9957185..c7c5dcc 100644 --- a/api/process/questionbuilder/multipleselect.py +++ b/api/questions/model_builders/multipleselect.py @@ -1,6 +1,6 @@ import re from ...models import MultipleSelect, MultipleSelectAnswer -from ..process_helper import add_warning_message, trim_text, trim_md_to_html, trim_md_to_plain +from api.formats.docx.process_helper import add_warning_message, trim_text, trim_md_to_html, trim_md_to_plain from api.logging.WarningTypes import MSEndAnswerExistWarning def build_inline_MS(question, answers, is_random, enumeration): @@ -19,7 +19,7 @@ def build_inline_MS(question, answers, is_random, enumeration): for answer_order, answer_item in enumerate(answers): ms_answerobject = MultipleSelectAnswer.objects.create(multiple_select=ms_object) answer_index = trim_text(answer_item.get('answer_prefix')) - ms_answerobject.index = re.sub(r'[\W_]', '', answer_index) + ms_answerobject.index = re.sub(r'[\\W_]', '', answer_index) ms_answerobject.order = answer_order + 1 ms_answerobject.answer = trim_md_to_html(answer_item.get('answer_content')) answer_feedback = answer_item.get('feedback') 
@@ -57,7 +57,7 @@ def build_endanswer_MS(question, answers, endanswer, is_random, enumeration): for idx, answer_item in enumerate(answers): ms_answerobject = MultipleSelectAnswer.objects.create(multiple_select=ms_object) answer_index = trim_text(answer_item.get('answer_prefix')) - ms_answerobject.index = re.sub(r'[\W_]', '', answer_index) + ms_answerobject.index = re.sub(r'[\\W_]', '', answer_index) ms_answerobject.order = idx + 1 ms_answerobject.answer = trim_md_to_html(answer_item.get('answer_content')) answer_feedback = answer_item.get('feedback') diff --git a/api/process/questionbuilder/ordering.py b/api/questions/model_builders/ordering.py similarity index 94% rename from api/process/questionbuilder/ordering.py rename to api/questions/model_builders/ordering.py index c6c3f98..2ccb7c5 100644 --- a/api/process/questionbuilder/ordering.py +++ b/api/questions/model_builders/ordering.py @@ -1,5 +1,5 @@ from ...models import Ordering -from ..process_helper import trim_md_to_html +from api.formats.docx.process_helper import trim_md_to_html def build_inline_ORD(question, answers): question.questiontype = 'ORD' diff --git a/api/process/questionbuilder/truefalse.py b/api/questions/model_builders/truefalse.py similarity index 97% rename from api/process/questionbuilder/truefalse.py rename to api/questions/model_builders/truefalse.py index f185773..151b0aa 100644 --- a/api/process/questionbuilder/truefalse.py +++ b/api/questions/model_builders/truefalse.py @@ -1,5 +1,5 @@ from ...models import TrueFalse -from ..process_helper import add_error_message, trim_text, trim_md_to_html, markdown_to_plain +from api.formats.docx.process_helper import add_error_message, trim_text, trim_md_to_html, markdown_to_plain from api.logging.ErrorTypes import TFNoAnswerError, TFSelectedAnswerError from celery.utils.log import get_task_logger from api.logging.logging_adapter import FilenameLoggingAdapter diff --git a/api/process/questionbuilder/writtenresponse.py 
b/api/questions/model_builders/writtenresponse.py similarity index 94% rename from api/process/questionbuilder/writtenresponse.py rename to api/questions/model_builders/writtenresponse.py index a984410..40f931c 100644 --- a/api/process/questionbuilder/writtenresponse.py +++ b/api/questions/model_builders/writtenresponse.py @@ -1,5 +1,5 @@ from ...models import WrittenResponse -from ..process_helper import add_warning_message, trim_md_to_html +from api.formats.docx.process_helper import add_warning_message, trim_md_to_html from api.logging.WarningTypes import WREndAnswerExistWarning def build_inline_WR_with_keyword(question, wr_answer): diff --git a/api/scorm/XmlWriter.py b/api/scorm/XmlWriter.py deleted file mode 100644 index 338b674..0000000 --- a/api/scorm/XmlWriter.py +++ /dev/null @@ -1,755 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -import copy -from difflib import Match -import os -import random -import shutil -import datetime -import re -import time -import xml.etree.cElementTree as ET -from uuid import UUID -from .xmlcdata import CDATA -from os import makedirs, path, walk -from os.path import basename -from django.conf import settings -from xml.dom.minidom import parseString -from zipfile import * - - -class XmlWriter: - def __init__(self, question_library): - ident = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f") - questionLibraryIdent = "QLIB_" + ident - root_el = ET.Element("questestinterop") - objectbank_el = ET.SubElement(root_el, "objectbank", {"ident": questionLibraryIdent, "xmlns:d2l_2p0": "http://desire2learn.com/xsd/d2lcp_v2p0"}) - - # root_section_obj = question_library.get_root_section() - # root_section_el = self.create_section(objectbank_el, root_section_obj) - - base_ident = "SECT_" + str(datetime.datetime.now().strftime("%Y%m%d%H%M%S")) + str(int(UUID(int=0x12345678123456781234567812345678))) - base_section_el = ET.SubElement(objectbank_el, "section", {"ident": base_ident, "title": question_library.main_title}) - if question_library.shuffle is True: - self.create_section_shuffle(base_section_el) - - self.create_presentation_material(base_section_el, "") # we currently not catching any base section text and it's ignored in ANTLR - - sec_proc = ET.SubElement(base_section_el, "sectionproc_extension") - sec_proc_dis_name = ET.SubElement(sec_proc, "d2l_2p0:display_section_name") - # TODO: add is_title_displayed and text to QuestionLibrary because not all exam has root section - sec_proc_dis_name.text = "yes" # section_obj.is_title_displayed if section_obj.is_title_displayed else "yes" - sec_proc_dis_line = ET.SubElement(sec_proc, "d2l_2p0:display_section_line") - sec_proc_dis_line.text = "no" - sec_proc_dis_sec = ET.SubElement(sec_proc, "d2l_2p0:type_display_section") - sec_proc_dis_sec.text = "0" # "1" if section_obj.is_text_displayed else "0" - - - section_objs = 
question_library.get_sections() - for section_obj in section_objs: - if section_obj.is_main_content is True: - root_question_objs = section_obj.get_questions() - self.create_questions(base_section_el, root_question_objs) - else: - current_section_el = self.create_section(base_section_el, section_obj) - question_objs = section_obj.get_questions() - self.create_questions(current_section_el, question_objs) - self.questiondb_string = self.xml_to_string(root_el) - - - def create_section(self, parent_el, section_obj): - sectionIdent = "SECT_" + str(datetime.datetime.now().strftime("%Y%m%d%H%M%S")) + str(int(UUID(int=0x12345678123456781234567812345678))) - section_el = ET.SubElement(parent_el, "section", {"ident": sectionIdent, "title": section_obj.title}) - if section_obj.shuffle is True: - self.create_section_shuffle(section_el) - - self.create_presentation_material(section_el, section_obj.text) - self.create_sectionproc_extension(section_el, section_obj) - - return section_el - - - def create_section_shuffle(self, section_el): - # section > selection_ordering > order - sel_ord = ET.SubElement(section_el, "selection_ordering") - sel_ord_ord = ET.SubElement(sel_ord, "order", {"order_type": "Random"}) - - - def create_presentation_material(self, section_el, section_text): - # presentation_material Node - sec_pres_mat = ET.SubElement(section_el, "presentation_material") - sec_pres_mat_flo = ET.SubElement(sec_pres_mat, "flow_mat") - sec_pres_mat_flo_flo = ET.SubElement(sec_pres_mat_flo, "flow_mat") - sec_pres_mat_flo_flo_mat = ET.SubElement(sec_pres_mat_flo_flo, "material") - sec_pres_mat_flo_flo_mat_text = ET.SubElement(sec_pres_mat_flo_flo_mat, "mattext", {"texttype": "text/html"}) - if section_text: - sec_pres_mat_flo_flo_mat_text.append(CDATA(section_text)) - - - def create_sectionproc_extension(self, section_el, section_obj): - # presentation_material Node - sec_proc = ET.SubElement(section_el, "sectionproc_extension") - sec_proc_dis_name = ET.SubElement(sec_proc, 
"d2l_2p0:display_section_name") - sec_proc_dis_name.text = section_obj.is_title_displayed if section_obj.is_title_displayed else "yes" - sec_proc_dis_line = ET.SubElement(sec_proc, "d2l_2p0:display_section_line") - sec_proc_dis_line.text = "no" - sec_proc_dis_sec = ET.SubElement(sec_proc, "d2l_2p0:type_display_section") - sec_proc_dis_sec.text = "1" if section_obj.is_text_displayed else "0" - - - def create_questions(self, section_el, question_objs): - for question in question_objs: - time_ns = str(time.process_time_ns()) - random_int = str(random.randint(1000000, 9999999)) - ident = time_ns + random_int - question_ident = "QUES_" + ident - item_el = ET.Element("item", {"ident": "OBJ_" + ident, "label": question_ident, "d2l_2p0:page": "1", "title": question.title}) - # question_type = question.get_question_type() - question_type = question.questiontype - match question_type: - case "MC": - self.generate_multiple_choice(item_el, question_ident, question) - case "TF": - self.generate_true_false(item_el, question_ident, question) - case "FIB" | "FMB": - self.generate_fill_in_the_blanks(item_el, question_ident, question) - case "MS" | "MR": - self.generate_multi_select(item_el, question_ident, question) - case "MAT" | "MT": - self.generate_matching(item_el, question_ident, question) - case "ORD": - self.generate_ordering(item_el, question_ident, question) - case "WR" | "E": - self.generate_written_response(item_el, question_ident, question) - - section_el.append(item_el) - - - def itemetadata(self, it, question_type, question): - # ItemData Node - it_metadata = ET.SubElement(it, "itemmetadata") - it_metadata_qtidata = ET.SubElement(it_metadata, "qtimetadata") - it_computer_scored = ET.SubElement(it_metadata_qtidata, "qti_metadatafield") - it_computer_scored_label = ET.SubElement(it_computer_scored, "fieldlabel") - it_computer_scored_label.text = "qmd_computerscored" - it_computer_scored_entry = ET.SubElement(it_computer_scored, "fieldentry") - 
it_computer_scored_entry.text = "yes" - it_question_type = ET.SubElement(it_metadata_qtidata, "qti_metadatafield") - it_question_type_label = ET.SubElement(it_question_type, "fieldlabel") - it_question_type_label.text = "qmd_questiontype" - it_question_type_entry = ET.SubElement(it_question_type, "fieldentry") - it_question_type_entry.text = question_type - it_weighting = ET.SubElement(it_metadata_qtidata, "qti_metadatafield") - it_weighting_label = ET.SubElement(it_weighting, "fieldlabel") - it_weighting_label.text = "qmd_weighting" - it_weighting_entry = ET.SubElement(it_weighting, "fieldentry") - it_weighting_entry.text = "{:.4f}".format(question.points) - - - def itemproc_extension(self, it): - # Itemproc_extension Node - it_proc = ET.SubElement(it, "itemproc_extension") - it_proc_difficulty = ET.SubElement(it_proc, "d2l_2p0:difficulty") - it_proc_difficulty.text = "1" - it_proc_isbonus = ET.SubElement(it_proc, "d2l_2p0:isbonus") - it_proc_isbonus.text = "no" - it_proc_ismandatory = ET.SubElement(it_proc, "d2l_2p0:ismandatory") - it_proc_ismandatory.text = "no" - - - def generate_feedback(self, it, ident, feedback): - it_fb = ET.SubElement(it, "itemfeedback", {"ident": ident}) - it_fb_mat = ET.SubElement(it_fb, "material") - it_fb_mat_text = ET.SubElement(it_fb_mat, "mattext", {"texttype": "text/html"}) - it_fb_mat_text.append(CDATA(feedback)) - - - def generate_hint(self, it, hint): - it_hint = ET.SubElement(it, "hint") - it_hint_mat = ET.SubElement(it_hint, "hintmaterial") - it_hint_mat_flow = ET.SubElement(it_hint_mat, "flow_mat") - it_hint_mat_flow_mat = ET.SubElement(it_hint_mat_flow, "material") - it_hint_mat_flow_text = ET.SubElement(it_hint_mat_flow_mat, "mattext", {"texttype": "text/html"}) - it_hint_mat_flow_text.append(CDATA(hint)) - - - def xml_to_string(self, xml): - rough_string = ET.tostring(xml, "utf-8") - reparsed = parseString(rough_string) - pretty_xml = reparsed.toprettyxml(indent="\t") - return pretty_xml - - - def create_manifest(self, 
manifest_entity, folder_path): - path = folder_path + "/imsmanifest.xml" - root = ET.Element("manifest", {"xmlns:d2l_2p0": "http://desire2learn.com/xsd/d2lcp_v2p0", "xmlns": "http://www.imsglobal.org/xsd/imscp_v1p1", "identifier": "MANIFEST_1"}) - doc = ET.SubElement(root, "resources") - - for resource in manifest_entity.resources: - ET.SubElement( - doc, "resource", {"identifier": resource.identifier, "type": resource.resource_type, "d2l_2p0:material_type": resource.material_type, "href": resource.href, "d2l_2p0:link_target": resource.link_target, "title": resource.title} - ) - - tree = ET.ElementTree(root) - # tree.write(path) - return tree - - - def generate_multiple_choice(self, it, question_ident, question): - self.itemetadata(it, "Multiple Choice", question) - self.itemproc_extension(it) - question_lid = question_ident + "_LID" - question_ident_answer = question_ident + "_A" - question_ident_feedback = question_ident + "_IF" - - # Presentation Node - it_pre = ET.SubElement(it, "presentation") - it_pre_flow = ET.SubElement(it_pre, "flow") - - # Presentation -> Flow - it_pre_flow_mat = ET.SubElement(it_pre_flow, "material") - - # Presentation -> Material - multiple_choice = question.get_multiple_choice() - it_pre_flow_mat_text = ET.SubElement(it_pre_flow_mat, "mattext", {"texttype": "text/html"}) - question_text = question.text - it_pre_flow_mat_text.append(CDATA(question_text)) - - # Presentation -> Flow -> Response_extension - it_pre_flow_res = ET.SubElement(it_pre_flow, "response_extension") - it_pre_flow_res_display_style = ET.SubElement(it_pre_flow_res, "d2l_2p0:display_style") - it_pre_flow_res_display_style.text = "2" - it_pre_flow_res_enumeration = ET.SubElement(it_pre_flow_res, "d2l_2p0:enumeration") - it_pre_flow_res_enumeration.text = str(multiple_choice.enumeration) if multiple_choice.enumeration else "4" - it_pre_flow_res_grading_type = ET.SubElement(it_pre_flow_res, "d2l_2p0:grading_type") - it_pre_flow_res_grading_type.text = "0" - # Presentation 
-> Flow -> Response_lid - it_pre_flow_lid = ET.SubElement(it_pre_flow, "response_lid", {"ident": question_lid, "rcardinality": "Multiple"}) - - # Commented this to deactivate MC randomized answer order - it_pre_flow_lid_render_choice = ET.SubElement(it_pre_flow_lid, "render_choice", {"shuffle": ("yes" if multiple_choice.randomize else "no")}) - - # Add hint - if question.hint: - self.generate_hint(it, question.hint) - - # Reprocessing - it_res = ET.SubElement(it, "resprocessing") - - # Add General feedback - if question.feedback: - self.generate_feedback(it, question_ident, question.feedback) - - mc_answer_index = 1 - for mc_answer in multiple_choice.get_multiple_choice_answers(): - - # Presentation -> Flow -> Response_lid -> Render_choice -> Flow_label - flow = ET.SubElement(it_pre_flow_lid_render_choice, "flow_label", {"class": "Block"}) - response_label = ET.SubElement(flow, "response_label", {"ident": question_ident_answer + str(mc_answer_index)}) - flow_mat = ET.SubElement(response_label, "flow_mat") - material = ET.SubElement(flow_mat, "material") - mattext = ET.SubElement(material, "mattext", {"texttype": "text/html"}) - mattext.append(CDATA(mc_answer.answer)) - - # Reprocessing -> Respcondition - it_res_con = ET.SubElement(it_res, "respcondition", {"title": "Response Condition" + str(mc_answer_index)}) - it_res_con_var = ET.SubElement(it_res_con, "conditionvar") - it_res_con_var_equal = ET.SubElement(it_res_con_var, "varequal", {"respident": question_lid}) - it_res_con_var_equal.text = question_ident_answer + str(mc_answer_index) - it_res_set_var = ET.SubElement(it_res_con, "setvar", {"action": "Set"}) - it_res_set_var.text = str(mc_answer.weight) if mc_answer.weight else "0.0000" - it_res_dis = ET.SubElement(it_res_con, "displayfeedback", {"feedbacktype": "Response", "linkrefid": question_ident_feedback + str(mc_answer_index)}) - - # Add Answer specific feedback - if mc_answer.answer_feedback: - self.generate_feedback(it, question_ident_feedback + 
str(mc_answer_index), mc_answer.answer_feedback) - mc_answer_index += 1 - - - def generate_true_false(self, it, question_ident, question): - self.itemetadata(it, "True/False", question) - self.itemproc_extension(it) - - question_lid = question_ident + "_LID" - question_ident_answer = question_ident + "_A" - question_ident_feedback = question_ident + "_IF" - - # Presentation Node - it_pre = ET.SubElement(it, "presentation") - it_pre_flow = ET.SubElement(it_pre, "flow") - - # Presentation -> Flow - it_pre_flow_mat = ET.SubElement(it_pre_flow, "material") - - true_false = question.get_true_false() - # Presentation -> Material - it_pre_flow_mat_text = ET.SubElement(it_pre_flow_mat, "mattext", {"texttype": "text/html"}) - question_text = question.text - it_pre_flow_mat_text.append(CDATA(question_text)) - - # Presentation -> Flow -> Response_extension - it_pre_flow_res = ET.SubElement(it_pre_flow, "response_extension") - it_pre_flow_res_display_style = ET.SubElement(it_pre_flow_res, "d2l_2p0:display_style") - it_pre_flow_res_display_style.text = "2" - it_pre_flow_res_enumeration = ET.SubElement(it_pre_flow_res, "d2l_2p0:enumeration") - it_pre_flow_res_enumeration.text = str(true_false.enumeration) if true_false.enumeration else "4" - it_pre_flow_res_grading_type = ET.SubElement(it_pre_flow_res, "d2l_2p0:grading_type") - it_pre_flow_res_grading_type.text = "0" - - # Presentation -> Flow -> Response_lid - it_pre_flow_lid = ET.SubElement(it_pre_flow, "response_lid", {"ident": question_lid, "rcardinality": "Single"}) - it_pre_flow_lid_render_choice = ET.SubElement(it_pre_flow_lid, "render_choice", {"shuffle": "no"}) - - # Reprocessing - it_res = ET.SubElement(it, "resprocessing") - - # Add General feedback - if question.feedback: - self.generate_feedback(it, question_ident, question.feedback) - - tf_index = 0 - answer_text = ["True", "False"] - while tf_index < 2: - # Presentation -> Flow -> Response_lid -> Render_choice -> Flow_label - flow = 
ET.SubElement(it_pre_flow_lid_render_choice, "flow_label", {"class": "Block"}) - response_label = ET.SubElement(flow, "response_label", {"ident": question_ident_answer + str(tf_index)}) - flow_mat = ET.SubElement(response_label, "flow_mat") - material = ET.SubElement(flow_mat, "material") - mattext = ET.SubElement(material, "mattext", {"texttype": "text/plain"}) - mattext.text = answer_text[tf_index] - - # Reprocessing -> Respcondition - it_res_con = ET.SubElement(it_res, "respcondition", {"title": "Response Condition" + str(tf_index)}) - it_res_con_var = ET.SubElement(it_res_con, "conditionvar") - it_res_con_var_equal = ET.SubElement(it_res_con_var, "varequal", {"respident": question_lid}) - it_res_con_var_equal.text = question_ident_answer + str(tf_index) - it_res_set_var = ET.SubElement(it_res_con, "setvar", {"action": "Set"}) - - if tf_index == 0: - current_weight = true_false.true_weight - current_feedback = true_false.true_feedback - else: - current_weight = true_false.false_weight - current_feedback = true_false.false_feedback - - it_res_set_var.text = str(current_weight) if current_weight else "0.0000" - it_res_dis = ET.SubElement(it_res_con, "displayfeedback", {"feedbacktype": "Response", "linkrefid": question_ident_feedback + str(tf_index)}) - - # Add Answer specific feedback - if current_feedback: - self.generate_feedback(it, question_ident_feedback + str(tf_index), current_feedback) - tf_index += 1 - - - def generate_fill_in_the_blanks(self, it, question_ident, question): - self.itemetadata(it, "Fill in the Blanks", question) - self.itemproc_extension(it) - - # Presentation Node - it_pre = ET.SubElement(it, "presentation") - it_pre_flow = ET.SubElement(it_pre, "flow") - # Presentation -> Flow - - idx = 1 - for fib in question.get_fibs(): - question_str = question_ident + str(idx) + "_STR" - question_ans = question_ident + str(idx) + "_ANS" - if fib.type == "fibanswer": - # Presentation -> Flow -> Response_str - it_pre_flow_str = 
ET.SubElement(it_pre_flow, "response_str", {"rcardinality": "Single", "ident": question_str}) - it_pre_flow_str_render = ET.SubElement(it_pre_flow_str, "render_fib", {"fibtype": "String", "prompt": "Box", "columns": "30", "rows": "1"}) - it_pre_flow_str_render_label = ET.SubElement(it_pre_flow_str_render, "response_label", {"ident": question_ans}) - idx += 1 - elif fib.type == "fibquestion": - # Presentation -> Flow -> Material - it_pre_flow_mat = ET.SubElement(it_pre_flow, "material") - it_pre_flow_mat_text = ET.SubElement(it_pre_flow_mat, "mattext", {"texttype": "text/html"}) - question_text = fib.text - it_pre_flow_mat_text.append(CDATA(question_text)) - - # Add hint - if question.hint: - self.generate_hint(it, question.hint) - - # Resprocessing - it_res = ET.SubElement(it, "resprocessing") - it_out = ET.SubElement(it_res, "outcomes") - - index = 1 - for fib_answers in question.get_fib_answers(): - answers = [a.strip() for a in fib_answers.text.split(",")] - - answer_weight = str(100.0 / len(question.get_fib_answers())) - question_ans = question_ident + str(index) + "_ANS" - for answer in answers: - it_res_con = ET.SubElement(it_res, "respcondition") - it_res_con_var = ET.SubElement(it_res_con, "conditionvar") - it_res_con_var_equal = ET.SubElement(it_res_con_var, "varequal", {"case": "no", "respident": question_ans}) - it_res_con_var_equal.text = answer - it_res_set_var = ET.SubElement(it_res_con, "setvar", {"action": "Set"}) - it_res_set_var.text = answer_weight - - it_out_score = ET.SubElement(it_out, "decvar", {"varname": "Blank_" + str(index), "maxvalue": "100", "minvalue": "0", "vartype": "Integer"}) - - index += 1 - - # Add General feedback - if question.feedback: - self.generate_feedback(it, question_ident, question.feedback) - - - def generate_multi_select(self, it, question_ident, question): - self.itemetadata(it, "Multi-Select", question) - self.itemproc_extension(it) - - question_lid = question_ident + "_LID" - question_ident_answer = question_ident 
+ "_A" - question_ident_feedback = question_ident + "_IF" - - # Presentation Node - it_pre = ET.SubElement(it, "presentation") - it_pre_flow = ET.SubElement(it_pre, "flow") - - # Presentation -> Flow - it_pre_flow_mat = ET.SubElement(it_pre_flow, "material") - - multiple_select = question.get_multiple_select() - # Presentation -> Material - it_pre_flow_mat_text = ET.SubElement(it_pre_flow_mat, "mattext", {"texttype": "text/html"}) - question_text = question.text - it_pre_flow_mat_text.append(CDATA(question_text)) - - # Presentation -> Flow -> Response_extension - it_pre_flow_res = ET.SubElement(it_pre_flow, "response_extension") - it_pre_flow_res_display_style = ET.SubElement(it_pre_flow_res, "d2l_2p0:display_style") - it_pre_flow_res_display_style.text = "2" - it_pre_flow_res_enumeration = ET.SubElement(it_pre_flow_res, "d2l_2p0:enumeration") - it_pre_flow_res_enumeration.text = str(multiple_select.enumeration) if multiple_select.enumeration else "4" - it_pre_flow_res_grading_type = ET.SubElement(it_pre_flow_res, "d2l_2p0:grading_type") - it_pre_flow_res_grading_type.text = "2" - - # Presentation -> Flow -> Response_lid - it_pre_flow_lid = ET.SubElement(it_pre_flow, "response_lid", {"ident": question_lid, "rcardinality": "Multiple"}) - it_pre_flow_lid_render_choice = ET.SubElement(it_pre_flow_lid, "render_choice", {"shuffle": ("yes" if multiple_select.randomize else "no")}) - - # Add hint - if question.hint: - self.generate_hint(it, question.hint) - - # Reprocessing - it_res = ET.SubElement(it, "resprocessing") - it_out = ET.SubElement(it_res, "outcomes") - it_out_score = ET.SubElement(it_out, "decvar", {"vartype": "Integer", "defaultval": "0", "varname": "que_score", "minvalue": "0", "maxvalue": "100"}) - it_out_correct = ET.SubElement(it_out, "decvar", {"vartype": "Integer", "defaultval": "0", "varname": "D2L_Correct", "minvalue": "0"}) - it_out_incorrect = ET.SubElement(it_out, "decvar", {"vartype": "Integer", "defaultval": "0", "varname": "D2L_Incorrect", 
"minvalue": "0"}) - - # Add General feedback - if question.feedback: - self.generate_feedback(it, question_ident, question.feedback) - - ms_index = 1 - for ms_answer in multiple_select.get_multiple_select_answers(): - - # Presentation -> Flow -> Response_lid -> Render_choice -> Flow_label - flow = ET.SubElement(it_pre_flow_lid_render_choice, "flow_label", {"class": "Block"}) - response_label = ET.SubElement(flow, "response_label", {"ident": question_ident_answer + str(ms_index)}) - flow_mat = ET.SubElement(response_label, "flow_mat") - material = ET.SubElement(flow_mat, "material") - mattext = ET.SubElement(material, "mattext", {"texttype": "text/html"}) - mattext.text = ms_answer.answer - - # Reprocessing -> Respcondition - it_res_con = ET.SubElement(it_res, "respcondition", {"title": "Response Condition", "continue": "yes"}) - it_res_con_var = ET.SubElement(it_res_con, "conditionvar") - it_res_con_var_equal = ET.SubElement(it_res_con_var, "varequal", {"respident": question_lid}) - it_res_con_var_equal.text = question_ident_answer - - it_res_con_var_equal.text = question_ident_answer + str(ms_index) - if ms_answer.is_correct == True: - it_res_set_var = ET.SubElement(it_res_con, "setvar", {"varname": "D2L_Correct", "action": "Add"}) - else: - it_res_set_var = ET.SubElement(it_res_con, "setvar", {"varname": "D2L_Incorrect", "action": "Add"}) - - # Add Answer specific feedback - if ms_answer.answer_feedback: - self.generate_feedback(it, question_ident_feedback + str(ms_index), ms_answer.answer_feedback) - ms_index += 1 - - it_res_con = ET.SubElement(it_res, "respcondition") - it_res_set_var = ET.SubElement(it_res_con, "setvar", {"varname": "que_score", "action": "Set"}) - it_res_set_var.text = "D2L_Correct" - - - def generate_matching(self, it, question_ident, question): - self.itemetadata(it, "Matching", question) - self.itemproc_extension(it) - matching = question.get_matching() - question_ident_choice = question_ident + "_C" - question_ident_answer = 
question_ident + "_A" - question_ident_feedback = question_ident + "_IF" - - # Presentation Node - it_pre = ET.SubElement(it, "presentation") - it_pre_flow = ET.SubElement(it_pre, "flow") - - # Add hint - if question.hint: - self.generate_hint(it, question.hint) - - # Resprocessing Node - it_res = ET.SubElement(it, "resprocessing") - - # Resprocessing -> Outcomes - it_res_out = ET.SubElement(it_res, "outcomes") - it_res_out_dec_correct = ET.SubElement(it_res_out, "decvar", {"vartype": "Integer", "defaultval": "0", "varname": "D2L_Correct", "minvalue": "0", "maxvalue": "100"}) - it_res_out_dec_incorrect = ET.SubElement(it_res_out, "decvar", {"vartype": "Integer", "defaultval": "0", "varname": "D2L_Incorrect", "minvalue": "0", "maxvalue": "100"}) - it_res_out_dec_score = ET.SubElement(it_res_out, "decvar", {"vartype": "Decimal", "defaultval": "0", "varname": "que_score", "minvalue": "0", "maxvalue": "100"}) - - # Presentation -> Flow - it_pre_flow_mat = ET.SubElement(it_pre_flow, "material") - - # Presentation -> Material - it_pre_flow_mat_text = ET.SubElement(it_pre_flow_mat, "mattext", {"texttype": "text/html"}) - question_text = question.text - it_pre_flow_mat_text.append(CDATA(question_text)) - - # Presentation -> Flow -> Response_extension - it_pre_flow_res = ET.SubElement(it_pre_flow, "response_extension") - it_pre_flow_res_grading_type = ET.SubElement(it_pre_flow_res, "d2l_2p0:grading_type") - it_pre_flow_res_grading_type.text = '2' #str(matching.grading_type) - - # Presentation -> Flow -> Response_grp -> Render_choice - it_pre_flow_res_grp_ren = ET.Element("render_choice", {"shuffle": "yes"}) # add to response_grp later - it_pre_flow_res_grp_ren_flow = ET.SubElement(it_pre_flow_res_grp_ren, "flow_label", {"class": "Block"}) - - it_temp = ET.Element("temp") - matching_answers = matching.get_unique_matching_answers() - - ma_index = 1 - for matching_answer_text in matching_answers: - matching_answer_index = question_ident_answer + str(ma_index) - 
it_grp_ren_flow_lab = ET.SubElement(it_pre_flow_res_grp_ren_flow, "response_label", {"ident": matching_answer_index}) - it_grp_ren_flow_lab_flow = ET.SubElement(it_grp_ren_flow_lab, "flow_mat") - it_grp_ren_flow_lab_flow_mat = ET.SubElement(it_grp_ren_flow_lab_flow, "material") - it_grp_ren_flow_lab_flow_mat_text = ET.SubElement(it_grp_ren_flow_lab_flow_mat, "mattext", {"texttype": "text/html"}) - it_grp_ren_flow_lab_flow_mat_text.append(CDATA(matching_answer_text)) - - it_respcondition = ET.SubElement(it_temp, "respcondition") - it_respcondition_conditionvar = ET.SubElement(it_respcondition, "conditionvar") - it_respcondition_varequal = ET.SubElement(it_respcondition_conditionvar, "varequal") - it_respcondition_varequal.text = matching_answer_index - it_respcondition_setvar = ET.SubElement(it_respcondition, "setvar", {"action": "Add"}) - it_respcondition_setvar.text = "1" - - ma_index += 1 - - mc_index = 1 - for matching_choice in matching.get_matching_choices(): - matching_choice_index = question_ident_choice + str(mc_index) - - # Presentation -> Flow -> Response_grp - it_pre_flow_res_grp = ET.SubElement(it_pre_flow, "response_grp", {"respident": matching_choice_index, "rcardinality": "Single"}) - - # Presentation -> Flow -> Response_grp -> Material - it_pre_flow_res_grp_mat = ET.SubElement(it_pre_flow_res_grp, "material") - it_pre_flow_res_grp_mattext = ET.SubElement(it_pre_flow_res_grp_mat, "mattext", {"texttype": "text/html"}) - it_pre_flow_res_grp_mattext.append(CDATA(matching_choice.choice_text)) - it_pre_flow_res_grp.append(it_pre_flow_res_grp_ren) - - for respcondition in it_temp: - conditionvar = respcondition.find("conditionvar") - varequal = conditionvar.find("varequal") - varequal.set("respident", matching_choice_index) - setvar = respcondition.find("setvar") - answer_mattext = it_pre_flow.find("response_grp[@respident='" + matching_choice_index + "'].//response_label[@ident='" + varequal.text + "'].//mattext") - is_correct = 
matching_choice.has_matching_answer(answer_mattext[0].text) - if is_correct is True: - setvar.set("varname", "D2L_Correct") - else: - setvar.set("varname", "D2L_Incorrect") - it_res.append(copy.deepcopy(respcondition)) - mc_index += 1 - - match matching.grading_type: - case 0: - it_respcondition = ET.SubElement(it_res, "respcondition") - it_respcondition_var = ET.SubElement(it_respcondition, "conditionvar") - it_respcondition_var_other = ET.SubElement(it_respcondition_var, "other") - it_resp_setvar = ET.SubElement(it_respcondition, "setvar", {"varname": "que_score", "action": "Set"}) - it_resp_setvar.text = "D2L_Correct" - case 1: - it_respcondition = ET.SubElement(it_res, "respcondition") - it_respcondition_var = ET.SubElement(it_respcondition, "conditionvar") - it_respcondition_var_vargte = ET.SubElement(it_respcondition_var, "vargte", {"respident": "D2L_Incorrect"}) - it_respcondition_var_vargte.text = "0" - it_resp_setvar = ET.SubElement(it_respcondition, "setvar", {"varname": "que_score", "action": "Set"}) - it_resp_setvar.text = "0" - - it_respcondition2 = copy.deepcopy(it_respcondition) - it_resp_setvar2 = it_respcondition2.find("setvar") - it_resp_setvar2.text = "1" - it_res.append(it_respcondition2) - case 2: - it_respcondition = ET.SubElement(it_res, "respcondition") - it_respcondition_var = ET.SubElement(it_respcondition, "conditionvar") - it_respcondition_var_vargte = ET.SubElement(it_respcondition_var, "vargte", {"respident": "D2L_Incorrect"}) - it_respcondition_var_vargte.text = "D2L_Correct" - it_resp_setvar = ET.SubElement(it_respcondition, "setvar", {"varname": "que_score", "action": "Set"}) - it_resp_setvar.text = "0" - - it_respcondition2 = ET.SubElement(it_res, "respcondition") - it_respcondition_var2 = ET.SubElement(it_respcondition2, "conditionvar") - it_respcondition_var_varlt = ET.SubElement(it_respcondition_var2, "varlt", {"respident": "D2L_Incorrect"}) - it_respcondition_var_vargte.text = "D2L_Correct" - it_resp_setvar2 = 
ET.SubElement(it_respcondition2, "setvar", {"varname": "que_score", "action": "Set"}) - it_resp_setvar2.text = "D2L_Correct" - it_resp_setvar3 = ET.SubElement(it_respcondition2, "setvar", {"varname": "que_score", "action": "Subtract"}) - it_resp_setvar3.text = "D2L_Incorrect" - - # Add General feedback - if question.feedback: - self.generate_feedback(it, question_ident, question.feedback) - - - def generate_ordering(self, it, question_ident, question): - self.itemetadata(it, "Ordering", question) - self.itemproc_extension(it) - - question_o = question_ident + "_O" - question_ident_feedback = question_ident + "_IF" - - # Presentation Node - it_pre = ET.SubElement(it, "presentation") - it_pre_flow = ET.SubElement(it_pre, "flow") - - # Presentation -> Flow - - # Presentation -> Flow -> Material - it_pre_flow_mat = ET.SubElement(it_pre_flow, "material") - it_pre_flow_mat_text = ET.SubElement(it_pre_flow_mat, "mattext", {"texttype": "text/html"}) - question_text = question.text - it_pre_flow_mat_text.append(CDATA(question_text)) - - # Presentation -> Flow -> Response_extension - it_pre_flow_res_ext = ET.SubElement(it_pre_flow, "response_extension") - it_pre_flow_res_ext_grading = ET.SubElement(it_pre_flow_res_ext, "d2l_2p0:grading_type") - grading_type = 2 # Equally weighted, All or nothing, Right minus wrong - it_pre_flow_res_ext_grading.append(CDATA(grading_type)) - - # Presentation -> Flow -> Response_grp - it_pre_flow_res_grp = ET.SubElement(it_pre_flow, "response_grp", {"ident": question_o, "rcardinality": "Ordered"}) - it_pre_flow_res_grp_render = ET.SubElement(it_pre_flow_res_grp, "render_choice", {"shuffle": "yes"}) - it_pre_flow_res_grp_render_flow = ET.SubElement(it_pre_flow_res_grp_render, "flow_label", {"class": "Block"}) # populated in the loop - - # Add hint - if question.hint: - self.generate_hint(it, question.hint) - - # Resprocessing - it_res = ET.SubElement(it, "resprocessing") # populated in the loop - it_out = ET.SubElement(it_res, "outcomes") - - 
it_out_correct = ET.SubElement(it_out, "decvar", {"maxvalue": "100", "minvalue": "0", "varname": "D2L_Correct", "defaultval": "0", "vartype": "Integer"}) - it_out_incorrect = ET.SubElement(it_out, "decvar", {"minvalue": "0", "varname": "D2L_Incorrect", "defaultval": "0", "vartype": "Integer"}) - it_out_que_score = ET.SubElement(it_out, "decvar", {"minvalue": "0", "varname": "que_score", "defaultval": "0", "vartype": "Integer"}) - - it_res_con_other = ET.SubElement(it_res, "respcondition") - it_res_con_other_var = ET.SubElement(it_res_con_other, "conditionvar") - it_res_con_other_var_other = ET.SubElement(it_res_con_other_var, "other") - it_res_con_other_setvar = ET.SubElement(it_res_con_other, "setvar", {"varname": "que_score", "action": "Set"}) - it_res_con_other_setvar.text = "D2L_Correct" - - # Add General feedback - if question.feedback: - self.generate_feedback(it, question_ident, question.feedback) - - ord_index = 1 - for ord in question.get_orderings(): - ident_num = question_o + str(ord_index) - # Presentation -> Flow -> Response_grp -> response_label - it_pre_flow_res_grp_render_flow_res = ET.SubElement(it_pre_flow_res_grp_render_flow, "response_label", {"ident": ident_num}) - it_pre_flow_res_grp_render_flow_res_flow = ET.SubElement(it_pre_flow_res_grp_render_flow_res, "flow_mat") - it_pre_flow_res_grp_render_flow_res_flow_mat = ET.SubElement(it_pre_flow_res_grp_render_flow_res_flow, "material") - it_pre_flow_res_grp_render_flow_res_flow_mat_text = ET.SubElement(it_pre_flow_res_grp_render_flow_res_flow_mat, "mattext", {"texttype": "text/html"}) - question_text = ord.text - it_pre_flow_res_grp_render_flow_res_flow_mat_text.append(CDATA(question_text)) - - # Resprocessing -> Respcondition - it_res_con_correct = ET.SubElement(it_res, "respcondition", {"title": "Correct Condition"}) - it_res_con_correct_var = ET.SubElement(it_res_con_correct, "conditionvar") - it_res_con_correct_var_equal = ET.SubElement(it_res_con_correct_var, "varequal", {"respident": 
ident_num}) - it_res_con_correct_var_equal.text = str(ord_index) - it_res_con_correct_setvar = ET.SubElement(it_res_con_correct, "setvar", {"varname": "D2L_Correct", "action": "Add"}) - it_res_con_correct_setvar.text = str(1) - - it_res_con_incorrect = ET.SubElement(it_res, "respcondition", {"title": "Incorrect Condition"}) - it_res_con_incorrect_var = ET.SubElement(it_res_con_incorrect, "conditionvar") - it_res_con_incorrect_var_not = ET.SubElement(it_res_con_incorrect_var, "not") - it_res_con_incorrect_var_not_equal = ET.SubElement(it_res_con_incorrect_var_not, "varequal", {"respident": ident_num}) - it_res_con_incorrect_var_not_equal.text = str(ord_index) - it_res_con_incorrect_setvar = ET.SubElement(it_res_con_incorrect, "setvar", {"varname": "D2L_Incorrect", "action": "Add"}) - it_res_con_incorrect_setvar.text = str(1) - - # Add Answer specific feedback - if ord.ord_feedback: - self.generate_feedback(it, question_ident_feedback + str(ord_index), ord.ord_feedback) - ord_index += 1 - - - def generate_written_response(self, it, question_ident, question): - self.itemetadata(it, "Long Answer", question) - self.itemproc_extension(it) - - question_ident_str = question_ident + "_STR" - question_ident_la = question_ident + "_LA" - - # Presentation Node - it_pre = ET.SubElement(it, "presentation") - it_pre_flow = ET.SubElement(it_pre, "flow") - - written_response = question.get_written_response() - - # Presentation -> Flow - # Presentation -> Flow -> Material - it_pre_flow_mat = ET.SubElement(it_pre_flow, "material") - it_pre_flow_mat_text = ET.SubElement(it_pre_flow_mat, "mattext", {"texttype": "text/html"}) - question_text = question.text - it_pre_flow_mat_text.append(CDATA(question_text)) - - # Presentation -> Flow -> Response_extension - it_pre_flow_mat_res_ext = ET.SubElement(it_pre_flow, "response_extension") - it_pre_flow_mat_res_ext_sign = ET.SubElement(it_pre_flow_mat_res_ext, "d2l_2p0:has_signed_comments") - it_pre_flow_mat_res_ext_sign.append(CDATA("no")) - 
it_pre_flow_mat_res_ext_editor = ET.SubElement(it_pre_flow_mat_res_ext, "d2l_2p0:has_htmleditor") - - # Change it to "no" to deactivate student HTML editor answer - it_pre_flow_mat_res_ext_editor.append(CDATA("no")) - - # Presentation -> Flow -> Response_str - it_pre_flow_mat_res_str = ET.SubElement(it_pre_flow, "response_str", {"rcardinality": "Multiple", "ident": question_ident_str}) - it_pre_flow_mat_res_str_render = ET.SubElement(it_pre_flow_mat_res_str, "render_fib", {"fibtype": "String", "prompt": "Box", "columns": "100", "rows": "15"}) - it_pre_flow_mat_res_str_render_label = ET.SubElement(it_pre_flow_mat_res_str_render, "response_label", {"ident": question_ident_la}) - it_pre_flow_mat_res_str_render_label_mat = ET.SubElement(it_pre_flow_mat_res_str_render_label, "material") - it_pre_flow_mat_res_str_render_label_mat_text = ET.SubElement(it_pre_flow_mat_res_str_render_label_mat, "mattext", {"texttype": "text/html"}) - - # Add hint - if question.hint: - self.generate_hint(it, question.hint) - # Add General feedback - if question.feedback: - self.generate_feedback(it, question_ident, question.feedback) - # Initial_text - it_init_text = ET.SubElement(it, "initial_text") - it_init_text_mat = ET.SubElement(it, "initial_text_material") - it_init_text_mat_flow = ET.SubElement(it_init_text_mat, "flow_mat") - it_init_text_mat_flow_mat = ET.SubElement(it_init_text_mat_flow, "material") - it_init_text_mat_flow_mat_text = ET.SubElement(it_init_text_mat_flow_mat, "mattext", {"texttype": "text/html"}) - # Answer_key - it_ans = ET.SubElement(it, "answer_key") - it_ans_mat = ET.SubElement(it_ans, "answer_key_material") - it_ans_mat_flow = ET.SubElement(it_ans_mat, "flow_mat") - it_ans_mat_flow_mat = ET.SubElement(it_ans_mat_flow, "material") - it_ans_mat_flow_mat_text = ET.SubElement(it_ans_mat_flow_mat, "mattext", {"texttype": "text/html"}) - it_ans_mat_flow_mat_text.append(CDATA(written_response.answer_key)) diff --git a/api/scorm/manifest.py b/api/scorm/manifest.py 
deleted file mode 100644 index ba20feb..0000000 --- a/api/scorm/manifest.py +++ /dev/null @@ -1,22 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. - -class ManifestEntity(object): - resources = [] - - def __init__(self): - del self.resources[:] - - def add_resource(self, manifest_resource_entity): - self.resources.append(manifest_resource_entity) - - -class ManifestResourceEntity(object): - def __init__(self, identifier, resource_type, material_type, href, title = '', link_target = ''): - self.identifier = identifier - self.resource_type = resource_type - self.material_type = material_type - self.href = href - self.title = title - self.link_target = link_target \ No newline at end of file diff --git a/api/serializers.py b/api/serializers.py index d8dbc47..8c69fed 100644 --- a/api/serializers.py +++ b/api/serializers.py @@ -5,6 +5,7 @@ from rest_framework import serializers from .models import Matching, MatchingAnswer, MatchingChoice, Ordering, QuestionLibrary, Section, Question, MultipleChoice, MultipleChoiceAnswer, TrueFalse, Fib, MultipleSelect, MultipleSelectAnswer, WrittenResponse from django.conf import settings +from .formats.docx.process_helper import trim_md_to_html def validate_docx_file(value): @@ -12,22 +13,47 @@ def validate_docx_file(value): raise serializers.ValidationError("not a valid word file") +def validate_zip_file(value): + """Validate that uploaded file is a ZIP file.""" + if not value.name.endswith('.zip'): + raise serializers.ValidationError("not a valid zip file") + return value + + def count_errors(questionlibrary): + """ + Count document and question errors. + For reverse conversion (SCORM to JSON), errors are typically 0 since + we're not parsing with ANTLR which would generate errors. 
+ """ # COUNT NUMBER OF DOCUMENT ERRORS - doc_errorlist = DocumentError.objects.filter(document=questionlibrary) - questionlibrary.total_document_errors = doc_errorlist.count() + # Check if DocumentError model exists (it may not be defined) + try: + from .models import DocumentError + doc_errorlist = DocumentError.objects.filter(document=questionlibrary) + questionlibrary.total_document_errors = doc_errorlist.count() + except (ImportError, AttributeError, NameError): + # DocumentError model doesn't exist, set to 0 + questionlibrary.total_document_errors = 0 # COUNT NUMBER OF QUESTION ERRORS - question_list = Question.objects.filter(question_library=questionlibrary) - num_question_errors = 0 - for q in question_list: - q_errorlist = QuestionError.objects.filter(question=q) - num_question_errors += q_errorlist.count() - questionlibrary.total_question_errors = num_question_errors + # Check if QuestionError model exists (it may not be defined) + try: + from .models import QuestionError + question_list = Question.objects.filter(section__question_library=questionlibrary) + num_question_errors = 0 + for q in question_list: + q_errorlist = QuestionError.objects.filter(question=q) + num_question_errors += q_errorlist.count() + questionlibrary.total_question_errors = num_question_errors + except (ImportError, AttributeError, NameError): + # QuestionError model doesn't exist, set to 0 + questionlibrary.total_question_errors = 0 + questionlibrary.save() -class WordToJsonSerializer(serializers.Serializer): +class DocxToJsonSerializer(serializers.Serializer): temp_file = serializers.FileField(validators=[validate_docx_file], max_length=100, allow_empty_file=False, use_url=True) @@ -46,8 +72,6 @@ def create(self, validated_data): newconversion.create_directory() newconversion.save() - newconversion.create_pandocstring() - newconversion.save() return newconversion def update(self, instance, validated_data): @@ -56,6 +80,25 @@ def update(self, instance, validated_data): return 
instance +class ScormToJsonSerializer(serializers.Serializer): + """Serializer for SCORM ZIP file upload to convert to JSON (mirrors DocxToJsonSerializer).""" + scorm_file = serializers.FileField(validators=[validate_zip_file], max_length=100, allow_empty_file=False, use_url=True) + + def create(self, validated_data): + newconversion = QuestionLibrary.objects.create() + newconversion.temp_file = validated_data.get('scorm_file', validated_data) + + # Set main title from filename + newconversion.main_title = newconversion.temp_file.name.split(".")[0] + newconversion.filter_main_title() + newconversion.folder_path = settings.MEDIA_ROOT + str(newconversion.id) + newconversion.image_path = newconversion.folder_path + settings.MEDIA_URL + newconversion.create_directory() + newconversion.save() + + return newconversion + + class JsonToScormSerializer(serializers.Serializer): json_data = serializers.JSONField(initial=dict) @@ -195,6 +238,15 @@ class QuestionSerializer(serializers.ModelSerializer): matching = MatchingSerializer(many=True, allow_null=True) ordering = serializers.SerializerMethodField() written_response = WrittenResponseSerializer(many=True, allow_null=True) + points = serializers.SerializerMethodField() + + def get_points(self, obj): + """Normalize points: remove trailing zeros and decimal if not needed (e.g., 1.0000 -> '1', 1.5 -> '1.5')""" + if obj.points is None: + return None + # Convert to normalized string: remove trailing zeros and decimal point if not needed + normalized = str(float(obj.points)).rstrip('0').rstrip('.') + return normalized if normalized else '0' def get_fib(self, question): ordering_queryset = question.get_fibs() @@ -226,14 +278,20 @@ class Meta: class JsonResponseSerializer(serializers.ModelSerializer): # sections = SectionSerializer(many=True, read_only=True) sections = serializers.SerializerMethodField() + main_text = serializers.SerializerMethodField() def get_sections(self, questionlibrary): section_queryset = 
questionlibrary.get_sections() serializer = SectionSerializer(instance=section_queryset, many=True) return serializer.data + + def get_main_text(self, questionlibrary): + if not questionlibrary.main_text: + return questionlibrary.main_text + return trim_md_to_html(questionlibrary.main_text) class Meta: model = QuestionLibrary - fields = ['main_title', 'randomize_answer', 'enumeration', 'media_folder', 'sections'] + fields = ['main_title', 'main_text', 'randomize_answer', 'enumeration', 'media_folder', 'sections'] ############################## `/package` serializers ############################## @@ -262,6 +320,15 @@ class QuestionPackageSerializer(serializers.ModelSerializer): matching = MatchingSerializer(many=True, allow_null=True) ordering = OrderingSerializer(many=True, allow_null=True) written_response = WrittenResponseSerializer(many=True, allow_null=True) + points = serializers.SerializerMethodField() + + def get_points(self, obj): + """Normalize points: remove trailing zeros and decimal if not needed (e.g., 1.0000 -> '1', 1.5 -> '1.5')""" + if obj.points is None: + return None + # Convert to normalized string: remove trailing zeros and decimal point if not needed + normalized = str(float(obj.points)).rstrip('0').rstrip('.') + return normalized if normalized else '0' class Meta: model = Question @@ -278,10 +345,17 @@ class Meta: class QuestionLibraryPackageSerializer(serializers.ModelSerializer): sections = SectionPackageSerializer(many=True, allow_null=True) + main_text = serializers.CharField(required=False, allow_null=True, allow_blank=True) class Meta: model = QuestionLibrary - fields = ['main_title', 'randomize_answer', 'enumeration', 'media_folder', 'formatter_output', 'sectioner_output', 'sections'] + fields = ['main_title', 'main_text', 'randomize_answer', 'enumeration', 'media_folder', 'formatter_output', 'sectioner_output', 'sections'] + + def to_representation(self, instance): + data = super().to_representation(instance) + if 
data.get('main_text'): + data['main_text'] = trim_md_to_html(data['main_text']) + return data def create(self, validated_data): sections_data = validated_data.pop('sections') diff --git a/api/tasks.py b/api/tasks.py index e02b2d5..0fce822 100644 --- a/api/tasks.py +++ b/api/tasks.py @@ -12,14 +12,14 @@ from .logging.logging_adapter import FilenameLoggingAdapter from .models import EndAnswer, Question, QuestionLibrary -from .process.process_helper import (add_error_message, add_warning_message, html_to_plain, markdown_to_plain, markdown_to_html, trim_md_to_html, trim_text) -from .process.questionbuilder.fib import build_endanswer_FIB, build_inline_FIB -from .process.questionbuilder.matching import (build_endanswer_MAT, build_inline_MAT) -from .process.questionbuilder.multiplechoice import (build_endanswer_MC, build_inline_MC) -from .process.questionbuilder.multipleselect import (build_endanswer_MS, build_inline_MS) -from .process.questionbuilder.ordering import (build_endanswer_ORD, build_inline_ORD) -from .process.questionbuilder.truefalse import (build_endanswer_TF, build_inline_TF) -from .process.questionbuilder.writtenresponse import (build_endanswer_WR_with_list, build_inline_WR_with_keyword, build_inline_WR_with_list) +from .formats.docx.process_helper import (add_error_message, add_warning_message, html_to_plain, markdown_to_plain, markdown_to_html, trim_md_to_html, trim_text) +from .questions.model_builders.fib import build_endanswer_FIB, build_inline_FIB +from .questions.model_builders.matching import (build_endanswer_MAT, build_inline_MAT) +from .questions.model_builders.multiplechoice import (build_endanswer_MC, build_inline_MC) +from .questions.model_builders.multipleselect import (build_endanswer_MS, build_inline_MS) +from .questions.model_builders.ordering import (build_endanswer_ORD, build_inline_ORD) +from .questions.model_builders.truefalse import (build_endanswer_TF, build_inline_TF) +from .questions.model_builders.writtenresponse import 
(build_endanswer_WR_with_list, build_inline_WR_with_keyword, build_inline_WR_with_list) logger = logging.getLogger(__name__) loggercelery = get_task_logger(__name__) @@ -712,6 +712,7 @@ def run_pandoc_task(questionlibrary_id): emptyparaPath = "./pandoc/pandoc-filters/emptypara.lua" imageFilterPath = "./pandoc/pandoc-filters/image.lua" tables = "./pandoc/pandoc-filters/tables.lua" + linebreakPath = "./pandoc/pandoc-filters/linebreak.lua" # listsPath = "./api/pandoc/pandoc-filters/lists.lua" pandoc_word_to_html = pypandoc.convert_file( @@ -744,6 +745,7 @@ def run_pandoc_task(questionlibrary_id): '--ascii', '--lua-filter=' + mdblockquotePath, '--lua-filter=' + emptyparaPath, + '--lua-filter=' + linebreakPath, # '--lua-filter=' + tables ]) pandoc_html_to_md = pandoc_html_to_md.rstrip() diff --git a/api/urls.py b/api/urls.py index b8971b4..69bd385 100644 --- a/api/urls.py +++ b/api/urls.py @@ -7,6 +7,8 @@ from django.conf import settings urlpatterns = [ - path('convert', views.WordToJson.as_view(), name='WordToJson'), - path('package', views.JsonToScorm.as_view(), name='JsonToScorm'), + path('docx-to-json', views.DocxToJson.as_view(), name='DocxToJson'), + path('json-to-scorm', views.JsonToScorm.as_view(), name='JsonToScorm'), + path('scorm-to-json', views.ScormToJson.as_view(), name='ScormToJson'), + path('json-to-docx', views.JsonToDocx.as_view(), name='JsonToDocx'), ] diff --git a/api/views.py b/api/views.py index 7e1e236..5edf778 100644 --- a/api/views.py +++ b/api/views.py @@ -2,24 +2,22 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-import json -from rest_framework import viewsets -from .serializers import JsonToScormSerializer, QuestionLibraryPackageSerializer, WordToJsonSerializer -from rest_framework import generics +from .serializers import JsonToScormSerializer, DocxToJsonSerializer, ScormToJsonSerializer +from .pipelines.json_to_scorm import json_to_scorm, JsonToScormError +from .pipelines.scorm_to_json import scorm_to_json +from .pipelines.json_to_docx import json_to_docx, JsonToDocxError +from .pipelines.docx_to_json import docx_to_json, DocxToJsonError +from .pipelines.response_payload import build_status_payload from rest_framework.views import APIView -from rest_framework.response import Response -from django.http import FileResponse, JsonResponse -from rest_framework.permissions import IsAuthenticated, AllowAny +from django.http import JsonResponse +from rest_framework.permissions import AllowAny from rest_framework.authentication import TokenAuthentication from rest_framework.parsers import MultiPartParser from rest_framework.parsers import JSONParser -from django.core.files.base import ContentFile from django.conf import settings -from .models import QuestionLibrary - import logging logger = logging.getLogger(__name__) from .logging.contextfilter import QuestionlibraryFilenameFilter @@ -32,193 +30,161 @@ class TokenAuthenticationWithBearer(TokenAuthentication): def __init__(self): super(TokenAuthenticationWithBearer, self).__init__() -class WordToJson(APIView): +class DocxToJson(APIView): parser_classes = [MultiPartParser] permission_classes = [AllowAny] authentication_classes = [TokenAuthenticationWithBearer] - serializer_class = WordToJsonSerializer + serializer_class = DocxToJsonSerializer def post(self, request, format=None): - is_random = False if 'randomize' in request.POST: if request.POST['randomize'].lower() in ("true", "yes"): is_random = True file_obj = request.data['temp_file'] - serializer = WordToJsonSerializer(data={ + serializer = DocxToJsonSerializer(data={ 
'temp_file': file_obj, 'randomize': is_random }) - if serializer.is_valid(): - instance = serializer.save() - - # question_library = QuestionLibrary.objects.first() - - # question_library = instance - - # ============== start the process ======== - from .process.process import process - process(instance) - - # question_library_serializer = QuestionLibraryPackageSerializer(question_library) - - - json_string = '{"main_title":"Exam Title","randomize_answer":false,"total_question_errors":"1","total_document_errors":"0","sections":[{"is_main_content":true,"title":"Section title","is_title_displayed":false,"text":null,"is_text_displayed":false,"shuffle":false,"questions":[{"title":"MC title","text":"Question text","points":3.5,"difficulty":3,"mandatory":false,"hint":"Question hint","feedback":"Question feedback","multiple_choice":[{"randomize":true,"enumeration":1,"multiple_choice_answers":[{"answer":"MC first answer text","answer_feedback":"MC first answer feedback","weight":100},{"answer":"MC second answer text","answer_feedback":"MC second answer feedback","weight":0}]}],"true_false":null,"fib":null,"multiple_select":null,"ordering":null,"matching":null,"written_response":null},{"title":"TF title","text":"Question text","points":1,"difficulty":1,"mandatory":false,"hint":"Question hint","feedback":"Question feedback","multiple_choice":null,"true_false":[{"true_weight":100,"true_feedback":"true feedback","false_weight":0,"false_feedback":"true feedback","enumeration":2}],"fib":null,"multiple_select":null,"ordering":null,"matching":null,"written_response":null},{"title":"MS title","text":"Question text","points":1,"difficulty":1,"mandatory":false,"hint":"Question hint","feedback":"Question feedback","multiple_choice":null,"true_false":null,"fib":null,"multiple_select":[{"randomize":true,"enumeration":1,"style":2,"multiple_select_answers":[{"answer":"MS first answer text","answer_feedback":"MS first answer feedback","is_correct":true},{"answer":"MS second answer 
text","answer_feedback":"MS second answer feedback","is_correct":true}]}],"ordering":null,"matching":null,"written_response":null},{"title":"WR title","text":"Question text","points":5,"difficulty":5,"mandatory":false,"hint":"Question hint","feedback":"Question feedback","multiple_choice":null,"true_false":null,"fib":null,"multiple_select":null,"ordering":null,"matching":null,"written_response":[{"enable_student_editor":false,"initial_text":null,"answer_key":"WR answer key","enable_attachments":false}]},{"title":"FIB title","text":"Question text","points":4,"difficulty":3,"mandatory":false,"hint":"Question hint","feedback":"Question feedback","multiple_choice":null,"true_false":null,"fib":[{"type":"fibquestion","text":"1+15?","order":1,"size":null,"weight":null},{"type":"fibanswer","text":"16","order":2,"size":3,"weight":100}],"multiple_select":null,"ordering":null,"matching":null,"written_response":null},{"title":"Ordering title","text":"Question text","points":6,"difficulty":2,"mandatory":false,"hint":"Question hint","feedback":"Question feedback","multiple_choice":null,"true_false":null,"fib":null,"multiple_select":null,"ordering":[{"text":"Order 1","order":1,"ord_feedback":"Ordering 1 feedback"},{"text":"Order 1","order":2,"ord_feedback":"Ordering 2 feedback"},{"text":"Order 1","order":3,"ord_feedback":"Ordering 3 feedback"}],"matching":null,"written_response":null},{"title":"Matching title","text":"Question text","points":6,"difficulty":2,"mandatory":false,"hint":"Question hint","feedback":"Question feedback","multiple_choice":null,"true_false":null,"fib":null,"multiple_select":null,"ordering":null,"matching":[{"grading_type":1,"matching_choices":[{"choice_text":"Choice 1","matching_answers":[{"answer_text":"Choice 1 answer a"},{"answer_text":"Choice 1 answer b"}]},{"choice_text":"Choice 2","matching_answers":[{"answer_text":"Choice 2 answer a"},{"answer_text":"Choice 2 answer b"}]}]}],"written_response":null}]}]}' - json_data = json.loads(json_string) - for 
item in json_data: - match item: - case "main_title": - print(json_data["main_title"]) - case "randomize_answer": - print(json_data["randomize_answer"]) - case "total_question_errors": - print(json_data["total_question_errors"]) - case "total_document_errors": - print(json_data["total_document_errors"]) - case "sections": - for section in json_data["sections"]: - print("\t", section["title"]) - print("\t", section["is_title_displayed"]) - print("\t", section["text"]) - print("\t", section["is_text_displayed"]) - print("\t", section["shuffle"]) - - for question in section["questions"]: - print("\t\t", question["title"]) - print("\t\t", question["text"]) - print("\t\t", question["points"]) - print("\t\t", question["difficulty"]) - print("\t\t", question["mandatory"]) - print("\t\t", question["hint"]) - print("\t\t", question["feedback"]) - - if question["multiple_choice"]: - print("\t\t\tmultiple_choice") - for multiple_choice in question["multiple_choice"]: - - print("\t\t\t\t", multiple_choice["randomize"]) - print("\t\t\t\t", multiple_choice["enumeration"]) - - print("\t\t\t\tmultiple_choices_answers") - for mc_answers in multiple_choice["multiple_choices_answers"]: - print("\t\t\t\t\t", mc_answers["answer"]) - print("\t\t\t\t\t", mc_answers["answer_feedback"]) - print("\t\t\t\t\t", mc_answers["weight"]) - print("") - - elif question["true_false"]: - for true_false in question["true_false"]: - print("\t\t\ttrue_false") - print("\t\t\t\t", true_false["true_weight"]) - print("\t\t\t\t", true_false["true_feedback"]) - print("\t\t\t\t", true_false["false_weight"]) - print("\t\t\t\t", true_false["false_feedback"]) - print("\t\t\t\t", true_false["enumeration"]) - - elif question["fib"] : - print("\t\t\tfib") - for fib in question["fib"]: - print("\t\t\t\t", fib["type"]) - print("\t\t\t\t", fib["text"]) - print("\t\t\t\t", fib["order"]) - print("\t\t\t\t", fib["size"]) - print("\t\t\t\t", fib["weight"]) - print("") - elif question["multiple_select"]: - for 
multiple_select in question["multiple_select"]: - print("\t\t\tmultiple_select") - print("\t\t\t\t", multiple_select["randomize"]) - print("\t\t\t\t", multiple_select["enumeration"]) - print("\t\t\t\t", multiple_select["style"]) - - print("\t\t\t\tmultiple_select_answers") - for ms_answers in multiple_select["multiple_select_answers"]: - print("\t\t\t\t\t", ms_answers["answer"]) - print("\t\t\t\t\t", ms_answers["answer_feedback"]) - print("\t\t\t\t\t", ms_answers["is_correct"]) - print("") - - elif question["written_response"]: - for written_response in question["written_response"]: - print("\t\t\twritten_response") - print("\t\t\t\t",written_response["enable_student_editor"]) - print("\t\t\t\t", written_response["initial_text"]) - print("\t\t\t\t", written_response["answer_key"]) - print("\t\t\t\t", written_response["enable_attachments"]) - - elif question["matching"]: - for matching in question["matching"]: - print("\t\t\tmatching") - print("\t\t\t\t", matching["grading_type"]) - - print("\t\t\t\tmatching_choices") - for matching_choice in matching["matching_choices"]: - print("\t\t\t\t\t", matching_choice["choice_text"]) - if matching_choice["matching_answers"]: - for matching_answer in matching_choice["matching_answers"]: - print("\t\t\t\t\t\t", matching_answer["answer_text"]) - print("") - - elif question["ordering"]: - print("\t\t\tordering") - for ordering in question["ordering"]: - print("\t\t\t\t", ordering["text"]) - print("\t\t\t\t", ordering["order"]) - print("\t\t\t\t", ordering["ord_feedback"]) - print("") - else: - print("******************************************************") - print("NO QUESTION TYPE\n\n") - print(question) - print("******************************************************") - - - - - - - instance.json_data = json_data - instance.save() - # print(instance.json_data) - instance.cleanup() + if not serializer.is_valid(): + error_payload = build_status_payload( + "Error", + "Validation failed", + serializer.errors, + 
questionlibrary=None, + process=None, + ) + return JsonResponse(error_payload, status=400) + + instance = serializer.save() + + try: + json_data, question_library = docx_to_json(instance, logger) + question_library.cleanup() return JsonResponse(json_data, status=200) - - return JsonResponse(serializer.errors, status=400) + except DocxToJsonError as exc: + error_payload = build_status_payload( + "Error", + str(exc), + "", + process=exc.process, + questionlibrary=instance, + ) + instance.cleanup() + return JsonResponse(error_payload, status=500) class JsonToScorm(APIView): parser_classes = [JSONParser] - permission_classes = [IsAuthenticated] + permission_classes = [AllowAny] authentication_classes = [TokenAuthenticationWithBearer] serializer_class = JsonToScormSerializer def post(self, request, format=None): - json_data = request.data - ql_serializer = QuestionLibraryPackageSerializer(data=json_data['data']) - if ql_serializer.is_valid(): - ql_instance = ql_serializer.save() - ql_instance.filter_main_title() - ql_instance.folder_path = settings.MEDIA_ROOT + str(ql_instance.id) - ql_instance.image_path = ql_instance.folder_path + settings.MEDIA_URL - ql_instance.create_directory() - ql_instance.save() - file_name = ql_instance.filtered_main_title - # if (ql_instance.total_question_errors + ql_instance.total_document_errors == 0): - ql_instance.create_xml_files() - ql_instance.zip_files() - file_response = FileResponse(ql_instance.zip_file) - file_response['Content-Disposition'] = 'attachment; filename="' + file_name + '"' + try: + file_response, ql_instance = json_to_scorm(json_data, logger) logger.addFilter(QuestionlibraryFilenameFilter(ql_instance)) - logger.info("[" + str(ql_instance.id) + "] " +">>>>>>>>>>Transaction Finished>>>>>>>>>>") - + logger.info(f"[{ql_instance.id}] Transaction Finished") ql_instance.cleanup() - return file_response - - return JsonResponse({"hostname": settings.APP_VERSION, "serializer_errors": ql_serializer.errors}, status=400) + except 
JsonToScormError as exc: + error_payload = build_status_payload( + "Error", + "Validation failed", + exc.errors, + questionlibrary=None, + process=None, + ) + return JsonResponse(error_payload, status=400) + + +class ScormToJson(APIView): + """ + Reverse API endpoint: Converts SCORM ZIP file to JSON (mirrors DocxToJson). + This is step 1 of the reverse process: SCORM → JSON. + """ + parser_classes = [MultiPartParser] + permission_classes = [AllowAny] + authentication_classes = [TokenAuthenticationWithBearer] + serializer_class = ScormToJsonSerializer + + def post(self, request, format=None): + file_obj = request.data.get('scorm_file') + serializer = ScormToJsonSerializer(data={ + 'scorm_file': file_obj + }) + + if not serializer.is_valid(): + error_payload = build_status_payload( + "Error", + "Validation failed", + serializer.errors, + questionlibrary=None, + process=None, + ) + return JsonResponse(error_payload, status=400) + + instance = serializer.save() + logger.addFilter(QuestionlibraryFilenameFilter(instance)) + + try: + json_data, question_library = scorm_to_json(instance, logger) + instance.cleanup() + return JsonResponse(json_data, status=200) + except Exception as e: + logger.error(f"SCORM to JSON conversion failed: {str(e)}") + error_payload = build_status_payload( + "Error", + str(e), + "", + questionlibrary=instance, + process=None, + ) + instance.cleanup() + return JsonResponse(error_payload, status=500) + + +class JsonToDocx(APIView): + """ + Reverse API endpoint: Converts JSON to DOCX (mirrors JsonToScorm). + This is step 2 of the reverse process: JSON → DOCX. 
+ """ + parser_classes = [JSONParser] + permission_classes = [AllowAny] + authentication_classes = [TokenAuthenticationWithBearer] + serializer_class = JsonToScormSerializer + + def post(self, request, format=None): + json_data = request.data + try: + file_response, ql_instance = json_to_docx(json_data, logger) + except JsonToDocxError as exc: + error_payload = build_status_payload( + "Error", + "Validation failed", + exc.errors, + questionlibrary=None, + process=None, + ) + return JsonResponse(error_payload, status=400) + except Exception as e: + logger.error(f"JSON to DOCX conversion failed: {str(e)}") + error_payload = build_status_payload( + "Error", + str(e), + "", + questionlibrary=None, + process=None, + ) + return JsonResponse(error_payload, status=500) + + ql_instance.cleanup() + + return file_response + class RootPath(APIView): permission_classes = [AllowAny] diff --git a/pandoc/pandoc-filters/linebreak.lua b/pandoc/pandoc-filters/linebreak.lua new file mode 100644 index 0000000..c9455a5 --- /dev/null +++ b/pandoc/pandoc-filters/linebreak.lua @@ -0,0 +1,11 @@ +-- Replace HTML
<br> tags with proper pandoc LineBreak nodes +return { + RawInline = function(el) + if el.format:match("html") then + local text = el.text:lower() + if text == "<br>" or text == "<br/>" or text == "<br />" then + return pandoc.LineBreak() + end + end + end +} diff --git a/qcon/settings.py b/qcon/settings.py index 1c5ac90..6b76b61 100644 --- a/qcon/settings.py +++ b/qcon/settings.py @@ -96,7 +96,6 @@ def get_secret(name: str, default: str = None, required: bool = False, subdirect # Local Apps 'api', - 'restapi' ] diff --git a/qcon/urls.py b/qcon/urls.py index d9a0808..963d3c4 100644 --- a/qcon/urls.py +++ b/qcon/urls.py @@ -22,11 +22,9 @@ # from django.contrib.staticfiles.urls import staticfiles_urlpatterns from django.conf import settings from api import views -from restapi import views urlpatterns = [ path('', include('api.urls')), - path('api/', include('restapi.urls')), path('', views.RootPath.as_view(), name='root') ] diff --git a/requirements.txt b/requirements.txt index e60f977..c898960 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ python-dotenv==0.21.0 channels==3.0.5 daphne==3.0.2 Twisted[tls,http2]==22.8.0 -celery==5.2.7 +celery==5.3.6 redis==4.3.4 channels-redis==4.0.0 psycopg2-binary==2.9.5 diff --git a/restapi/__init__.py b/restapi/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/restapi/admin.py b/restapi/admin.py deleted file mode 100644 index 8c38f3f..0000000 --- a/restapi/admin.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.contrib import admin - -# Register your models here. 
diff --git a/restapi/apps.py b/restapi/apps.py deleted file mode 100644 index 4d2371e..0000000 --- a/restapi/apps.py +++ /dev/null @@ -1,19 +0,0 @@ -from django.apps import AppConfig -from django.conf import settings -import sys -import logging -logger = logging.getLogger(__name__) - -class RestapiConfig(AppConfig): - default_auto_field = 'django.db.models.BigAutoField' - name = 'restapi' - - def ready(self): - if 'runserver' in sys.argv or 'qcon.asgi:application' in sys.argv: - logger.info("APP_VERSION: " + settings.APP_VERSION) - logger.info("IMAGE_TAG: " + settings.IMAGE_TAG) - logger.info("IMAGE_NAME: " + settings.IMAGE_NAME) - if 'runserver' in sys.argv: - logger.warning("qconapi has started in Dev Mode") - else: - logger.info("qconapi has started") diff --git a/restapi/logging/ErrorTypes.py b/restapi/logging/ErrorTypes.py deleted file mode 100644 index 37eb840..0000000 --- a/restapi/logging/ErrorTypes.py +++ /dev/null @@ -1,176 +0,0 @@ - -class MarkDownConversionError(Exception): - def __init__(self, reason, message="MarkDownConversionError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class NoTypeDeterminedError(Exception): - def __init__(self, reason, message="NoTypeDeterminedError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - - -class InlineNoTypeError(Exception): - def __init__(self, reason, message="InlineNoTypeError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class EndAnswerNoTypeError(Exception): - def __init__(self, reason, message="EndAnswerNoTypeError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class EMFImageError(Exception): - def __init__(self, reason, message="EMFImageError"): - self.reason = reason - self.message = message - def __str__(self): - return 
f'{self.message} -> {self.reason}' - -class MATEndStructureError(Exception): - def __init__(self, reason, message="MATEndStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class MATInlineStructureError(Exception): - def __init__(self, reason, message="MATInlineStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class MATNoMatchError(Exception): - def __init__(self, reason, message="MATNoMatchError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class MATMissingChoiceError(Exception): - def __init__(self, reason, message="MATMissingChoiceError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class MATMissingAnswerError(Exception): - def __init__(self, reason, message="MATMissingAnswerError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class MATMissingOptionError(Exception): - def __init__(self, reason, message="MATMissingOptionError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class FIBEndStructureError(Exception): - def __init__(self, reason, message="FIBEndStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class FIBInlineStructureError(Exception): - def __init__(self, reason, message="FIBInlineStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class TFEndStructureError(Exception): - def __init__(self, reason, message="TFEndStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - 
-class TFInlineStructureError(Exception): - def __init__(self, reason, message="TFInlineStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class TFNoAnswerError(Exception): - def __init__(self, reason, message="TFNoAnswerError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class TFSelectedAnswerError(Exception): - def __init__(self, reason, message="TFSelectedAnswerError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class MCEndStructureError(Exception): - def __init__(self, reason, message="MCEndStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class MCInlineStructureError(Exception): - def __init__(self, reason, message="MCInlineStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class ORDEndStructureError(Exception): - def __init__(self, reason, message="ORDEndStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class ORDInlineStructureError(Exception): - def __init__(self, reason, message="ORDInlineStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class MSEndStructureError(Exception): - def __init__(self, reason, message="MSEndStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class MSInlineStructureError(Exception): - def __init__(self, reason, message="MSInlineStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class WREndStructureError(Exception): - 
def __init__(self, reason, message="WREndStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - -class WRInlineStructureError(Exception): - def __init__(self, reason, message="WRInlineStructureError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' diff --git a/restapi/logging/WarningTypes.py b/restapi/logging/WarningTypes.py deleted file mode 100644 index d7fadc1..0000000 --- a/restapi/logging/WarningTypes.py +++ /dev/null @@ -1,61 +0,0 @@ -class MCEndAnswerExistWarning(Exception): - def __init__(self, reason, message="MCEndAnswerExistWarning"): - self.reason = reason - self.message = message - - def __str__(self): - return f'{self.message} -> {self.reason}' - - -class MSEndAnswerExistWarning(Exception): - def __init__(self, reason, message="MSEndAnswerExistWarning"): - self.reason = reason - self.message = message - - def __str__(self): - return f'{self.message} -> {self.reason}' - - -class WREndAnswerExistWarning(Exception): - def __init__(self, reason, message="WREndAnswerExistWarning"): - self.reason = reason - self.message = message - - def __str__(self): - return f'{self.message} -> {self.reason}' - - -class RespondusTypeEWarning(Exception): - def __init__(self, reason, message="RespondusTypeEWarning"): - self.reason = reason - self.message = message - - def __str__(self): - return f'{self.message} -> {self.reason}' - - -class RespondusTypeMRWarning(Exception): - def __init__(self, reason, message="RespondusTypeMRWarning"): - self.reason = reason - self.message = message - - def __str__(self): - return f'{self.message} -> {self.reason}' - - -class RespondusTypeFMBWarning(Exception): - def __init__(self, reason, message="RespondusTypeFMBWarning"): - self.reason = reason - self.message = message - - def __str__(self): - return f'{self.message} -> {self.reason}' - - -class RespondusTypeMTWarning(Exception): - def 
__init__(self, reason, message="RespondusTypeMTWarning"): - self.reason = reason - self.message = message - - def __str__(self): - return f'{self.message} -> {self.reason}' diff --git a/restapi/logging/contextfilter.py b/restapi/logging/contextfilter.py deleted file mode 100644 index f5d6b5b..0000000 --- a/restapi/logging/contextfilter.py +++ /dev/null @@ -1,25 +0,0 @@ -import os -import logging - -class QuestionlibraryFilenameFilter(logging.Filter): - def __init__(self, questionlibrary=None): - self.questionlibrary = questionlibrary - def filter(self, record): - if self.questionlibrary==None: - # record.file = '--' - pass - else: - if self.questionlibrary.temp_file.name != None: - # record.file = 'docx_filename:' + os.path.basename(self.questionlibrary.temp_file.name) - # filename = 'docx_filename:' + os.path.basename(self.questionlibrary.temp_file.name) - filename = 'docx_filename:' + self.questionlibrary.temp_file.name - record.msg = filename + " >>> " + str(record.getMessage()) - elif self.questionlibrary.filtered_main_title != None: - # record.file = 'filtered_main_title:' + os.path.basename(self.questionlibrary.filtered_main_title) - titlename = 'filtered_main_title:' + os.path.basename(self.questionlibrary.filtered_main_title) - record.msg = titlename + " >>> " + str(record.getMessage()) - else: - # record.file = '--' - pass - return True - diff --git a/restapi/logging/logging_adapter.py b/restapi/logging/logging_adapter.py deleted file mode 100644 index 26566f2..0000000 --- a/restapi/logging/logging_adapter.py +++ /dev/null @@ -1,22 +0,0 @@ -import logging - -class FilenameLoggingAdapter(logging.LoggerAdapter): - """ - This example adapter expects the passed in dict-like object to have a - 'connid' key, whose value in brackets is prepended to the log message. 
- """ - def process(self, msg, kwargs): - user_ip = "" - filename = "" - question = "" - - if 'user_ip' in self.extra: - user_ip = str(self.extra['user_ip']) - - if 'filename' in self.extra: - filename = str(self.extra['filename']) - - if 'question' in self.extra: - question = "#" + str(self.extra['question']) - - return f"{user_ip}:[{filename}]:{question} {msg}", kwargs diff --git a/restapi/models.py b/restapi/models.py deleted file mode 100644 index ad61906..0000000 --- a/restapi/models.py +++ /dev/null @@ -1,818 +0,0 @@ -# from django.db import models -from .tasks import run_pandoc_task -from .process.common.extract_images import extract_images -from .process.formatter.convert_txt import convert_txt -from .process.formatter.fix_numbering import fix_numbering -# from .process.formatter.formatter import run_formatter_parser -from .process.common.restore_images import restore_images - -import xml.etree.ElementTree as ET - -import logging -logger = logging.getLogger(__name__) -import os -import subprocess -import re - -from .logging.ErrorTypes import (WRInlineStructureError, WREndStructureError, MSInlineStructureError, MSEndStructureError, ORDInlineStructureError, ORDEndStructureError, MCInlineStructureError, MCEndStructureError, TFInlineStructureError, TFEndStructureError, FIBInlineStructureError, FIBEndStructureError, MATInlineStructureError, MATEndStructureError, InlineNoTypeError, EndAnswerNoTypeError, NoTypeDeterminedError, MarkDownConversionError) -from .logging.WarningTypes import (RespondusTypeEWarning, RespondusTypeMRWarning, RespondusTypeFMBWarning, RespondusTypeMTWarning) - -import pypandoc -from enum import Enum -from django.utils.translation import gettext_lazy as _ - -class Format: - - ''' - main variables(part of final result) - ''' - filename = None - maincontent_title = None - body = None - end_answers = None - ''' - intermediary variables - ''' - pandoc_result = None - content_after_images_extracted = None - content_converted_to_txt = None - 
content_numbering_fixed = None - images_list = [] - formatter_result = None - - def __init__(self, temp_file_path, temp_file_name, filename, maincontent_title = None): - self.temp_file_path = temp_file_path - self.temp_file_name = temp_file_name - self.filename = filename - self.maincontent_title = maincontent_title - - def convert_pandoc(self): - try: - result = run_pandoc_task.apply_async(kwargs={"temp_file_path": self.temp_file_path, - "filename": self.temp_file_name }, - ignore_result=False) - self.pandoc_result = result.get() - except Exception as e: - raise Exception(str(e)) - return self - - def extract_images(self): - self.content_after_images_extracted, self.images_list = extract_images(self.pandoc_result) - return self - - def convert_txt(self): - self.content_converted_to_txt = convert_txt(self.temp_file_path, self.filename) - return self - - def fix_numbering(self): - self.content_numbering_fixed = fix_numbering(self.content_after_images_extracted, self.content_converted_to_txt) - return self - - def run_formatter(self): - try: - self.formatter_result = self.run_formatter_parser(self.content_numbering_fixed) - - - - - - - if 'maincontent_title' in self.formatter_result.keys(): - self.maincontent_title = self.formatter_result['maincontent_title'] - if 'body' in self.formatter_result.keys(): - self.body = self.formatter_result['body'] - if 'end_answers' in self.formatter_result.keys(): - self.end_answers = self.formatter_result['end_answers'] - except Exception as e: - raise Exception(str(e)) - return self - - def restore_images(self): - self.body = restore_images(self.body, self.images_list) - - - def run_formatter_parser(self, content): - root = None - - try: - os.chdir('/antlr_build/formatter') - result = subprocess.run('java -cp formatter.jar:* formatter', - shell=True, - input=content.encode("utf-8"), - capture_output=True) - os.chdir('/code') - root = ET.fromstring(result.stdout.decode("utf-8")) - except: - raise FormatterError("Internal error while 
converting file") - - logger.debug("starting formatter extraction") - - format = {} - - # # ==================================== MAINCONTENT TITLE - maincontenttitle = root.find('maincontent_title') - logger.debug("checking maincontent title") - if maincontenttitle is not None: - main_title = (maincontenttitle.text).strip() - if main_title: - # format["maincontent_title"] = (trim_text(main_title)).lstrip('# ') - format["maincontent_title"] = main_title - else: - format["maincontent_title"] = None - - # # ==================================== BODY - body = root.find('body') - logger.debug("checking formatter body") - if body is not None: - # questionlibrary.formatter_output = body.text.rstrip() + "\n" - # questionlibrary.save() - format["body"] = body.text.rstrip() + "\n" - else: - raise FormatterError("document body not found") - - # ==================================== END ANSWERS - - end_answers = root.find('end_answers') - logger.debug("checking for endanswers block") - if end_answers is not None: - logger.debug("endanswers block found") - # questionlibrary.end_answers_raw = end_answers.text - # questionlibrary.save() - format["end_answers"] = end_answers.text - else: - logger.info("No endanswers block found") - format["end_answers"] = None - - return format - - -class FormatterError(Exception): - def __init__(self, reason, message="Formatter Error"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - - -class BaseQuestion: - def __init__(self, questioncontent=None): - self.questioncontent = questioncontent - self.basetextanswers.clear() - self.answers.clear() - self.question_header_type = None - self.question_header_title = None - self.question_header_points = None - self.questiontype_by_user = None - self.questiontype_processed = None - - - index = None - number_provided = None - questioncontent = None #raw content - - question_header_type = None - question_header_title = None - 
question_header_points = None - - questiontype_by_user = None - questiontype_processed = None - wr_answer = None - - feedback = None - hint = None - - endanswer = None - - warning_message = [] - info_message = [] - error_message = [] - - ''' - These Vars are only used for processing and not part of final result - ''' - line_elements = None - question_body_part_list = None - - answers = [] - basetextanswers = [] - - - def get_line_elements(self): - self.questioncontent = os.linesep + self.questioncontent - os.chdir('/antlr_build/questionparser') - popen = subprocess.Popen( - 'java -cp questionparser.jar:* questionparser', - shell=True, - stdout=subprocess.PIPE, - stdin=subprocess.PIPE, - stderr=subprocess.PIPE - ) - result, errors = popen.communicate(input=self.questioncontent.encode("utf-8")) - popen.stdout.close() - return_code = popen.wait() - os.chdir('/code') - try: - self.line_elements = ET.fromstring(result.decode("utf-8")) - except Exception as e: - raise Exception(str(e)) - - - return self - - def extract_question_header_elements(self): - - question_header_type = self.line_elements.find('type') - if question_header_type is not None: - self.question_header_type = self.trim_text(question_header_type.text) - - - question_header_title = self.line_elements.find('title') - if question_header_title is not None: - self.question_header_title = self.trim_text(question_header_title.text) - - question_header_points = self.line_elements.find('points') - if question_header_points is not None: - filterpoint = re.search("\d+((.|,)\d+)?", question_header_points.text) - self.question_header_points = float(filterpoint.group()) - return self - - def get_question_body_parts_list (self): - question_body = self.line_elements.find("question_body") - if question_body is None: - raise Exception("Question_body empty") - - self.question_body_part_list = question_body.findall("question_body_part") - if self.question_body_part_list is None: - raise Exception("Question_body empty") - 
return self - - def get_number_provided(self): - try: - # save question number that was provided - number_provided = self.question_body_part_list[0].find('prefix') - if number_provided is not None: - filter_question_number = re.search("\d+", number_provided.text) - self.number_provided = filter_question_number.group() - return self - # logger.debug("Finished getting question number") - except Exception as e: - raise Exception(f"failed to extract number_provided : {str(e)}") - - def separate_question_and_answers(self): - answer_list = [] - part_of_question_list = [] - try: - # logger.debug( f"#{str(question.number_provided)} Starting splitting body_part into question_content and answers block") - # only if there are multiple question_body parts then proceed to splitting - if (len(self.question_body_part_list) == 1) and (self.question_body_part_list[0].get('prefix_type') == 'NUMLIST_PREFIX'): - part_of_question_list.append(self.question_body_part_list[0]) - else: - # Filter out the last letter enumerated list so that it can be set as the answerlist - start_of_list_found = False - # Start iterating from the last item going up untill the index "a" is found and continue adding the rest of the lists as question content - for question_body_part in reversed(self.question_body_part_list): - if not start_of_list_found: - answer_list.append(question_body_part) - else: - part_of_question_list.append(question_body_part) - if question_body_part.get('prefix_type') == "LETTERLIST_PREFIX" or question_body_part.get('prefix_type') == "CORRECT_ANSWER": - check_index = ''.join(filter(str.isalpha, question_body_part.find('prefix').text.lower())) - if check_index == "a": - start_of_list_found = True - # because we started from the last item we need to reverse the list to bring in correct order - answer_list = answer_list[::-1] - part_of_question_list = part_of_question_list[::-1] - # logger.debug( f"#{str(question.number_provided)} Finished plitting body_part into question_content and 
answers block") - except Exception as e: - raise Exception(f"failed to split body_part into question_content and answers block : {e}") - - try: - # Combine feedback and answers - # Check if first item is LETTERLIST_PREFIX or CORRECT_ANSWER - if (answer_list[0].get('prefix_type') == "LETTERLIST_PREFIX" or answer_list[0].get('prefix_type') == "CORRECT_ANSWER"): - # raise Exception("First item in Answer list is not a Letterlist item") - for answer in answer_list: - if answer.get('prefix_type') == "LETTERLIST_PREFIX": - current_answer = { - "answer_prefix": answer.find('prefix').text, - "answer_content": answer.find('content').text, - "correct": False, - "feedback": None - } - self.answers.append(current_answer) - elif answer.get('prefix_type') == "CORRECT_ANSWER": - current_answer = { - "answer_prefix": answer.find('prefix').text, - "answer_content": answer.find('content').text, - "correct": True, - "feedback": None - } - self.answers.append(current_answer) - elif answer.get('prefix_type') == "NUMLIST_PREFIX": - current_answer = self.answers.pop() - current_answer.update({"content": current_answer.get("content") + answer.find('content').text}) - self.answers.append(current_answer) - elif answer.get('prefix_type') == "FEEDBACK": - current_answer = self.answers.pop() - current_answer.update({"feedback": answer.find('content').text}) - self.answers.append(current_answer) - elif answer.get('prefix_type') == "HINT": - continue - # logger.debug( f"#{str(question.number_provided)} Finished combining answer block elements items into answers") - except Exception as e: - raise Exception(f"failed to combine answer block elements items into one answers block{e}") - - - try: - # Combine question content, any lists, feedback and hint in one dict - question_from_xml = { - "question_content": "", - "feedback": "", - "hint": "" - } - for index, question_content_item in enumerate(part_of_question_list): - if question_content_item.get('prefix_type') == "FEEDBACK": - 
question_from_xml.update({"feedback": question_content_item.find('content').text}) - elif question_content_item.get('prefix_type') == "HINT": - question_from_xml.update({"hint": question_content_item.find('content').text}) - else: - question_content = question_from_xml.get("question_content") - question_content_to_append = "" - if index > 0: - question_content_to_append = question_content_item.find('prefix').text - question_content_to_append = question_content_to_append + question_content_item.find('content').text - question_from_xml.update({"question_content": question_content + question_content_to_append}) - - if question_from_xml is not None: - question_text = question_from_xml.get("question_content") - self.questioncontent = question_text - - self.wr_answer = self.line_elements.find("wr_answer") - question_feedback = question_from_xml.get("feedback") - if question_feedback is not None: - self.feedback = question_feedback - question_hint = question_from_xml.get("hint") - if question_hint is not None: - self.hint = question_hint - - except Exception as e: - raise Exception(f"failed to combine question content, any lists, feedback and hint in one dict") - - for answer in self.answers: - self.basetextanswers.append(BaseTextAnswer(answer)) - - return self - - def check_questiontype(self): - if self.endanswer == None: - self.questiontype_processed = self.__check_inline_questiontype() - else: - self.questiontype_processed = self.__check_endanswer_questiontype() - return self - - - def compare_user_type_with_processed_type(self): - match self.questiontype_by_user: - case 'WR' | 'E': - if self.questiontype_by_user == 'E': - self.__add_respondus_type_warning(type_found='E', type_recommended='WR') - if self.endanswer == None: - if not (self.questiontype_processed == 'inline_WR_keyword' or - self.questiontype_processed == 'inline_WR_list'): - self.__add_inline_type_error(type_found='WR') - else: - if not self.questiontype_processed == 'endanswer_WR': - 
self.__add_endanswer_type_error(type_found='WR') - case 'MS' | 'MR': - if self.questiontype_by_user == 'MR': - self.__add_respondus_type_warning(type_found='MR', type_recommended='MS') - if self.endanswer == None: - if not (self.questiontype_processed == 'inline_MS'): - self.__add_inline_type_error(type_found='MS') - else: - if not self.questiontype_processed == 'endanswer_MS': - self.__add_endanswer_type_error(type_found='MS') - case 'ORD': - if self.endanswer == None: - if not (self.questiontype_processed == 'inline_ORD'): - self.__add_inline_type_error(type_found='ORD') - else: - if not self.questiontype_processed == 'endanswer_ORD': - self.__add_endanswer_type_error(type_found='ORD') - case 'MC': - if self.endanswer == None: - if not (self.questiontype_processed == 'inline_MC'): - self.__add_inline_type_error(type_found='MC') - else: - if not self.questiontype_processed == 'endanswer_MC': - self.__add_endanswer_type_error(type_found='MC') - case 'TF': - if self.endanswer == None: - if not (self.questiontype_processed == 'inline_TF'): - self.__add_inline_type_error(type_found='TF') - else: - if not self.questiontype_processed == 'endanswer_TF': - self.__add_endanswer_type_error(type_found='TF') - case 'FIB' | 'FMB': - if self.questiontype_by_user == 'FMB': - self.__add_respondus_type_warning(type_found='FMB', type_recommended='FIB') - if self.endanswer == None: - if not (self.questiontype_processed == 'inline_FIB'): - self.__add_inline_type_error(type_found='FIB') - else: - if not self.questiontype_processed == 'endanswer_FIB': - self.__add_endanswer_type_error(type_found='FIB') - case 'MAT' | 'MT': - if self.questiontype_by_user == 'MT': - self.__add_respondus_type_warning(type_found='MT', type_recommended='MAT') - case _: - logger.debug("question type not given by user") - return self - - - def build_question(self): - match self.questiontype_processed: - case 'inline_MC': - build_inline_MC(question, answers, is_random, enumeration) - case 'endanswer_MC': - 
build_endanswer_MC(question, answers, endanswer, is_random, enumeration) - case 'inline_TF': - build_inline_TF(question, answers, enumeration) - case 'endanswer_TF': - build_endanswer_TF(question, answers, endanswer, enumeration) - case 'inline_MS': - build_inline_MS(question, answers, is_random, enumeration) - case 'endanswer_MS': - build_endanswer_MS(question, answers, endanswer, is_random, enumeration) - case 'inline_WR_keyword': - build_inline_WR_with_keyword(question, wr_answer) - case 'inline_WR_list': - build_inline_WR_with_list(question, answers) - case 'endanswer_WR': - build_endanswer_WR_with_list(question, endanswer, wr_answer) - case 'inline_FIB': - build_inline_FIB(question) - case 'endanswer_FIB': - build_endanswer_FIB(question, endanswer) - case 'inline_MAT': - build_inline_MAT(question, answers) - case 'endanswer_MAT': - build_endanswer_MAT(question, endanswer) - case 'inline_ORD': - build_inline_ORD(question, answers) - case 'endanswer_ORD': - build_endanswer_ORD(question, endanswer) - case 'inline_NO_TYPE': - error_message = "Cannot determined the inline question type." - add_error_message(question, error_message) - raise InlineNoTypeError(error_message) - case 'endanswer_NO_TYPE': - error_message = "Cannot determined the end answer question type." - add_error_message(question, error_message) - raise EndAnswerNoTypeError(error_message) - - - def __add_respondus_type_warning(self, type_found, type_recommended): - self.warning_message.append(f'Respondus format "Type: {type_found}" was found on the file. 
Please use "Type: {type_recommended}" instead.') - - def __add_inline_type_error(self, type_found): - self.error_message.append(f"Inline question structure doesn't conform to {type_found} type question format.") - - def __add_endanswer_type_error(self, type_found): - self.error_message.append(f"End answer question structure doesn't conform to {type_found} type question format.") - - - - def __check_inline_questiontype(self): - answers_length = len(self.answers) - marked_answers_count = 0 - unmarked_answers_count = 0 - matching_answers_count = 0 - KeywordTrueFound = False - KeywordFalseFound = False - - is_fib = re.search(r"\[(.*?)\]", self.questioncontent) - - if answers_length == 0: - if is_fib: - # ==================== FIB confirmed ==================== - logger.debug("Question Type determined: inline_FIB") - return 'inline_FIB' - - if self.wr_answer != None: - # ==================== WR confirmed ==================== - logger.debug("Question Type determined: inline_WR_keyword") - return 'inline_WR_keyword' - - for answer in self.answers: - # answer_text = markdown_to_plain(answer.find('content').text.lower()) - answer_text = self.markdown_to_plain(answer.get("answer_content").lower()) - answer_text = self.trim_text(answer_text) - is_correct = answer.get('correct') - if is_correct: - marked_answers_count += 1 - if not is_correct: - unmarked_answers_count += 1 - - if answer_text == 'true': - KeywordTrueFound = True - - if answer_text == 'false': - KeywordFalseFound = True - matching_answers = re.search(r"(.*)=(.*)", answer_text) - - if matching_answers is not None: - matching_answers_count += 1 - - if answers_length == 2 and KeywordTrueFound == True and KeywordFalseFound == True: - # ==================== TF confirmed ==================== - logger.debug("Question Type determined: inline_TF") - return 'inline_TF' - - if marked_answers_count == 1 and (self.questiontype_by_user != 'MS' and self.questiontype_by_user != 'MR'): - # ==================== MC confirmed 
==================== - logger.debug("Question Type determined: inline_MC") - return 'inline_MC' - - if marked_answers_count > 1 or (self.questiontype_by_user == 'MS' or self.questiontype_by_user == 'MR'): - # ==================== MS confirmed ==================== - logger.debug("Question Type determined: inline_MS") - return 'inline_MS' - - if matching_answers_count == answers_length and matching_answers_count > 1 : - # ==================== MAT confirmed ==================== - logger.debug("Question Type determined: inline_MAT") - return 'inline_MAT' - - if (unmarked_answers_count == 1 and answers_length == 1) or (self.questiontype_by_user == 'WR' or self.questiontype_by_user == 'E'): - # ==================== WR confirmed ==================== - logger.debug("Question Type determined: inline_WR_list") - return 'inline_WR_list' - - if answers_length > 0 and unmarked_answers_count == answers_length: - # ==================== ORD confirmed ==================== - logger.debug("Question Type determined: inline_ORD") - return 'inline_ORD' - logger.debug("Question Type determined: inline_NO_TYPE") - return 'inline_NO_TYPE' - - - - - def __check_endanswer_questiontype(self): - answers_length = len(self.answers) - endanswer_text = self.markdown_to_plain(self.endanswer.answer.lower()) - endanswer_text = self.trim_text(endanswer_text) - - if answers_length > 0: - # possible TF, MC, MS - answer_list = list(map(str.strip, endanswer_text.split(','))) - answer_key_length = len(answer_list) - KeywordTrueFound = False - KeywordFalseFound = False - - for answer in self.answers: - answer_text = self.markdown_to_plain(answer.find('content').text.lower()) - answer_text = self.trim_text(answer_text) - - for choice_answer in answer_list: - correctanswer_index = (ord(choice_answer)-97) - - if correctanswer_index <= (answers_length-1): - # answer index exist - pass - else: - return 'endanswer_NO_TYPE' - - - if answer_text == 'true': - KeywordTrueFound = True - - if answer_text == 'false': - 
KeywordFalseFound = True - - if answers_length == 2 and KeywordTrueFound == True and KeywordFalseFound == True: - # ==================== TF confirmed ==================== - return 'endanswer_TF' - - if answer_key_length == 1 and (self.questiontype_by_user != 'MS' and self.questiontype_by_user != 'MR'): - # ==================== MC confirmed ==================== - return 'endanswer_MC' - - if (self.questiontype_by_user == 'MS' or self.questiontype_by_user == 'MR') or answer_key_length > 1: - # ==================== MS confirmed ==================== - return 'endanswer_MS' - - else: - # possible FIB, MAT, ORD, WR - matching_answers_count = 0 - is_fib = re.findall(r"\[(.*?)\]", self.questioncontent) - answer_list = list(map(str.strip, endanswer_text.split(';'))) - answer_key_length = len(answer_list) - for answer in answer_list: - matching_answer = re.search(r"(.*)=(.*)", answer) - - if matching_answer is not None: - matching_answers_count += 1 - - if matching_answers_count == answer_key_length and matching_answers_count > 1 : - # ========================= MAT confirmed ======================= - return 'endanswer_MAT' - - if len(is_fib) == answer_key_length: - # ========================= FIB confirmed ======================= - return 'endanswer_FIB' - - if answer_key_length > 1: - # ========================= ORD confirmed ======================= - return 'endanswer_ORD' - - if answer_key_length == 1: - # ========================= WR confirmed ======================= - return 'endanswer_WR' - - return 'endanswer_NO_TYPE' - - - # def build_inline_MC(question, answers, is_random, enumeration): - - # logger.debug("building inline mc") - # question.questiontype = 'MC' - # question.save() - - # mc_object = MultipleChoice.objects.create(question=question) - # if is_random == True: - # mc_object.randomize = True - - # if enumeration: - # mc_object.enumeration = enumeration - # mc_object.save() - # # grab all answers - # for answer_order, answer_item in enumerate(answers): - # 
mc_answerobject = MultipleChoiceAnswer.objects.create(multiple_choice=mc_object) - # answer_index = trim_text(answer_item.get('answer_prefix')) - # mc_answerobject.index = re.sub(r'[\W_]', '', answer_index) - # mc_answerobject.order = answer_order + 1 - # mc_answerobject.answer = trim_md_to_html(answer_item.get('answer_content')) - # answer_feedback = answer_item.get('feedback') - # is_correct = answer_item.get('correct') - # if answer_feedback != None: - # mc_answerobject.answer_feedback = trim_md_to_html(answer_feedback) - - # if is_correct: - # mc_answerobject.weight = 100 - - # mc_answerobject.save() - - - @staticmethod - def markdown_to_plain(text): - plain_text = pypandoc.convert_text(text, format="markdown_github+fancy_lists+emoji", to="plain", extra_args=['--wrap=none']) - return plain_text - - - @staticmethod - def trim_text(txt): - text = txt.strip() - text = re.sub('', '', text) - text = re.sub('', '\n', text, flags=re.IGNORECASE) - text = text.strip(" \n") - return text - - -class Section: - ''' - main sectioner variables - ''' - title = None - order = None - is_main_content = None - sectionheader = None - sectioncontent = None - - def __init__(self, title=None, - order=None, - is_main_content=None, - sectionheader=None, - sectioncontent=None): - self.title = title - self.order = order - self.is_main_content = is_main_content - self.sectionheader = sectionheader - self.sectioncontent = sectioncontent - - ''' - section variables for processing - ''' - content_from_formatter = None - content_after_images_extracted = None - - -class SectionList: - content = None - sections_list = [] - def __init__(self, content=None): - self.content = content - self.sections_list.clear() - - def run_sectioner(self): - logger.info("sectioner starting") - - content = os.linesep + self.content - - try: - os.chdir('/antlr_build/sectioner') - result = subprocess.run( - 'java -cp sectioner.jar:* sectioner', - shell=True, - input=content.encode("utf-8"), - capture_output=True) - 
os.chdir('/code') - except: - raise SectionerError("error while reading sections") - - logger.debug("starting sections extraction") - - root = None - try: - root = ET.fromstring(result.stdout.decode("utf-8")) - except: - raise SectionerError("Sectioner results empty") - - # logger.info(ET.tostring(root, encoding='utf8')) - - if len(root) == 0: - raise SectionerError("No Sections found") - - try: - for section in root: - sectionobj = Section() - - sectionobj.order = int(section.attrib.get("id")) + 1 - sectiontitle = section.find('title') - if sectiontitle is not None: - sectionobj.title = sectiontitle.text - - maincontent = section.find('maincontent') - if maincontent is not None: - sectionobj.title = content - sectionobj.is_main_content = True - sectionobj.sectioncontent = maincontent.text - - sectionheader = section.find('sectionheader') - if sectionheader is not None: - sectionobj.is_main_content = False - sectionobj.sectionheader = sectionheader.text - - sectioncontent = section.find('sectioncontent') - if sectioncontent is not None: - sectionobj.is_main_content = False - sectionobj.sectioncontent = sectioncontent.text - - self.sections_list.append(sectionobj) - except: - raise SectionerError("Error extracting section contents") - - return self - - -class QuestionList: - content = None - question_list = [] - - def __init__(self, content=None): - self.content = content - self.question_list.clear() - -class SectionerError(Exception): - def __init__(self, reason, message="Sectioner Error"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' - - - -class BaseTextAnswer(): - def __init__(self, answer): - self.answer_prefix = answer['answer_prefix'] - self.MarkedWithStar = answer['correct'] - self.answer_content = answer['answer_content'] - self.feedback = answer['feedback'] - - class EnumeratorTypes(Enum): - LOWERCASELETTERS = 'LOWERCASELETTERS', _('LOWERCASELETTERS') - UPPERCASELETTERS = 
'UPPERCASELETTERS', _('UPPERCASELETTERS') - NUMBERS = 'NUMBERS', _('NUMBERS') - ROMAN_NUMERALS = 'ROMAN_NUMERALS', _('ROMAN_NUMERALS') - UPPERCASE_ROMAN_NUMERALS = 'UPPERCASE_ROMAN_NUMERALS', _('UPPERCASE_ROMAN_NUMERALS') - NO_ENUMERATION = 'NO_ENUMERATION', _('NO_ENUMERATION') - - enumerator = EnumeratorTypes.LOWERCASELETTERS - answer_prefix = None - answer_content = None - MarkedWithStar = False - - def __str__(self): - return f"[{self.answer_prefix}][marked*:{ self.MarkedWithStar }][content:{self.answer_content[0:20]}]" - diff --git a/restapi/process/common/extract_images.py b/restapi/process/common/extract_images.py deleted file mode 100644 index 09f7f1e..0000000 --- a/restapi/process/common/extract_images.py +++ /dev/null @@ -1,29 +0,0 @@ -def extract_images(content): - import re - import logging - logger = logging.getLogger(__name__) - - images_list = [] - try: - x = re.findall(r"\", content) - if len(x) == 0: - return content, images_list - for image in x: - images_list.append(image) - - for index, image in enumerate(images_list): - val = re.escape(image) - x = re.sub(val, "<<<<"+ str(index) +">>>>" , content) - content = x - return content, images_list - except Exception as e: - raise ImageExtractError(e) - - -class ImageExtractError(Exception): - def __init__(self, reason, message=""): - self.reason = reason - self.message = message - - def __str__(self): - return f'{self.message} -> {self.reason}' \ No newline at end of file diff --git a/restapi/process/common/process_helper.py b/restapi/process/common/process_helper.py deleted file mode 100644 index 08bf5f8..0000000 --- a/restapi/process/common/process_helper.py +++ /dev/null @@ -1,66 +0,0 @@ -import re -import pypandoc - -def add_info_message(question, info_message): - if question.info: - if info_message not in question.info: - question.info = question.info + "\n" + info_message - question.save() - - else: - question.info = info_message - question.save() - -def add_warning_message(question, 
warning_message): - if question.warning: - if warning_message not in question.warning: - question.warning = question.warning + "\n" + warning_message - question.save() - - else: - question.warning = warning_message - question.save() - -def add_error_message(obj, error_message): - if obj.error: - if error_message not in obj.error: - obj.error = obj.error + "\n" + error_message - obj.save() - - else: - obj.error = error_message - obj.save() - -def trim_text(txt): - text = txt.strip() - text = re.sub('', '', text) - text = re.sub('', '\n', text, flags=re.IGNORECASE) - text = text.strip(" \n") - return text - -def markdown_to_plain(text): - plain_text = pypandoc.convert_text(text, format="markdown_github+fancy_lists+emoji", to="plain", extra_args=['--wrap=none']) - return plain_text - -def html_to_plain(text): - plain_text = pypandoc.convert_text(text, format="html", to="plain", extra_args=['--wrap=none']) - return plain_text - -def markdown_to_html(text): - html_text = pypandoc.convert_text(text, format="markdown_github+fancy_lists+emoji+task_lists+hard_line_breaks+pipe_tables+all_symbols_escapable+tex_math_dollars", to="html", extra_args=['--mathml', '--ascii']) - str_text = str(html_text) - str_text = re.sub('', lambda x: '
', str_text) - str_text = re.sub('
', lambda x: '', str_text) - str_text = re.sub('', lambda x: '', str_text) - return str_text - -def trim_md_to_plain(text): - text_content = trim_text(text) - text_content = markdown_to_plain(text_content) - return text_content - -def trim_md_to_html(text): - text_content = trim_text(text) - text_content = markdown_to_html(text_content) - text_content = text_content.strip('\n') - return text_content diff --git a/restapi/process/common/restore_images.py b/restapi/process/common/restore_images.py deleted file mode 100644 index f1af146..0000000 --- a/restapi/process/common/restore_images.py +++ /dev/null @@ -1,26 +0,0 @@ -def restore_images(content, images_list): - import re - import logging - logger = logging.getLogger(__name__) - - # This is to conditionally replace every match with the image at the index of the images_list - try: - if content is None: - return None - def replTxt(match): - x = re.search(r"\d+", match.group()) - if int(x.group()) < len(images_list): - return images_list[int(x.group())] - a = re.compile(r"(\<\<\<\<\d+\>\>\>\>)") - result = a.sub(replTxt, content) - return result - except Exception as e: - raise ImageRestoreError(e) - -class ImageRestoreError(Exception): - def __init__(self, reason, message=""): - self.reason = reason - self.message = message - - def __str__(self): - return f'{self.message} -> {self.reason}' \ No newline at end of file diff --git a/restapi/process/endanswers.py b/restapi/process/endanswers.py deleted file mode 100644 index 7ceedd8..0000000 --- a/restapi/process/endanswers.py +++ /dev/null @@ -1,45 +0,0 @@ -import os -import subprocess -import xml.etree.ElementTree as ET -from ..models import EndAnswer -import re - -def get_endanswers(questionlibrary): - if questionlibrary.end_answers_raw == None: - return 0 - os.chdir('/antlr_build/endanswers') - result = subprocess.run( - 'java -cp endanswers.jar:* endanswers', - shell=True, - input=questionlibrary.end_answers_raw.encode("utf-8"), - capture_output=True) - 
os.chdir('/code') - root = None - try: - root = ET.fromstring(result.stdout.decode("utf-8")) - except: - raise EndAnswerError("Cannot read endanswers") - answers = root.findall("answer") - endanswers_found = 0 - if answers is not None: - for answer in answers: - endanswer = EndAnswer.objects.create(question_library=questionlibrary) - content = answer.find('content').text - index = answer.find('index').text - indexdigit = re.search(r'\d+', index) - endanswer.index = indexdigit.group(0) - endanswer.answer = content - endanswers_found += 1 - endanswer.save() - else: - raise EndAnswerError("No Answers in EndAnswer") - questionlibrary.save() - return endanswers_found - -class EndAnswerError(Exception): - def __init__(self, reason, message="EndAnswer Error"): - self.reason = reason - self.message = message - - def __str__(self): - return f'{self.message} -> {self.reason}' \ No newline at end of file diff --git a/restapi/process/formatter/convert_txt.py b/restapi/process/formatter/convert_txt.py deleted file mode 100644 index cfd0fbc..0000000 --- a/restapi/process/formatter/convert_txt.py +++ /dev/null @@ -1,42 +0,0 @@ -def convert_txt(original_file_path, actual_filename): - import os - import subprocess - import uuid - from pathlib import Path - import glob - import shutil - txt_file_uuid = uuid.uuid4() - txt_lines = "" - - try: - Path("/code/temp").mkdir(parents=True, exist_ok=True) - os.chdir('/code/temp') - - subprocess.run(["soffice", - "--headless", - "--convert-to", - "txt", - "--outdir", - str(txt_file_uuid), - original_file_path], - capture_output=True) - - txt_file_path = glob.glob(f"/code/temp/{str(txt_file_uuid)}/*.txt")[0] - text_file = Path(str(txt_file_path)) - if text_file.is_file(): - f = open(txt_file_path , mode='r', encoding='utf-8-sig') - lines = f.read() - txt_lines = '\n' + lines - f.close() - shutil.rmtree("/code/temp/"+str(txt_file_uuid), ignore_errors=True) - return txt_lines - except Exception as e: - raise ConvertTxtError(e) - - -class 
ConvertTxtError(Exception): - def __init__(self, reason, message="ConvertTxtError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' \ No newline at end of file diff --git a/restapi/process/formatter/fix_numbering.py b/restapi/process/formatter/fix_numbering.py deleted file mode 100644 index a3fe7eb..0000000 --- a/restapi/process/formatter/fix_numbering.py +++ /dev/null @@ -1,92 +0,0 @@ -def fix_numbering(content_images_tagged, content_txt): - import os - import re - import html - import jaro - import logging - logger = logging.getLogger(__name__) - - try: - #remove empty lines - ref_array = os.linesep.join([s for s in content_txt.splitlines() if s]) - - # make array by splitting lines - ref_array = ref_array.splitlines() - pandoc_array = content_images_tagged.splitlines() - - ref_index = 0 - highest_score = 0 - for pandoc_index, pandoc_ref in enumerate(pandoc_array): - # check if a list item - number_pandoc = re.search(r"^ *([0-9]+)\\?[)|.]", pandoc_ref) - if number_pandoc: - # unescape html characters like ’ etc - pandoc_comp = html.unescape(pandoc_ref) - # remove all non-letter characters - pandoc_comp = re.findall(r'[a-zA-Z0-9]+', pandoc_comp) - pandoc_comp = ''.join(pandoc_comp) - for ref_index_it, ref_element in enumerate(ref_array[ref_index:len(ref_array)], start=ref_index): - # remove all non-letter/number characters - ref_comp = re.findall(r'[a-zA-Z0-9]+', ref_element) - ref_comp = ''.join(ref_comp) - - number_ref = re.search(r"^ *([0-9]+)\\?[)|.]", ref_element) - number_ref_alt = re.search(r"^ *([0-9]+)", ref_element) - - jaro_score = jaro.jaro_metric(ref_comp,pandoc_comp) - - #check if reference is a number and skip if not a number - if not number_ref: - if number_ref_alt: - if jaro_score > 0.9: - error_question = number_pandoc.group(1) - if number_ref_alt: - error_question = number_ref_alt.group(1) - raise QuestionEnumerationError(f'did not match the supported qcon numberlist pattern "." 
or ") at question: {error_question}') - continue - - ### FOR DEBUGGING specific line - # debug_line = '47' - # if number_pandoc.group(1) == debug_line: - # logger.debug(f"ref_index = {ref_index} ref_index_it = {ref_index_it}") - # logger.debug(f"ref_element = {ref_element}") - # logger.debug(f"ref: {ref_comp[0:120]}") - # logger.debug(f"pandoc: {pandoc_comp[0:120]}") - # logger.debug(f"score: {jaro_score}") - - if jaro_score > 0.9: - # matched by similarity - # if number_ref: - if number_ref.group(1) != number_pandoc.group(1): - logger.debug(f"mismatch found [ref]:[pandoc]-[{number_ref.group(1)}:{number_pandoc.group(1)}]") - subbed = re.sub(r"[0-9]+", number_ref.group(1), pandoc_array[pandoc_index]) - pandoc_array[pandoc_index] = subbed - logger.debug(f"mismatch fixed [ref]:[pandoc]-[{number_ref.group(1)}:{number_pandoc.group(1)}]->[{number_ref.group(1)}:{number_ref.group(1)}]") - ref_index = ref_index_it+1 - break - else: - # number is the same and doesn't need fixing - ref_index = ref_index_it+1 - break - else: - # no match; continue searching - if jaro_score > highest_score: - highest_score = jaro_score - # reached end of ref array without finding a match, comparison strings need to be checked or score needs to be adjusted - if ref_index_it == len(ref_array) - 1: - error_question = number_pandoc.group(1) - logger.warning(f'No reference line found with a high enough similarity score[{highest_score}] for question: {error_question}') - raise QuestionEnumerationError(f'No reference line found with a high enough similarity score[{highest_score}] for question: {error_question}') - - combined_string = '\n'.join(pandoc_array) - return '\n' + combined_string - - except Exception as e: - raise Exception(e) - -class QuestionEnumerationError(Exception): - def __init__(self, reason, message="QuestionEnumerationError"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' \ No newline at end of file diff --git 
a/restapi/process/formatter/formatter.py b/restapi/process/formatter/formatter.py deleted file mode 100644 index b50c2fb..0000000 --- a/restapi/process/formatter/formatter.py +++ /dev/null @@ -1,126 +0,0 @@ -from ast import Not -import os -import xml.etree.ElementTree as ET -import subprocess -import re - -import logging -logger = logging.getLogger(__name__) - -def run_formatter_parser(content, filename): - root = None - - try: - os.chdir('/antlr_build/formatter') - result = subprocess.run('java -cp formatter.jar:* formatter', - shell=True, - input=content.encode("utf-8"), - capture_output=True) - os.chdir('/code') - root = ET.fromstring(result.stdout.decode("utf-8")) - except: - raise FormatterError("Internal error while converting file") - - logger.debug("starting formatter extraction") - - format = {} - -# # ==================================== MAINCONTENT TITLE - maincontenttitle = root.find('maincontent_title') - logger.debug("checking maincontent title") - if maincontenttitle is not None: - main_title = (maincontenttitle.text).strip() - if main_title: - # format["maincontent_title"] = (trim_text(main_title)).lstrip('# ') - format["maincontent_title"] = main_title - else: - format["maincontent_title"] = None - -# # ==================================== BODY - body = root.find('body') - logger.debug("checking formatter body") - if body is not None: - # questionlibrary.formatter_output = body.text.rstrip() + "\n" - # questionlibrary.save() - format["body"] = body.text.rstrip() + "\n" - else: - raise FormatterError("document body not found") - -# ==================================== END ANSWERS - - end_answers = root.find('end_answers') - logger.debug("checking for endanswers block") - if end_answers is not None: - logger.debug("endanswers block found") - # questionlibrary.end_answers_raw = end_answers.text - # questionlibrary.save() - format["end_answers"] = end_answers.text - else: - logger.info("No endanswers block found") - format["end_answers"] = None - - 
return format - - - - - - -# def run_formatter_parser(content, filename): -# logger = FilenameLoggingAdapter(newlogger, {'filename': filename}) -# root = None - -# try: -# os.chdir('/antlr_build/formatter') -# result = subprocess.run('java -cp formatter.jar:* formatter', -# shell=True, -# input=content.encode("utf-8"), -# capture_output=True) -# os.chdir('/code') -# root = ET.fromstring(result.stdout.decode("utf-8")) -# except: -# raise FormatterError("Internal error while converting file") - -# logger.debug("starting formatter extraction") - -# # format = Format() -# # ==================================== SECTION INFO - -# maincontenttitle = root.find('maincontent_title') -# logger.debug("checking maincontent title") -# if maincontenttitle is not None: -# main_title = (maincontenttitle.text).strip() -# if main_title: -# format.maincontent_title = (trim_text(main_title)).lstrip('# ') -# else: -# format.maincontent_title = None -# # ==================================== BODY - -# body = root.find('body') -# if body is not None: -# # questionlibrary.formatter_output = body.text.rstrip() + "\n" -# # questionlibrary.save() -# format.body = body.text.rstrip() + "\n" -# else: -# raise FormatterError("document body not found") - -# # ==================================== END ANSWERS - -# end_answers = root.find('end_answers') -# logger.debug("checking for endanswers block") -# if end_answers is not None: -# logger.debug("endanswers block found") -# # questionlibrary.end_answers_raw = end_answers.text -# # questionlibrary.save() -# format.end_answers = end_answers.text -# else: -# logger.info("No endanswers block found") - -# return format - -class FormatterError(Exception): - def __init__(self, reason, message="Formatter Error"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' diff --git a/restapi/process/questionparser/questionparser.py b/restapi/process/questionparser/questionparser.py deleted file mode 
100644 index 29fde05..0000000 --- a/restapi/process/questionparser/questionparser.py +++ /dev/null @@ -1,141 +0,0 @@ -from ast import Not -import os -import xml.etree.ElementTree as ET -import subprocess -import re -# from ...models import Question - -import logging -logger = logging.getLogger(__name__) - -from enum import Enum -from django.utils.translation import gettext_lazy as _ - -def run_questionparser(question): - - question.get_line_elements() - question.extract_question_header_elements() - question.get_question_body_parts_list() - question.get_number_provided() - question.separate_question_and_answers() - - # question.check_questiontype() - # question.compare_user_type_with_processed_type() - - # question.build_question() - # logger.info(dir(question)) - - # q = Question() - # m = MultipleChoice() - # m.enumeration = "letters" - # m.answers = [MultipleChoiceAnswer(1,1,"answer1"), - # MultipleChoiceAnswer(2,2,"answer two")] - # q.processedquestion = m - - # b = BaseTextAnswer() - # b.answer_content = "hallo ik ben base van base van g;kwjefn;ewlrkfm;owemkl" - - # manswer = MultipleChoiceAnswer(basetextanswer=b) - # print(manswer) - # print(question.basetextanswers) - print(help(question)) - - - return question - - - -# class MultipleChoice(): -# randomize = None -# enumeration = None -# answers = [] - - - -# class BaseTextAnswer(): -# def __init__(self, answerlistitem): -# self.answer_prefix = answerlistitem['answer_prefix'] -# self.MarkedWithStar = MarkedWithStar -# self.answer_content = answer_content -# self.feedback = feedback - -# class EnumeratorTypes(Enum): -# LOWERCASELETTERS = 'LOWERCASELETTERS', _('LOWERCASELETTERS') -# UPPERCASELETTERS = 'UPPERCASELETTERS', _('UPPERCASELETTERS') -# NUMBERS = 'NUMBERS', _('NUMBERS') -# ROMAN_NUMERALS = 'ROMAN_NUMERALS', _('ROMAN_NUMERALS') -# UPPERCASE_ROMAN_NUMERALS = 'UPPERCASE_ROMAN_NUMERALS', _('UPPERCASE_ROMAN_NUMERALS') -# NO_ENUMERATION = 'NO_ENUMERATION', _('NO_ENUMERATION') - -# enumerator = 
EnumeratorTypes.LOWERCASELETTERS -# enumindex = None -# answer_content = None -# MarkedWithStar = False - -# def __str__(self): -# return f"[{self.enumindex}][marked*:{ self.MarkedWithStar }][content:{self.answer_content[0:20]}]" - -# class MultipleChoiceAnswer(BaseTextAnswer): -# def __init__(self, basetextanswer=None, index=None, order=None): -# self.index = index -# self.order=order -# # super(MultipleChoiceAnswer, self).__init__() -# if type(basetextanswer) is BaseTextAnswer: -# super().__init__(basetextanswer) -# # self.answer=answer -# index = None -# order = None -# # answer = None -# answer_feedback = None -# weight = None -# def __str__(self): -# return f"[{self.index}][marked*:{ self.MarkedWithStar }][content:]" - - -# class TrueFalse(): -# true_weight = None -# true_feedback = None -# false_weight = None -# false_feedback = None -# enumeration = None - -# class Fib(): -# type = None -# text = None -# order = None -# size = None -# weight = None - -# class MultipleSelect(): -# randomize = None -# enumeration = None -# style = None -# grading_type = None - -# class MultipleSelectAnswer(): -# index = None -# order = None -# answer = None -# answer_feedback = None -# is_correct = None - -# class Matching(): -# grading_type = None - - -# class MatchingChoice(): -# choice_text = None - -# class MatchingAnswer(): -# answer_text = None - -# class Ordering(): -# text = None -# order = None -# ord_feedback = None - -# class WrittenResponse(): -# enable_student_editor = None -# initial_text = None -# answer_key = None -# enable_attachments = None \ No newline at end of file diff --git a/restapi/process/sectioner/sectioner.py b/restapi/process/sectioner/sectioner.py deleted file mode 100644 index 265eae9..0000000 --- a/restapi/process/sectioner/sectioner.py +++ /dev/null @@ -1,82 +0,0 @@ -import os -import subprocess -import xml.etree.ElementTree as ET -# from .process_helper import markdown_to_plain, trim_text, markdown_to_html -# from api.tasks import 
markdown_to_plain, trim_text, markdown_to_html -from ...models import Section - -import logging -newlogger = logging.getLogger(__name__) -from api.logging.logging_adapter import FilenameLoggingAdapter - -# This is to split sections into separate objects -def run_sectioner(sectionlist): - logger = FilenameLoggingAdapter(newlogger, { - 'filename': "" - }) - logger.info("sectioner starting") - - content = os.linesep + sectionlist.content - - try: - os.chdir('/antlr_build/sectioner') - result = subprocess.run( - 'java -cp sectioner.jar:* sectioner', - shell=True, - input=content.encode("utf-8"), - capture_output=True) - os.chdir('/code') - except: - raise SectionerError("error while reading sections") - - logger.debug("starting sections extraction") - - root = None - try: - root = ET.fromstring(result.stdout.decode("utf-8")) - except: - raise SectionerError("Sectioner results empty") - - # logger.info(ET.tostring(root, encoding='utf8')) - - if len(root) == 0: - raise SectionerError("No Sections found") - - try: - for section in root: - sectionobj = Section() - - sectionobj.order = int(section.attrib.get("id")) + 1 - sectiontitle = section.find('title') - if sectiontitle is not None: - sectionobj.title = sectiontitle.text - - maincontent = section.find('maincontent') - if maincontent is not None: - sectionobj.title = content - sectionobj.is_main_content = True - sectionobj.sectioncontent = maincontent.text - - sectionheader = section.find('sectionheader') - if sectionheader is not None: - sectionobj.is_main_content = False - sectionobj.sectionheader = sectionheader.text - - sectioncontent = section.find('sectioncontent') - if sectioncontent is not None: - sectionobj.is_main_content = False - sectionobj.sectioncontent = sectioncontent.text - - sectionlist.sections_list.append(sectionobj) - except: - raise SectionerError("Error extracting section contents") - - return sectionlist - - -class SectionerError(Exception): - def __init__(self, reason, message="Sectioner 
Error"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> {self.reason}' \ No newline at end of file diff --git a/restapi/process/splitter/splitter.py b/restapi/process/splitter/splitter.py deleted file mode 100644 index 2aaf64f..0000000 --- a/restapi/process/splitter/splitter.py +++ /dev/null @@ -1,162 +0,0 @@ -import os -import subprocess -import xml.etree.ElementTree as ET -# from api.tasks import trim_text -import logging -logger = logging.getLogger(__name__) - -from ...models import QuestionList -from ...models import BaseQuestion -# from ...models import Question - - -import re -import os - -class Splitter(QuestionList): - def __init__(self, content) -> None: - super().__init__(content=content) - self.total_questions_found = 0 - self.current_section_starts_with_1 = False - - def add_newlines_before_question(self): - lines_altered = [] - lines_original = self.content.splitlines() - # logger.debug("raw_content") - # logger.debug(section.raw_content) - # logger.debug("lines original") - # logger.debug(lines_original) - - # check if the first question was found already - number_1_found = False - for line in lines_original: - number_prefix = re.search(r"^ *(\d+)[\\]{0,2}[.|)]", line) - if number_prefix: - numbered_line = int(number_prefix.group(1)) - if numbered_line != 1: - #this section doesn't start with 1 so we dont need to check for it further - number_1_found = True - self.current_section_starts_with_1 = False - break - else: - number_1_found = False - self.current_section_starts_with_1 = True - break - tracklist = 0 - newline_detected = False - # letterlist_enumvalue = '' - for line in lines_original: - # check if newlines are detected.(newlines cancel lists) - if '' in line: - #means newline is in this line so it canceled the previous list tracking - # reset list back to zero - newline_detected = True - tracklist = 0 - if number_1_found: - #check if the current line is a numbered line - number_prefix = 
re.search(r"^ *(\d+)[\\]{0,2}[.|)]", line) - if number_prefix: - numbered_line = int(number_prefix.group(1)) - #it is a numbered line, so check if it is a #1 - if numbered_line == 1: - # starting a new numbered list - tracklist = 1 - newline_detected = False # reset to allow new list to be tracked - else: - # check if we were in a list on the previous numbered line - if tracklist == 0: - # we were not a list on the previous numbered line - lines_altered.append('\n') - else: - # we were in a list on the previous line - # check if we still are on a list on this line - if numbered_line == tracklist+1: - # this means we might still be inside a list. - # to make sure lets see if a newline was detected prior to this line - if newline_detected: - # there was a newline detected so this means the list is cancelled - # reset the list tracker to zero - tracklist = 0 - # and because the list was cancelled we can assume this line to be a new question - lines_altered.append('\n') - # reset the newline_detected to False - newline_detected = False - else: - #update tracklist to track the current list further - tracklist = numbered_line - # TODO WARN USER ABOUT POTENTIAL NEWLINE NEEDED HERE?? But we don't know the criteria to detect this issue yet. 
more development needed here - else: - # this means we have exited the list, and is safe to assume this is a new question - lines_altered.append('\n') - tracklist = 0 - else: - # look for first question - if re.search(r"^ *1[\\]{0,2}[.|)]", line): - number_1_found = True - lines_altered.append(line) - result = os.linesep.join(lines_altered) - result = os.linesep + result - self.content = result - return self - - - def split_questions(self): - root = None - try: - os.chdir('/antlr_build/splitter') - result = subprocess.run( - 'java -cp splitter.jar:* splitter', - shell=True, - input=self.content.encode("utf-8"), - capture_output=True) - os.chdir('/code') - root = ET.fromstring(result.stdout.decode("utf-8")) - except Exception as e: - raise SplitterError("ANTLR: " + str(e)) - - # COPY contents of first element into the second element because this sections does not start with number 1. - # meaning that the contents of the first element belongs - # to the first question in this section - if not self.current_section_starts_with_1: - if len(root) > 1: - root[1][0].text = str(root[0][0].text) + str(root[1][0].text) - root.remove(root[0]) - #renumber the question id because the first element was removed after being copied to the second element - id = 0 - for question in root: - question.attrib["id"] = str(id) - id += 1 - - try: - for index, question in enumerate(root, start=1): - questionobj = BaseQuestion() - questionobj.index = index - questioncontent = question.find('questioncontent') - if questioncontent is not None: - questionobj.questioncontent = questioncontent.text - self.question_list.append(questionobj) - except: - # sectionobject.error = "Failed to process questions in section" - raise SplitterError("Failed to process questions in section") - # return self.questionlist - - - -class SplitterError(Exception): - def __init__(self, reason, message="Splitter Error"): - self.reason = reason - self.message = message - def __str__(self): - return f'{self.message} -> 
{self.reason}' - - - - - - - - - - - - diff --git a/restapi/serializers.py b/restapi/serializers.py deleted file mode 100644 index 0eadcf6..0000000 --- a/restapi/serializers.py +++ /dev/null @@ -1,58 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. - -from rest_framework import serializers -from django.conf import settings - -class FormatSerializer(serializers.Serializer): - filename = serializers.CharField(required=False) - maincontent_title = serializers.CharField(required=False) - body = serializers.CharField() - end_answers = serializers.CharField(required=False) - - -class SectionSerializer(serializers.Serializer): - title = serializers.CharField(required=False) - order = serializers.IntegerField(max_value=None, min_value=None, required=False) - is_main_content = serializers.BooleanField(required=False) - sectionheader = serializers.CharField(allow_null=True, required=False) - sectioncontent = serializers.CharField() - -class SectionListSerializer(serializers.Serializer): - sections_list = SectionSerializer(many=True, required=False, allow_null=True) - -class QuestionBaseSerializer(serializers.Serializer): - index = serializers.IntegerField(max_value=None, min_value=None, required=False) - questioncontent = serializers.CharField(required=False) -class QuestionListSerializer(serializers.Serializer): - question_list = QuestionBaseSerializer(many=True) - -class ProcessedQuestionSerializer(serializers.Serializer): - randomize = serializers.CharField(required=False) - enumeration = serializers.CharField(required=False) -# answers = serializers.ListField( -# child=serializers.IntegerField(min_value=0, max_value=100) -# ) - -class BasetextAnswerSerializer(serializers.Serializer): - enumindex = serializers.CharField(required=False) - answer_content = serializers.CharField(required=False) - -class 
BaseTextAnswerField(serializers.Field): - def to_representation(self, value): - return f"{value.answer_prefix}" \ - , f"{value.MarkedWithStar}" \ - , f"{value.answer_content}" - -class QuestionSerializer(serializers.Serializer): - number_provided = serializers.CharField(required=False) - question_header_type = serializers.CharField(required=False) - question_header_title = serializers.CharField(required=False) - question_header_points = serializers.CharField(required=False) - questiontype_processed = serializers.CharField(required=False) - questioncontent = serializers.CharField(required=False) - basetextanswers = serializers.ListField(required=False, - child=BaseTextAnswerField(required=False)) - # answers = serializers.CharField(required=False) - # processedquestion = ProcessedQuestionSerializer(required=False) diff --git a/restapi/tasks.py b/restapi/tasks.py deleted file mode 100644 index 9234cad..0000000 --- a/restapi/tasks.py +++ /dev/null @@ -1,62 +0,0 @@ -from celery import shared_task -from celery.utils.log import get_task_logger - -loggercelery = get_task_logger(__name__) -import re - -from .logging.logging_adapter import FilenameLoggingAdapter -from .logging.ErrorTypes import (WRInlineStructureError, WREndStructureError, MSInlineStructureError, MSEndStructureError, ORDInlineStructureError, ORDEndStructureError, MCInlineStructureError, MCEndStructureError, TFInlineStructureError, TFEndStructureError, FIBInlineStructureError, FIBEndStructureError, MATInlineStructureError, MATEndStructureError, InlineNoTypeError, EndAnswerNoTypeError, NoTypeDeterminedError, MarkDownConversionError) -from .logging.WarningTypes import (RespondusTypeEWarning, RespondusTypeMRWarning, RespondusTypeFMBWarning, RespondusTypeMTWarning) - -@shared_task() -def run_pandoc_task(temp_file_path, filename): - logger = FilenameLoggingAdapter(loggercelery, { - 'filename': filename - }) - - try: - import pypandoc - mdblockquotePath = "./pandoc/pandoc-filters/mdblockquote.lua" - emptyparaPath 
= "./pandoc/pandoc-filters/emptypara.lua" - imageFilterPath = "./pandoc/pandoc-filters/image.lua" - tables = "./pandoc/pandoc-filters/tables.lua" - # listsPath = "./api/pandoc/pandoc-filters/lists.lua" - - pandoc_word_to_html = pypandoc.convert_file( - temp_file_path, - format='docx+empty_paragraphs', - to='html+empty_paragraphs+tex_math_single_backslash', - extra_args=['--no-highlight', - '--embed-resources', - '--markdown-headings=atx', - '--preserve-tabs', - '--wrap=preserve', - '--indent=false', - '--mathml', - '--ascii', - # '--lua-filter=' + imageFilterPath - ]) - pandoc_word_to_html = re.sub(r"(?!\s)", " ", pandoc_word_to_html) - pandoc_word_to_html = re.sub(r"(?!\s)", " ", pandoc_word_to_html) - pandoc_html_to_md = pypandoc.convert_text( - pandoc_word_to_html, - 'markdown_github+fancy_lists+emoji+hard_line_breaks+all_symbols_escapable+escaped_line_breaks+pipe_tables+startnum+tex_math_dollars', - format='html+empty_paragraphs', - extra_args=['--no-highlight', - '--embed-resources', - '--markdown-headings=atx', - '--preserve-tabs', - '--wrap=preserve', - '--indent=false', - '--mathml', - '--ascii', - '--lua-filter=' + mdblockquotePath, - '--lua-filter=' + emptyparaPath, - # '--lua-filter=' + tables - ]) - pandoc_html_to_md = pandoc_html_to_md.rstrip() - return "\n" + pandoc_html_to_md + "\n" - except Exception as e: - logger.debug(e) - raise MarkDownConversionError(e) - diff --git a/restapi/tests.py b/restapi/tests.py deleted file mode 100644 index 7ce503c..0000000 --- a/restapi/tests.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.test import TestCase - -# Create your tests here. diff --git a/restapi/urls.py b/restapi/urls.py deleted file mode 100644 index cb6d5db..0000000 --- a/restapi/urls.py +++ /dev/null @@ -1,15 +0,0 @@ -# This Source Code Form is subject to the terms of the Mozilla Public -# License, v. 2.0. If a copy of the MPL was not distributed with this -# file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -from django.urls import include, path, re_path -from . import views -from django.conf import settings - -urlpatterns = [ - path('format', views.format), - path('sections', views.sections), - path('splitter', views.splitter), - path('parsequestion', views.parsequestion) -] - diff --git a/restapi/views.py b/restapi/views.py deleted file mode 100644 index ed0da75..0000000 --- a/restapi/views.py +++ /dev/null @@ -1,131 +0,0 @@ -from rest_framework.decorators import api_view, permission_classes, authentication_classes, parser_classes -from rest_framework.parsers import JSONParser -from rest_framework.response import Response -from rest_framework import authentication, permissions -from rest_framework.views import APIView -from rest_framework.permissions import AllowAny -from django.conf import settings - -from .serializers import FormatSerializer -from .serializers import SectionListSerializer, SectionSerializer -from .serializers import QuestionListSerializer, QuestionBaseSerializer, QuestionSerializer -from .models import Format -from .models import SectionList, QuestionList -from .models import BaseQuestion, BaseTextAnswer -from .process.common.extract_images import extract_images -from .process.common.restore_images import restore_images -# from .process.sectioner.sectioner import run_sectioner -# from .process.splitter.splitter import run_splitter -from .process.splitter.splitter import Splitter -from .process.questionparser.questionparser import run_questionparser - -import logging -logger = logging.getLogger(__name__) - -@authentication_classes([]) -@permission_classes([]) -@api_view(['POST']) -def format(request): - maincontent_title = request.data['file'].name.split(".")[0] - filename = request.data['file'].name - temp_file_path = request.data['file'].temporary_file_path() - temp_file_name = request.data['file'].name - - format = Format(temp_file_path, temp_file_name, filename, maincontent_title) - 
format.convert_pandoc().extract_images().convert_txt().fix_numbering().run_formatter().restore_images() - serializer = FormatSerializer(format) - return Response(serializer.data, status=200) - -@parser_classes([JSONParser]) -@authentication_classes([]) -@permission_classes([]) -@api_view(['POST']) -def sections(request): - serializer = FormatSerializer(data=request.data) - if serializer.is_valid(raise_exception=True): - - sectionlist = SectionList( - content = serializer.validated_data['body']) - - sectionlist.content, images_list = extract_images( - sectionlist.content) - - sectionlist.run_sectioner() - - for section in sectionlist.sections_list: - sectionheader = restore_images(section.sectionheader, - images_list) - setattr(section, 'sectionheader', sectionheader) - sectioncontent = restore_images(section.sectioncontent, - images_list) - setattr(section, 'sectioncontent', sectioncontent) - - serializer = SectionListSerializer(sectionlist) - - return Response(serializer.data, status=200) - return Response(serializer.errors, status=400) - -@parser_classes([JSONParser]) -@authentication_classes([]) -@permission_classes([]) -@api_view(['POST']) -def splitter(request): - serializer = SectionSerializer(data=request.data) - if serializer.is_valid(raise_exception=True): - questionlist = QuestionList( - content=serializer.validated_data['sectioncontent'], - ) - - questionlist.content, images_list = extract_images( - questionlist.content) - - splitter = Splitter(questionlist.content) - splitter.add_newlines_before_question().split_questions() - questionlist = super(splitter.__class__,splitter) - for question in questionlist.question_list: - questioncontent = restore_images(question.questioncontent, - images_list) - setattr(question, 'questioncontent', questioncontent) - serializer = QuestionListSerializer(questionlist) - return Response(serializer.data, status=200) - return Response(serializer.errors, status=400) - -@parser_classes([JSONParser]) 
-@authentication_classes([]) -@permission_classes([]) -@api_view(['POST']) -def parsequestion(request): - serializer = QuestionSerializer(data=request.data) - if serializer.is_valid(raise_exception=True): - basequestion = BaseQuestion( - questioncontent=serializer.validated_data['questioncontent'], - ) - basequestion.questioncontent, images_list = extract_images( - basequestion.questioncontent) - - basequestion.get_line_elements() - basequestion.extract_question_header_elements() - basequestion.get_question_body_parts_list() - basequestion.get_number_provided() - basequestion.separate_question_and_answers() - basequestion.check_questiontype() - basequestion.compare_user_type_with_processed_type() - - serializernew = QuestionSerializer(basequestion) - return Response(serializernew.data, status=200) - return Response(serializer.errors, status=400) - - -@parser_classes([JSONParser]) -@authentication_classes([]) -@permission_classes([]) -@api_view(['POST']) -def endanswer(request): - - return Response("endanswer") - -class RootPath(APIView): - permission_classes = [AllowAny] - - def get(self, request, format=None): - return Response(settings.APP_VERSION, status=200) \ No newline at end of file