From 598cb74c15d6af34a5dbc693045e602a7387a342 Mon Sep 17 00:00:00 2001 From: doomedraven Date: Fri, 2 Jan 2026 17:32:55 +0100 Subject: [PATCH] Fds close (#2820) * close_fds * close_fds * Update lib/cuckoo/core/log.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * Update utils/process.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * Update utils/process.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> * close_fds * close_fds * close_fds * close_fds * dead cod * more FDs leaks * more FDs leaks * more FDs leaks * more FDs leaks * more FDs leaks * more FDs leaks --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- docs/book/src/usage/monitor.rst | 5 + lib/cuckoo/common/cape_utils.py | 15 +- .../common/integrations/file_extra_info.py | 11 +- lib/cuckoo/common/integrations/parse_pe.py | 10 ++ lib/cuckoo/common/mapTTPs.py | 3 +- lib/cuckoo/common/quarantine.py | 6 +- lib/cuckoo/common/saztopcap.py | 12 +- lib/cuckoo/common/utils.py | 7 +- lib/cuckoo/common/web_utils.py | 155 +++++------------- lib/cuckoo/core/analysis_manager.py | 3 +- lib/cuckoo/core/database.py | 3 +- lib/cuckoo/core/log.py | 27 +++ lib/cuckoo/core/plugins.py | 3 + lib/cuckoo/core/resultserver.py | 7 +- modules/processing/analysisinfo.py | 3 +- modules/processing/debug.py | 3 +- modules/processing/dumptls.py | 37 +++-- modules/processing/network.py | 33 ++-- modules/reporting/maec5.py | 3 +- utils/dist.py | 11 +- utils/process.py | 129 +++++++++------ utils/submit.py | 94 +++++------ utils/vpn2cape.py | 8 +- web/apiv2/views.py | 33 ++-- web/submission/views.py | 33 ++-- web/web/allauth_adapters.py | 3 +- 26 files changed, 341 insertions(+), 316 deletions(-) diff --git a/docs/book/src/usage/monitor.rst b/docs/book/src/usage/monitor.rst index 05fba0606d2..ac20fdb6d4b 100644 --- a/docs/book/src/usage/monitor.rst +++ b/docs/book/src/usage/monitor.rst @@ -211,3 +211,8 @@ One example procedure is as follow: .. image:: ../_images/screenshots/debugger2disassembler.png :align: center + + +The Art of Detonation Debugging: A Strategic Guide for CAPE Sandbox +=================================================================== +Coming soon diff --git a/lib/cuckoo/common/cape_utils.py b/lib/cuckoo/common/cape_utils.py index 6299e3c4d22..dfd8961b094 100644 --- a/lib/cuckoo/common/cape_utils.py +++ b/lib/cuckoo/common/cape_utils.py @@ -122,13 +122,13 @@ def hash_file(method, path: str) -> str: @param path: file path @return: computed hash string """ - f = open(path, "rb") h = method() - while True: - buf = f.read(BUFSIZE) - if not buf: - break - h.update(buf) + with open(path, "rb") as f: + while True: + buf = f.read(BUFSIZE) + if not buf: + break + h.update(buf) return h.hexdigest() @@ -335,7 +335,8 @@ def static_config_lookup(file_path: str, sha256: str = False) -> dict: dict or None: A dictionary containing the configuration information if found, otherwise None. 
""" if not sha256: - sha256 = hashlib.sha256(open(file_path, "rb").read()).hexdigest() + with open(file_path, "rb") as f: + sha256 = hashlib.sha256(f.read()).hexdigest() if repconf.mongodb.enabled: document_dict = mongo_find_one( diff --git a/lib/cuckoo/common/integrations/file_extra_info.py b/lib/cuckoo/common/integrations/file_extra_info.py index 966838d8f33..183267fe1cc 100644 --- a/lib/cuckoo/common/integrations/file_extra_info.py +++ b/lib/cuckoo/common/integrations/file_extra_info.py @@ -187,7 +187,8 @@ def static_file_info( # ToDo we need type checking as it wont work for most of static jobs if HAVE_PEFILE and ("PE32" in data_dictionary["type"] or "MS-DOS executable" in data_dictionary["type"]): - data_dictionary["pe"] = PortableExecutable(file_path).run(task_id) + with PortableExecutable(file_path) as pe: + data_dictionary["pe"] = pe.run(task_id) if HAVE_FLARE_CAPA: # https://github.com/mandiant/capa/issues/2620 @@ -965,7 +966,13 @@ def RarSFX_extract(file, *, data_dictionary, options: dict, **_) -> ExtractorRet @time_tracker def office_one(file, **_) -> ExtractorReturnType: - if not HAVE_ONE or open(file, "rb").read(16) not in ( + if not HAVE_ONE: + return + + with open(file, "rb") as f: + header = f.read(16) + + if header not in ( b"\xE4\x52\x5C\x7B\x8C\xD8\xA7\x4D\xAE\xB1\x53\x78\xD0\x29\x96\xD3", b"\xA1\x2F\xFF\x43\xD9\xEF\x76\x4C\x9E\xE2\x10\xEA\x57\x22\x76\x5F", ): diff --git a/lib/cuckoo/common/integrations/parse_pe.py b/lib/cuckoo/common/integrations/parse_pe.py index e21ede0a2ba..b75169bce16 100644 --- a/lib/cuckoo/common/integrations/parse_pe.py +++ b/lib/cuckoo/common/integrations/parse_pe.py @@ -164,6 +164,16 @@ def __init__(self, file_path: str = False, data: bytes = False): log.debug("PE type not recognised: %s", e) # self.results = results + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def close(self): + if self.pe: + self.pe.close() + @property def file_data(self): if not self._file_data and path_exists(self.file_path): diff --git a/lib/cuckoo/common/mapTTPs.py b/lib/cuckoo/common/mapTTPs.py index 186f8b94e9b..1e95696ea2e 100644 --- a/lib/cuckoo/common/mapTTPs.py +++ b/lib/cuckoo/common/mapTTPs.py @@ -8,7 +8,8 @@ ttps_map_file = os.path.join(CUCKOO_ROOT, "data", "mitre", "TTPs.json") if os.path.exists(ttps_map_file): try: - ttpDict = json.loads(open(ttps_map_file, "r").read()) + with open(ttps_map_file, "r") as f: + ttpDict = json.load(f) except Exception as e: print("Can't load TTPs.json file", e) diff --git a/lib/cuckoo/common/quarantine.py b/lib/cuckoo/common/quarantine.py index 1d39dadcb06..0d8eaa93444 100644 --- a/lib/cuckoo/common/quarantine.py +++ b/lib/cuckoo/common/quarantine.py @@ -721,8 +721,10 @@ def unquarantine(f): tmp_path = unquarantine(sys.argv[1]) if tmp_path: - original = hashlib.sha256(open(sys.argv[1], "rb").read()).hexdigest() - unq = hashlib.sha256(open(tmp_path, "rb").read()).hexdigest() + with open(sys.argv[1], "rb") as f: + original = hashlib.sha256(f.read()).hexdigest() + with open(tmp_path, "rb") as f: + unq = hashlib.sha256(f.read()).hexdigest() if original == unq: print("Unsuported quarantine file format") else: diff --git a/lib/cuckoo/common/saztopcap.py b/lib/cuckoo/common/saztopcap.py index e64fa62217c..90d911e0f77 100644 --- a/lib/cuckoo/common/saztopcap.py +++ b/lib/cuckoo/common/saztopcap.py @@ -163,13 +163,15 @@ def saz_to_pcap(sazpath): src = m.group("clientip") elif m and m.group("hostip"): dst = m.group("hostip") - req = open(f"{fiddler_raw_dir}{fid}_c.txt").read() + 
with open(f"{fiddler_raw_dir}{fid}_c.txt") as f: + req = f.read() m = re.match(r"^(?P[^\r\n\s]+)\s+(?Phttps?\:\/\/[^\/\r\n\:]+(\:(?P\d{1,5}))?)\/", req) if m and m.group("verb") != "CONNECT": req = req.replace(m.group("host_and_port"), "", 1) if m.group("dport") and int(m.group("dport")) <= 65535: dport = int(m.group("dport")) - resp = open(f"{fiddler_raw_dir}{fid}_s.txt").read() + with open(f"{fiddler_raw_dir}{fid}_s.txt") as f: + resp = f.read() (seq, ack) = build_handshake(src, dst, sport, dport, pktdump, smac, dmac) (seq, ack) = make_pkts(src, dst, sport, dport, seq, ack, req, pktdump, smac, dmac) (seq, ack) = make_pkts(dst, src, dport, sport, seq, ack, resp, pktdump, dmac, smac) @@ -192,13 +194,15 @@ def saz_to_pcap(sazpath): log.error("Failed to find fiddler ID tag") return None - req = open(f"{fiddler_raw_dir}{fid}_c.txt").read() + with open(f"{fiddler_raw_dir}{fid}_c.txt") as f: + req = f.read() m = re.match(r"^(?P[^\r\n\s]+)\s+(?Phttps?\:\/\/[^\/\r\n\:]+(\:(?P\d{1,5}))?)\/", req) if m and m.group("verb") != "CONNECT": req = req.replace(m.group("host_and_port"), "", 1) if m.group("dport") and int(m.group("dport")) <= 65535: dport = int(m.group("dport")) - resp = open(f"{fiddler_raw_dir}{fid}_s.txt").read() + with open(f"{fiddler_raw_dir}{fid}_s.txt") as f: + resp = f.read() (seq, ack) = build_handshake(src, dst, sport, dport, pktdump, smac, dmac) (seq, ack) = make_pkts(src, dst, sport, dport, seq, ack, req, pktdump, smac, dmac) (seq, ack) = make_pkts(dst, src, dport, sport, seq, ack, resp, pktdump, dmac, smac) diff --git a/lib/cuckoo/common/utils.py b/lib/cuckoo/common/utils.py index 3f2e67dd1bb..5c468efddf6 100644 --- a/lib/cuckoo/common/utils.py +++ b/lib/cuckoo/common/utils.py @@ -602,10 +602,11 @@ def store_temp_file(filedata: bytes, filename: str, path=None) -> bytes: with open(tmp_file_path, "wb") as tmp_file: # If filedata is file object, do chunked copy. if hasattr(filedata, "read"): - chunk = filedata.read(1024) - while chunk: - tmp_file.write(chunk) + with filedata: chunk = filedata.read(1024) + while chunk: + tmp_file.write(chunk) + chunk = filedata.read(1024) else: tmp_file.write(filedata) diff --git a/lib/cuckoo/common/web_utils.py b/lib/cuckoo/common/web_utils.py index cccebf212be..4a0aa0906cb 100644 --- a/lib/cuckoo/common/web_utils.py +++ b/lib/cuckoo/common/web_utils.py @@ -1605,53 +1605,54 @@ def process_new_task_files(request, samples: list, details: dict, opt_filename: """ list_of_files = [] for sample in samples: - # Error if there was only one submitted sample, and it's empty. - # But if there are multiple and one was empty, just ignore it. - if not sample.size: - details["errors"].append({sample.name: "You uploaded an empty file."}) - continue + with sample: + # Error if there was only one submitted sample, and it's empty. + # But if there are multiple and one was empty, just ignore it. + if not sample.size: + details["errors"].append({sample.name: "You uploaded an empty file."}) + continue - size = sample.size - if size > web_cfg.general.max_sample_size and not ( - web_cfg.general.allow_ignore_size and "ignore_size_check" in details["options"] - ): - if not web_cfg.general.enable_trim: + size = sample.size + if size > web_cfg.general.max_sample_size and not ( + web_cfg.general.allow_ignore_size and "ignore_size_check" in details["options"] + ): + if not web_cfg.general.enable_trim: + details["errors"].append( + { + sample.name: f"Uploaded file exceeds the maximum allowed size in conf/web.conf. 
Sample size is: {size / float(1 << 20):,.0f} Allowed size is: {web_cfg.general.max_sample_size / float(1 << 20):,.0f}" + } + ) + continue + + data = sample.read() + + if opt_filename: + filename = opt_filename + else: + filename = sanitize_filename(sample.name) + + # Moving sample from django temporary file to CAPE temporary storage for persistence, if configured by user. + try: + path = store_temp_file(data, filename) + target_file = File(path) + sha256 = target_file.get_sha256() + except OSError: details["errors"].append( - { - sample.name: f"Uploaded file exceeds the maximum allowed size in conf/web.conf. Sample size is: {size / float(1 << 20):,.0f} Allowed size is: {web_cfg.general.max_sample_size / float(1 << 20):,.0f}" - } + {filename: "Temp folder from cuckoo.conf, disk is out of space. Clean some space before continue."} ) continue - data = sample.read() - - if opt_filename: - filename = opt_filename - else: - filename = sanitize_filename(sample.name) - - # Moving sample from django temporary file to CAPE temporary storage for persistence, if configured by user. - try: - path = store_temp_file(data, filename) - target_file = File(path) - sha256 = target_file.get_sha256() - except OSError: - details["errors"].append( - {filename: "Temp folder from cuckoo.conf, disk is out of space. Clean some space before continue."} - ) - continue - - if ( - not request.user.is_staff - and (web_cfg.uniq_submission.enabled or unique) - and db.check_file_uniq(sha256, hours=web_cfg.uniq_submission.hours) - ): - details["errors"].append( - {filename: "Duplicated file, disable unique option on submit or in conf/web.conf to force submission"} - ) - continue + if ( + not request.user.is_staff + and (web_cfg.uniq_submission.enabled or unique) + and db.check_file_uniq(sha256, hours=web_cfg.uniq_submission.hours) + ): + details["errors"].append( + {filename: "Duplicated file, disable unique option on submit or in conf/web.conf to force submission"} + ) + continue - list_of_files.append((data, path, sha256)) + list_of_files.append((data, path, sha256)) return list_of_files, details @@ -1690,78 +1691,6 @@ def process_new_dlnexec_task(url: str, route: str, options: str, custom: str): return path, response, "" -def submit_task( - target: str, - package: str = "", - timeout: int = 0, - task_options: str = "", - priority: int = 1, - machine: str = "", - platform: str = "", - memory: bool = False, - enforce_timeout: bool = False, - clock: str = None, - tags: str = None, - parent_id: int = None, - tlp: bool = None, - distributed: bool = False, - filename: str = "", - server_url: str = "", -): - """ - ToDo add url support in future - """ - if not path_exists(target): - log.info("File doesn't exist") - return - - task_id = False - if distributed: - options = { - "package": package, - "timeout": timeout, - "options": task_options, - "priority": priority, - # "machine": machine, - "platform": platform, - "memory": memory, - "enforce_timeout": enforce_timeout, - "clock": clock, - "tags": tags, - "parent_id": parent_id, - "filename": filename, - } - - multipart_file = [("file", (os.path.basename(target), open(target, "rb")))] - try: - res = requests.post(server_url, files=multipart_file, data=options) - if res and res.ok: - task_id = res.json()["data"]["task_ids"][0] - except Exception as e: - log.error(e) - else: - task_id = db.add_path( - file_path=target, - package=package, - timeout=timeout, - options=task_options, - priority=priority, - machine=machine, - platform=platform, - memory=memory, - 
enforce_timeout=enforce_timeout, - parent_id=parent_id, - tlp=tlp, - filename=filename, - ) - if not task_id: - log.warning("Error adding CAPE task to database: %s", package) - return task_id - - log.info('CAPE detection on file "%s": %s - added as CAPE task with ID %s', target, package, task_id) - return task_id - - # https://stackoverflow.com/questions/14989858/get-the-current-git-hash-in-a-python-script/68215738#68215738 def get_running_commit() -> str: """ diff --git a/lib/cuckoo/core/analysis_manager.py b/lib/cuckoo/core/analysis_manager.py index 4f230d3da35..6039097e2f3 100644 --- a/lib/cuckoo/core/analysis_manager.py +++ b/lib/cuckoo/core/analysis_manager.py @@ -257,7 +257,8 @@ def build_options(self): options["file_name"] = file_obj.get_name() options["file_type"] = file_obj.get_type() # if it's a PE file, collect export information to use in more smartly determining the right package to use - options["exports"] = PortableExecutable(self.task.target).get_dll_exports() + with PortableExecutable(self.task.target) as pe: + options["exports"] = pe.get_dll_exports() del file_obj # options from auxiliary.conf diff --git a/lib/cuckoo/core/database.py b/lib/cuckoo/core/database.py index c1d95d6c906..1b96cc0a53c 100644 --- a/lib/cuckoo/core/database.py +++ b/lib/cuckoo/core/database.py @@ -1654,7 +1654,8 @@ def demux_sample_and_add_to_db( log.info("Do sandbox packages need an update? Sflock identifies as: %s - %s", tmp_package, file) if package == "dll" and "function" not in options: - dll_export = PortableExecutable(file.decode()).choose_dll_export() + with PortableExecutable(file.decode()) as pe: + dll_export = pe.choose_dll_export() if dll_export == "DllRegisterServer": package = "regsvr" elif dll_export == "xlAutoOpen": diff --git a/lib/cuckoo/core/log.py b/lib/cuckoo/core/log.py index 8e983750109..54bb7100cc7 100644 --- a/lib/cuckoo/core/log.py +++ b/lib/cuckoo/core/log.py @@ -147,6 +147,33 @@ def task_log_stop(task_id): _tasks_lock.release() +def task_log_stop_force(task_id): + """Force disassociate all threads from a task and close the log file.""" + _tasks_lock.acquire() + try: + if task_id not in _task_threads: + return + + # Close the file handle (shared by all threads for this task) + # We can take it from any associated thread + if _task_threads[task_id]: + first_key = _task_threads[task_id][0] + if first_key in _tasks: + _, fp = _tasks[first_key] + try: + fp.close() + except Exception as e: + logging.warning("Failed to force-close log for task %d: %s", task_id, e) + + # Cleanup all references + for thread_key in _task_threads[task_id]: + _tasks.pop(thread_key, None) + + _task_threads.pop(task_id, None) + finally: + _tasks_lock.release() + + def init_logger(name, level=None): formatter = logging.Formatter("%(asctime)s [%(name)s] %(levelname)s: %(message)s") diff --git a/lib/cuckoo/core/plugins.py b/lib/cuckoo/core/plugins.py index 330765b147a..1bbb779cba7 100644 --- a/lib/cuckoo/core/plugins.py +++ b/lib/cuckoo/core/plugins.py @@ -813,6 +813,9 @@ def __init__(self, task, results, reprocess=False): self.task = task if results.get("pefiles"): + for pe in results["pefiles"].values(): + with suppress(Exception): + pe.close() del results["pefiles"] # remove unwanted/duplicate information from reporting diff --git a/lib/cuckoo/core/resultserver.py b/lib/cuckoo/core/resultserver.py index ae560bc050b..c0c44d070a2 100644 --- a/lib/cuckoo/core/resultserver.py +++ b/lib/cuckoo/core/resultserver.py @@ -32,7 +32,7 @@ # from lib.cuckoo.common.netlog import BsonParser from 
lib.cuckoo.common.utils import Singleton, create_folder, default_converter, load_categories -from lib.cuckoo.core.log import task_log_start, task_log_stop +from lib.cuckoo.core.log import task_log_start, task_log_stop, task_log_stop_force log = logging.getLogger(__name__) cfg = Config() @@ -479,10 +479,7 @@ def del_task(self, task_id, ipaddr): for ctx in ctxs: log.debug("Task #%s: Cancel %s", task_id, ctx) ctx.cancel() - # ToDo just reinforce cleanup - task_log_stop(task_id) - - task_log_stop(task_id) + task_log_stop_force(task_id) def create_folders(self): for folder in list(RESULT_UPLOADABLE) + [b"logs"]: diff --git a/modules/processing/analysisinfo.py b/modules/processing/analysisinfo.py index b614cbe38f4..fa09d0f838f 100644 --- a/modules/processing/analysisinfo.py +++ b/modules/processing/analysisinfo.py @@ -60,7 +60,8 @@ def get_package(self): package = package["name"] if not package and path_exists(self.log_path): try: - analysis_log = codecs.open(self.log_path, "rb", "utf-8").read() + with codecs.open(self.log_path, "rb", "utf-8") as f: + analysis_log = f.read() except ValueError as e: raise CuckooProcessingError(f"Error decoding {self.log_path}: {e}") from e except (IOError, OSError) as e: diff --git a/modules/processing/debug.py b/modules/processing/debug.py index 6d861dbf6fa..86a353a365d 100644 --- a/modules/processing/debug.py +++ b/modules/processing/debug.py @@ -24,7 +24,8 @@ def run(self): if path_exists(self.log_path): try: buf_size = self.options.get("buffer", 8192) - content = codecs.open(self.log_path, "rb", "utf-8").read() + with codecs.open(self.log_path, "rb", "utf-8") as f: + content = f.read() debug["log"] = truncate_str(content, buf_size) except ValueError as e: raise CuckooProcessingError(f"Error decoding {self.log_path}: {e}") from e diff --git a/modules/processing/dumptls.py b/modules/processing/dumptls.py index 8098e8768c6..73ae97c716a 100644 --- a/modules/processing/dumptls.py +++ b/modules/processing/dumptls.py @@ -34,24 +34,25 @@ def run(self): if not path_exists(dump_tls_log): return results - for entry in open(dump_tls_log, "r").readlines() or []: - try: - for m in re.finditer( - r"client_random:\s*(?P[a-f0-9]+)\s*,\s*server_random:\s*(?P[a-f0-9]+)\s*,\s*master_secret:\s*(?P[a-f0-9]+)\s*", - entry, - re.I, - ): - try: - server_random = binascii.a2b_hex(m.group("server_random").strip()) - master_secret = binascii.a2b_hex(m.group("master_secret").strip()) - if server_random not in metakeys: - log.debug("Was unable to extract TLS master secret for server random %s, skipping it", server_random) - continue - results[metakeys[server_random]] = master_secret - except Exception as e: - log.warning("Problem dealing with tlsdump error: %s line: %s", e, m.group(0)) - except Exception as e: - log.warning("Problem dealing with tlsdump error: %s line: %s", e, entry) + with open(dump_tls_log, "r") as f: + for entry in f: + try: + for m in re.finditer( + r"client_random:\s*(?P[a-f0-9]+)\s*,\s*server_random:\s*(?P[a-f0-9]+)\s*,\s*master_secret:\s*(?P[a-f0-9]+)\s*", + entry, + re.I, + ): + try: + server_random = binascii.a2b_hex(m.group("server_random").strip()) + master_secret = binascii.a2b_hex(m.group("master_secret").strip()) + if server_random not in metakeys: + log.debug("Was unable to extract TLS master secret for server random %s, skipping it", server_random) + continue + results[metakeys[server_random]] = master_secret + except Exception as e: + log.warning("Problem dealing with tlsdump error: %s line: %s", e, m.group(0)) + except Exception as e: + 
log.warning("Problem dealing with tlsdump error: %s line: %s", e, entry) if results: # Write the TLS master secrets file. diff --git a/modules/processing/network.py b/modules/processing/network.py index 149ced7d0e5..dc0a3c5c621 100644 --- a/modules/processing/network.py +++ b/modules/processing/network.py @@ -1159,22 +1159,23 @@ def get_tlsmaster(self): if not path_exists(dump_tls_log): return tlsmaster - for entry in open(dump_tls_log, "r").readlines() or []: - try: - for m in re.finditer( - r"client_random:\s*(?P[a-f0-9]+)\s*,\s*server_random:\s*(?P[a-f0-9]+)\s*,\s*master_secret:\s*(?P[a-f0-9]+)\s*", - entry, - re.I, - ): - try: - client_random = binascii.a2b_hex(m.group("client_random").strip()) - server_random = binascii.a2b_hex(m.group("server_random").strip()) - master_secret = binascii.a2b_hex(m.group("master_secret").strip()) - tlsmaster[client_random, server_random] = master_secret - except Exception as e: - log.warning("Problem dealing with tlsdump error: %s line: %s", e, m.group(0)) - except Exception as e: - log.warning("Problem dealing with tlsdump error: %s line: %s", e, entry) + with open(dump_tls_log, "r") as f: + for entry in f: + try: + for m in re.finditer( + r"client_random:\s*(?P[a-f0-9]+)\s*,\s*server_random:\s*(?P[a-f0-9]+)\s*,\s*master_secret:\s*(?P[a-f0-9]+)\s*", + entry, + re.I, + ): + try: + client_random = binascii.a2b_hex(m.group("client_random").strip()) + server_random = binascii.a2b_hex(m.group("server_random").strip()) + master_secret = binascii.a2b_hex(m.group("master_secret").strip()) + tlsmaster[client_random, server_random] = master_secret + except Exception as e: + log.warning("Problem dealing with tlsdump error: %s line: %s", e, m.group(0)) + except Exception as e: + log.warning("Problem dealing with tlsdump error: %s line: %s", e, entry) return tlsmaster diff --git a/modules/reporting/maec5.py b/modules/reporting/maec5.py index 59958b076aa..fcb40e1eb71 100644 --- a/modules/reporting/maec5.py +++ b/modules/reporting/maec5.py @@ -750,4 +750,5 @@ def output(self): if __name__ == "__main__": mr = MaecReport() - mr.run(json.loads(open(sys.argv[1], "rb").read())) + with open(sys.argv[1], "rb") as f: + mr.run(json.load(f)) diff --git a/utils/dist.py b/utils/dist.py index f319bdc2055..869000865f3 100644 --- a/utils/dist.py +++ b/utils/dist.py @@ -445,7 +445,8 @@ def node_submit_task(task_id, node_id, main_task_id): # encoding problem if r.status_code == 500 and task.category == "file": - r = requests.post(url, data=data, files={"file": ("file", open(task.path, "rb").read())}, verify=False) + with open(task.path, "rb") as f: + r = requests.post(url, data=data, files={"file": ("file", f.read())}, verify=False) # Zip files preprocessed, so only one id if r and r.status_code == 200: @@ -826,7 +827,7 @@ def fetcher(self): # node_data.enabled = False # db.commit() db.commit() - # time.sleep(5) + time.sleep(5) def delete_target_file(self, task_id: int, sample_sha256: str, target: str): """ @@ -971,7 +972,8 @@ def fetch_latest_reports_nfs(self): if sample_sha256 is None: # keep fallback for now - sample = open(t.path, "rb").read() + with open(t.path, "rb") as f: + sample = f.read() sample_sha256 = hashlib.sha256(sample).hexdigest() destination = os.path.join(binaries_folder, sample_sha256) @@ -1129,7 +1131,8 @@ def fetch_latest_reports(self): sample_sha256 = samples[0].sample.sha256 if sample_sha256 is None: # keep fallback for now - sample = open(t.path, "rb").read() + with open(t.path, "rb") as f: + sample = f.read() sample_sha256 = 
hashlib.sha256(sample).hexdigest() destination = os.path.join(CUCKOO_ROOT, "storage", "binaries") diff --git a/utils/process.py b/utils/process.py index 84e6edd5d3d..c3c50009f03 100644 --- a/utils/process.py +++ b/utils/process.py @@ -131,60 +131,62 @@ def process( set_formatter_fmt(task_id, main_task_id) setproctitle(f"{original_proctitle} [Task {task_id}]") results = {"statistics": {"processing": [], "signatures": [], "reporting": []}} - if memory_debugging: - gc.collect() - log.info("(1) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage)) - if memory_debugging: - gc.collect() - log.info("(2) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage)) - with db.session.begin(): - RunProcessing(task=task_dict, results=results).run() - if memory_debugging: - gc.collect() - log.info("(3) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage)) - - RunSignatures(task=task_dict, results=results).run() - if memory_debugging: - gc.collect() - log.info("(4) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage)) - - if report: - if auto or capeproc: - reprocess = False - else: - reprocess = report - - error_count = RunReporting(task=task.to_dict(), results=results, reprocess=reprocess).run() - status = TASK_REPORTED if error_count == 0 else TASK_FAILED_REPORTING + try: + if memory_debugging: + gc.collect() + log.info("(1) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage)) + if memory_debugging: + gc.collect() + log.info("(2) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage)) with db.session.begin(): - db.set_status(task_id, status) - - if auto: - # Is ok to delete original file, but we need to lookup on delete_bin_copy if no more pendings tasks - if cfg.cuckoo.delete_original and target and path_exists(target): - path_delete(target) + RunProcessing(task=task_dict, results=results).run() + if memory_debugging: + gc.collect() + log.info("(3) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage)) + + RunSignatures(task=task_dict, results=results).run() + if memory_debugging: + gc.collect() + log.info("(4) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage)) + + if report: + if auto or capeproc: + reprocess = False + else: + reprocess = report - if cfg.cuckoo.delete_bin_copy and task.category != "url": - copy_path = os.path.join(CUCKOO_ROOT, "storage", "binaries", sample_sha256) - if path_exists(copy_path): - with db.session.begin(): - is_still_used = db.sample_still_used(sample_sha256, task_id) - if not is_still_used: - path_delete(copy_path) + error_count = RunReporting(task=task.to_dict(), results=results, reprocess=reprocess).run() + status = TASK_REPORTED if error_count == 0 else TASK_FAILED_REPORTING + with db.session.begin(): + db.set_status(task_id, status) - if memory_debugging: - gc.collect() - log.info("(5) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage)) - for i, obj in enumerate(gc.garbage): - log.info("(garbage) GC object #%d: type=%s", i, type(obj).__name__) + if auto: + # Is ok to delete original file, but we need to lookup on delete_bin_copy if no more pendings tasks + if cfg.cuckoo.delete_original and target and path_exists(target): + path_delete(target) - log.removeHandler(per_analysis_handler) + if cfg.cuckoo.delete_bin_copy and task.category != "url": + copy_path = os.path.join(CUCKOO_ROOT, "storage", "binaries", sample_sha256) + if path_exists(copy_path): + with db.session.begin(): + is_still_used = db.sample_still_used(sample_sha256, task_id) + if not 
is_still_used: + path_delete(copy_path) + + if memory_debugging: + gc.collect() + log.info("(5) GC object counts: %d, %d", len(gc.get_objects()), len(gc.garbage)) + for i, obj in enumerate(gc.garbage): + log.info("(garbage) GC object #%d: type=%s", i, type(obj).__name__) + finally: + per_analysis_handler.close() + log.removeHandler(per_analysis_handler) - # Remove the SQLAlchemy session to ensure the next task pulls objects from - # the database, instead of relying on a potentially outdated object cache. - # Stale data can prevent SQLAlchemy from querying the database or issuing - # statements, resulting in unexpected errors and inconsistencies. - db.session.remove() + # Remove the SQLAlchemy session to ensure the next task pulls objects from + # the database, instead of relying on a potentially outdated object cache. + # Stale data can prevent SQLAlchemy from querying the database or issuing + # statements, resulting in unexpected errors and inconsistencies. + db.session.remove() def init_worker(): @@ -192,6 +194,35 @@ def init_worker(): # See https://docs.sqlalchemy.org/en/14/core/pooling.html#using-connection-pools-with-multiprocessing-or-os-fork db.engine.dispose(close=False) + # Fix for open file handles on rotated logs in workers + for h in log.handlers[:]: + if isinstance(h, logging.FileHandler): + h.close() + log.removeHandler(h) + + # Restore Console Handler + ch = ConsoleHandler() + ch.setFormatter(FORMATTER) + log.addHandler(ch) + + # Restore Syslog Handler if enabled + if logconf.logger.syslog_process: + try: + slh = logging.handlers.SysLogHandler(address=logconf.logger.syslog_dev) + slh.setFormatter(FORMATTER) + log.addHandler(slh) + except Exception as e: + log.warning("Failed to restore Syslog handler in worker: %s", e) + + # Restore File Handler using WatchedFileHandler to support rotation + try: + path = os.path.join(CUCKOO_ROOT, "log", "process.log") + fh = logging.handlers.WatchedFileHandler(path) + fh.setFormatter(FORMATTER) + log.addHandler(fh) + except PermissionError as e: + log.warning("Failed to restore File handler in worker due to permissions: %s", e) + def get_formatter_fmt(task_id=None, main_task_id=None): """ diff --git a/utils/submit.py b/utils/submit.py index 8785b58f5c8..68207a62c08 100644 --- a/utils/submit.py +++ b/utils/submit.py @@ -278,56 +278,57 @@ def main(): else: url = "http://{0}/apiv2/tasks/create/file/".format(args.remote) - files = dict(file=open(file_path, "rb"), filename=os.path.basename(file_path)) - - data = dict( - package=args.package, - timeout=sane_timeout, - options=args.options, - priority=args.priority, - machine=args.machine, - platform=args.platform, - memory=args.memory, - enforce_timeout=args.enforce_timeout, - custom=args.custom, - tags=args.tags, - route=args.route, - ) - - try: - if args.user and args.password: - if args.ssl: - if args.sslnoverify: - verify = False + with open(file_path, "rb") as f: + files = dict(file=f, filename=os.path.basename(file_path)) + + data = dict( + package=args.package, + timeout=sane_timeout, + options=args.options, + priority=args.priority, + machine=args.machine, + platform=args.platform, + memory=args.memory, + enforce_timeout=args.enforce_timeout, + custom=args.custom, + tags=args.tags, + route=args.route, + ) + + try: + if args.user and args.password: + if args.ssl: + if args.sslnoverify: + verify = False + else: + verify = True + response = requests.post(url, auth=(args.user, args.password), files=files, data=data, verify=verify) else: - verify = True - response = requests.post(url, 
auth=(args.user, args.password), files=files, data=data, verify=verify) - else: - response = requests.post(url, auth=(args.user, args.password), files=files, data=data) - elif args.token: - if args.ssl: - if args.sslnoverify: - verify = False + response = requests.post(url, auth=(args.user, args.password), files=files, data=data) + elif args.token: + if args.ssl: + if args.sslnoverify: + verify = False + else: + verify = True + response = requests.post( + url, headers={"Authorization": f"Token {args.token}"}, files=files, data=data, verify=verify + ) else: - verify = True - response = requests.post( - url, headers={"Authorization": f"Token {args.token}"}, files=files, data=data, verify=verify - ) + response = requests.post(url, headers={"Authorization": f"Token {args.token}"}, files=files, data=data) else: - response = requests.post(url, headers={"Authorization": f"Token {args.token}"}, files=files, data=data) - else: - if args.ssl: - if args.sslnoverify: - verify = False + if args.ssl: + if args.sslnoverify: + verify = False + else: + verify = True + response = requests.post(url, files=files, data=data, verify=verify) else: - verify = True - response = requests.post(url, files=files, data=data, verify=verify) - else: - response = requests.post(url, files=files, data=data) + response = requests.post(url, files=files, data=data) - except Exception as e: - print((bold(red("Error")) + ": unable to send file: {0}".format(e))) - return False + except Exception as e: + print((bold(red("Error")) + ": unable to send file: {0}".format(e))) + return False json = response.json() task_ids = json["data"].get("task_ids") @@ -343,7 +344,8 @@ def main(): continue try: - tmp_path = store_temp_file(open(file_path, "rb").read(), sanitize_filename(os.path.basename(file_path))) + with open(file_path, "rb") as f: + tmp_path = store_temp_file(f.read(), sanitize_filename(os.path.basename(file_path))) with db.session.begin(): # ToDo expose extra_details["errors"] task_ids, extra_details = db.demux_sample_and_add_to_db( diff --git a/utils/vpn2cape.py b/utils/vpn2cape.py index 911efdd0ea1..2232aa949fa 100644 --- a/utils/vpn2cape.py +++ b/utils/vpn2cape.py @@ -25,7 +25,8 @@ def main(folder, port): for index, file in enumerate(files): if file.endswith(".ovpn"): path = os.path.join(folder, file) - tmp = open(path, "rt").read() + with open(path, "rt") as f: + tmp = f.read() write = 0 # rt_table @@ -75,9 +76,8 @@ def main(folder, port): if write: # updating config - tmp2 = open(path, "wt") - tmp2.write(tmp) - tmp2.close() + with open(path, "wt") as tmp2: + tmp2.write(tmp) if vpns: print("\n\n\n[+] VPNs for CAPE's routing.conf") diff --git a/web/apiv2/views.py b/web/apiv2/views.py index ae0f7690597..909cc12eac2 100644 --- a/web/apiv2/views.py +++ b/web/apiv2/views.py @@ -220,22 +220,23 @@ def tasks_create_static(request): extra_details = {} task_ids = [] for sample in files: - tmp_path = store_temp_file(sample.read(), sanitize_filename(sample.name)) - try: - task_id, extra_details = db.demux_sample_and_add_to_db( - tmp_path, - options=options, - priority=priority, - static=1, - only_extraction=True, - user_id=request.user.id or 0, - ) - task_ids.extend(task_id) - if extra_details.get("erros"): - resp["errors"].extend(extra_details["errors"]) - except CuckooDemuxError as e: - resp = {"error": True, "error_value": e} - return Response(resp) + with sample: + tmp_path = store_temp_file(sample.read(), sanitize_filename(sample.name)) + try: + task_id, extra_details = db.demux_sample_and_add_to_db( + tmp_path, + options=options, 
+ priority=priority, + static=1, + only_extraction=True, + user_id=request.user.id or 0, + ) + task_ids.extend(task_id) + if extra_details.get("erros"): + resp["errors"].extend(extra_details["errors"]) + except CuckooDemuxError as e: + resp = {"error": True, "error_value": e} + return Response(resp) resp["data"] = {} resp["data"]["task_ids"] = task_ids diff --git a/web/submission/views.py b/web/submission/views.py index cafbcdb62b7..32d36f8e4f0 100644 --- a/web/submission/views.py +++ b/web/submission/views.py @@ -383,6 +383,19 @@ def index(request, task_id=None, resubmit_hash=None): } if opt_apikey: details["apikey"] = opt_apikey + + if web_conf.pre_script.enabled and "pre_script" in request.FILES: + pre_script = request.FILES["pre_script"] + details["pre_script_name"] = pre_script.name + details["pre_script_content"] = pre_script.read() + pre_script.close() + + if web_conf.during_script.enabled and "during_script" in request.FILES: + during_script = request.FILES["during_script"] + details["during_script_name"] = during_script.name + details["during_script_content"] = during_script.read() + during_script.close() + task_category = False samples = [] if "hash" in request.POST and request.POST.get("hash", False) and request.POST.get("hash")[0] != "": @@ -485,16 +498,6 @@ def index(request, task_id=None, resubmit_hash=None): if task_category == "resubmit": for content, path, sha256 in list_of_tasks: - if web_conf.pre_script.enabled and "pre_script" in request.FILES: - pre_script = request.FILES["pre_script"] - details["pre_script_name"] = request.FILES["pre_script"].name - details["pre_script_content"] = pre_script.read() - - if web_conf.during_script.enabled and "during_script" in request.FILES: - during_script = request.FILES["during_script"] - details["during_script_name"] = request.FILES["during_script"].name - details["during_script_content"] = during_script.read() - details["path"] = path details["content"] = content status, tasks_details = download_file(**details) @@ -513,16 +516,6 @@ def index(request, task_id=None, resubmit_hash=None): elif task_category == "sample": details["service"] = "WebGUI" for content, path, sha256 in list_of_tasks: - if web_conf.pre_script.enabled and "pre_script" in request.FILES: - pre_script = request.FILES["pre_script"] - details["pre_script_name"] = request.FILES["pre_script"].name - details["pre_script_content"] = pre_script.read() - - if web_conf.during_script.enabled and "during_script" in request.FILES: - during_script = request.FILES["during_script"] - details["during_script_name"] = request.FILES["during_script"].name - details["during_script_content"] = during_script.read() - if timeout and web_conf.public.enabled and web_conf.public.timeout and timeout > web_conf.public.timeout: timeout = web_conf.public.timeout diff --git a/web/web/allauth_adapters.py b/web/web/allauth_adapters.py index 12fdf93eda3..03942011e9d 100644 --- a/web/web/allauth_adapters.py +++ b/web/web/allauth_adapters.py @@ -8,7 +8,8 @@ disposable_domain_list = [] if hasattr(settings, "DISPOSABLE_DOMAIN_LIST"): - disposable_domain_list = [domain.strip() for domain in open(settings.DISPOSABLE_DOMAIN_LIST, "r").readlines()] + with open(settings.DISPOSABLE_DOMAIN_LIST, "r") as f: + disposable_domain_list = [domain.strip() for domain in f] class DisposableEmails(DefaultAccountAdapter):
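The recurring fix throughout this patch is replacing bare open(...).read() calls with with-blocks, so the descriptor is released deterministically instead of whenever the file object happens to be garbage-collected. A minimal, self-contained sketch of the chunked-hashing shape used in lib/cuckoo/common/cape_utils.py (the BUFSIZE value and the __main__ usage are illustrative, not taken from the patch):

    import hashlib

    BUFSIZE = 1024 * 1024  # illustrative chunk size; the real constant is defined in cape_utils.py

    def hash_file(method, path: str) -> str:
        """Compute a file hash in chunks; the with-block closes the fd even on error."""
        h = method()
        with open(path, "rb") as f:
            while True:
                buf = f.read(BUFSIZE)
                if not buf:
                    break
                h.update(buf)
        return h.hexdigest()

    if __name__ == "__main__":
        # Usage sketch: hash this script itself.
        print(hash_file(hashlib.sha256, __file__))

The one-shot readers get the same treatment: hashlib.sha256(open(p, "rb").read()).hexdigest() becomes a two-line with-block, which is what the lib/cuckoo/common/quarantine.py and utils/dist.py hunks do.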
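parse_pe.py additionally makes PortableExecutable a context manager, so callers such as analysis_manager.py, database.py and file_extra_info.py can guarantee that the pefile handle (and the mapping it holds) is released as soon as the result is extracted. A reduced sketch of the idea, with a simplified class name and a minimal get_dll_exports body; only the pefile package is assumed:

    import pefile

    class PEWrapper:
        """Reduced stand-in for the patched PortableExecutable class."""

        def __init__(self, file_path: str):
            self.pe = None
            try:
                self.pe = pefile.PE(file_path)
            except pefile.PEFormatError:
                pass  # not a PE file; close() stays a no-op

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_val, exc_tb):
            self.close()

        def close(self):
            # pefile keeps the file mapped; close() releases the mapping and descriptor.
            if self.pe:
                self.pe.close()

        def get_dll_exports(self) -> str:
            if not self.pe or not hasattr(self.pe, "DIRECTORY_ENTRY_EXPORT"):
                return ""
            names = [exp.name.decode() for exp in self.pe.DIRECTORY_ENTRY_EXPORT.symbols if exp.name]
            return ",".join(names)

    # Usage sketch mirroring the analysis_manager.py hunk (the target path is hypothetical):
    # with PEWrapper("/tmp/sample.exe") as pe:
    #     exports = pe.get_dll_exports()
    # The handle is closed here even if get_dll_exports() raised.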
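utils/process.py also stops pool workers from keeping descriptors to rotated logs open: init_worker() closes every FileHandler inherited across fork and reattaches fresh handlers, using WatchedFileHandler so an externally rotated process.log is reopened automatically. A standalone sketch of that initializer, using the root logger and an illustrative log path (the patched code additionally restores CAPE's syslog handler and builds the path from CUCKOO_ROOT):

    import logging
    import logging.handlers
    import multiprocessing

    LOG_PATH = "process.log"  # illustrative; CAPE writes to CUCKOO_ROOT/log/process.log
    FORMATTER = logging.Formatter("%(asctime)s [%(name)s] %(levelname)s: %(message)s")
    log = logging.getLogger()

    def init_worker():
        # Handlers inherited through fork keep the parent's descriptor open in every
        # worker; after rotation those descriptors still point at the deleted file.
        for handler in log.handlers[:]:
            if isinstance(handler, logging.FileHandler):
                handler.close()
                log.removeHandler(handler)

        console = logging.StreamHandler()
        console.setFormatter(FORMATTER)
        log.addHandler(console)

        try:
            # WatchedFileHandler reopens the file whenever its inode changes, so
            # external rotation is picked up without restarting the workers.
            file_handler = logging.handlers.WatchedFileHandler(LOG_PATH)
            file_handler.setFormatter(FORMATTER)
            log.addHandler(file_handler)
        except PermissionError as exc:
            log.warning("Could not reopen %s in worker: %s", LOG_PATH, exc)

    if __name__ == "__main__":
        # Each worker runs init_worker() once before it starts picking up tasks.
        with multiprocessing.Pool(processes=2, initializer=init_worker) as pool:
            pool.map(len, ["task-1", "task-2"])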