Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"require": {
"monolog/monolog": "^1.25",
"monolog/monolog": "^2.11",
"phpoffice/phpexcel": "= 1.8.2"
}
}
66 changes: 41 additions & 25 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions docker/dev/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ RUN apt-get update && apt-get install -y software-properties-common && \
apache2 \
curl \
mysql-client \
pandoc \
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Love pandoc and I support this! But please add it to the main Dockerfile (in the repo root) if it is not already there too.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, @asuquoe62-star Can you add this to the commit? If not we can sync up on it or I can add it.

unzip \
zip \
php$PHP_VERSION \
Expand Down
55 changes: 28 additions & 27 deletions htdocs/export/export_word.php
Original file line number Diff line number Diff line change
@@ -1,27 +1,28 @@
<?php
#
# Exports the given HTML content as word document
#
include("../includes/db_lib.php");
putUILog('export_word', 'X', basename($_SERVER['REQUEST_URI'], ".php"), 'X', 'X', 'X');

$date = date("Ymdhi");
$file_name = "blisreport_".$date.".doc";
header("Content-Type: application/vnd.ms-word");
header("Content-Disposition: attachment; filename=$file_name");
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=Windows-1252" />
<title>Saves as a Word Doc</title>
</head>
<body>
<?php
$id=$_REQUEST['lab_id'];
$var=dirname(dirname(__FILE__))."\logos\logo_".$id.".jpg";
$html_content = "<img src='".$var."'"." height='140' width='140' />" . stripcslashes($_REQUEST['data']);
print $html_content;
?>
</body>
</html>
<?php
#
# Word export endpoint.
# Takes the HTML report posted in 'data', optionally prefixes the lab logo,
# and hands the result to the shared export helpers.
#
require_once("../includes/db_lib.php");
require_once(__DIR__."/word_export_lib.php");
putUILog('export_word', 'X', basename($_SERVER['REQUEST_URI'], ".php"), 'X', 'X', 'X');

# Request inputs: a numeric lab id (0 when absent) and the raw HTML payload.
$html_payload = isset($_REQUEST['data']) ? $_REQUEST['data'] : '';
$id = isset($_REQUEST['lab_id']) ? intval($_REQUEST['lab_id']) : 0;
$html_content = blis_word_normalize_html_fragment($html_payload);

# Prepend the lab logo only for a positive lab id whose logo file exists.
# Pandoc handles local filesystem image references from HTML.
$logo_path = __DIR__."/../logos/logo_".$id.".jpg";
if($id > 0 && is_file($logo_path))
{
    $html_content = '<img src="'.$logo_path.'" height="140" width="140" />'."\n".$html_content;
}

# Try the .docx export first; the helper reports false when it could not
# produce a document, in which case the legacy .doc path is used instead
# (keeps backward compatibility if pandoc is unavailable).
if(blis_word_export_docx($html_content, "blisreport") === false)
{
    blis_word_send_legacy_doc($html_content, "blisreport");
}
50 changes: 25 additions & 25 deletions htdocs/export/export_word_aggregate.php
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
<?php
#
# Exports the given HTML content as word document
#
include("../includes/db_lib.php");
putUILog('export_word_aggregate', 'X', basename($_SERVER['REQUEST_URI'], ".php"), 'X', 'X', 'X');
$date = date("Ymdhi");
$file_name = "blisreport_".$report_type."_".$date.".doc";
header("Content-Type: application/vnd.ms-word");
header("Content-Disposition: attachment; filename=$file_name");
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=Windows-1252" />
<title>Saves as a Word Doc</title>
</head>
<body>
<?php
$html_content = $_REQUEST['data'];
print $html_content;
?>
</body>
</html>
<?php
#
# Word export endpoint for aggregate reports.
# Takes the HTML report posted in 'data' and an optional 'report_type' that is
# appended to the download file name, then hands off to the shared helpers.
#
require_once("../includes/db_lib.php");
require_once(__DIR__."/word_export_lib.php");
putUILog('export_word_aggregate', 'X', basename($_SERVER['REQUEST_URI'], ".php"), 'X', 'X', 'X');

# Accept only a plain string for report_type: an array-valued request
# parameter would otherwise be cast to the literal "Array" (with a warning).
$report_type = '';
if(isset($_REQUEST['report_type']) && is_string($_REQUEST['report_type']))
{
    $report_type = trim($_REQUEST['report_type']);
}

$html_payload = isset($_REQUEST['data']) ? $_REQUEST['data'] : '';
$html_content = blis_word_normalize_html_fragment($html_payload);

# Test the RAW value for emptiness before sanitizing.
# blis_word_sanitize_filename_segment() substitutes 'blisreport' for an empty
# segment, so checking its result against '' never fails and a missing
# report_type produced the prefix "blisreport_blisreport".
$file_prefix = "blisreport";
if($report_type !== '')
{
    $file_prefix .= "_".blis_word_sanitize_filename_segment($report_type);
}

$exported = blis_word_export_docx($html_content, $file_prefix);
if($exported === false)
{
    # Keep backward compatibility if pandoc is unavailable.
    blis_word_send_legacy_doc($html_content, $file_prefix);
}
135 changes: 135 additions & 0 deletions htdocs/export/word_export_lib.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
<?php
#
# Shared helpers to export posted HTML as a real .docx file.
#

function blis_word_normalize_html_fragment($html_fragment)
{
if(!is_string($html_fragment))
{
return '';
}

# Some callers still pass slashed payloads.
$normalized = stripcslashes($html_fragment);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since stripcslashes is being used on raw $_REQUEST data that is eventually echoed back in the blis_word_send_legacy_doc fallback, we should be careful about XSS. If Pandoc isn't available, we are essentially rendering unsanitized HTML. It might be worth adding a step here to strip <script> tags and sensitive event handlers, as something of a fallback safeguard for the legacy export path.

return str_replace("\0", '', $normalized);
}

function blis_word_sanitize_filename_segment($segment)
{
    # Every character other than ASCII letters, digits, underscore, dot and
    # hyphen is replaced by an underscore.
    $sanitized = preg_replace('/[^A-Za-z0-9_.-]/', '_', (string)$segment);

    # preg_replace() yields null on a PCRE failure; treat that the same as an
    # empty result and fall back to the default segment.
    return ($sanitized === null || $sanitized === '') ? 'blisreport' : $sanitized;
}

function blis_word_send_legacy_doc($html_fragment, $file_prefix)
{
    # Download name: <sanitized prefix>_<YYYYMMDDHHMM>.doc
    $download_name = blis_word_sanitize_filename_segment($file_prefix).'_'.date('YmdHi').'.doc';

    # Served as an attachment with the Word MIME type used by the legacy export.
    header('Content-Type: application/vnd.ms-word');
    header('Content-Disposition: attachment; filename="'.$download_name.'"');

    # The fragment is echoed back to the client, so it is passed through the
    # legacy HTML sanitizer first and then wrapped in a minimal HTML document.
    $body = blis_word_sanitize_legacy_html($html_fragment);
    echo "<!DOCTYPE html>\n<html><head><meta charset=\"UTF-8\"></head><body>", $body, "</body></html>";

    # Terminates the request; this function does not return.
    exit;
}

#
# Best-effort removal of active content from an HTML fragment before it is
# echoed back in the legacy .doc export.
#
# $html_fragment : value to clean; it is cast to string.
# Returns        : the cleaned string, or '' if PCRE reports an error.
#
# This is a regex filter, not a full HTML sanitizer: it strips the listed
# executable elements, inline on* handlers, and javascript:/data: URLs in
# href/src/xlink:href. Entity-encoded or otherwise obfuscated schemes are
# not decoded.
#
function blis_word_sanitize_legacy_html($html_fragment)
{
    $safe = (string)$html_fragment;

    $blocked_tags = 'script|iframe|object|embed|applet|meta|link|style';

    # What may precede an attribute name inside a tag: whitespace, a slash
    # (<svg/onload=...>), or nothing at all when the previous attribute value
    # ended with a quote ("x"onerror=...). HTML parsers accept all three.
    $attr_sep = '(?:\s+|\/+|(?<=["\']))';

    $patterns = array(
        # Blocked element together with its content.
        '/<\s*('.$blocked_tags.')\b[^>]*>.*?<\s*\/\s*\1\s*>/is',
        # Any remaining unpaired opening, self-closing or closing blocked tag.
        '/<\s*\/?\s*('.$blocked_tags.')\b[^>]*\/?\s*>/is',
        # Inline JS event handlers (onclick, onload, etc.).
        '/'.$attr_sep.'on[a-z]+\s*=\s*(?:".*?"|\'.*?\'|[^\s>]+)/is',
        # Quoted javascript:/data: URLs. The value runs to the MATCHING quote,
        # so a payload containing the other quote character is still removed.
        '/'.$attr_sep.'(?:href|src|xlink:href)\s*=\s*("|\')\s*(?:javascript:|data:).*?\1/is',
        # Unquoted javascript:/data: URLs.
        '/'.$attr_sep.'(?:href|src|xlink:href)\s*=\s*(?:javascript:|data:)[^\s>]*/is',
    );

    # Repeat until nothing changes: deleting one match can splice the text on
    # either side into a new blocked construct (e.g. "<scr<iframe>ipt>").
    # Every replacement shortens the string, so the loop always terminates.
    do
    {
        $before = $safe;
        $safe = preg_replace($patterns, '', $safe);
        if($safe === null)
        {
            # PCRE failure (e.g. backtrack limit): emit nothing rather than
            # unsanitized markup.
            return '';
        }
    }
    while($safe !== $before);

    return $safe;
}

function blis_word_export_docx($html_fragment, $file_prefix)
{
$pandoc_bin = blis_word_find_pandoc_bin();
if($pandoc_bin === '')
{
return false;
}

$tmp_html = tempnam(sys_get_temp_dir(), 'blis_word_html_');
$tmp_docx_base = tempnam(sys_get_temp_dir(), 'blis_word_docx_');
if($tmp_html === false || $tmp_docx_base === false)
{
return false;
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case (and probably all the other failure cases) you should be logging an error with the logger.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we return early here because tempnam failed for the second file, we might leave the first temporary file ($tmp_html) orphaned in the system's temp directory. We should ensure @unlink($tmp_html) is called or consolidate the cleanup logic to prevent local storage bloat over time.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's good file hygiene, but it's not actually necessary. On Linux systems, and I assume macOS, /tmp is a tmpfs mount meaning that all of its files are stored in memory. When it's unmounted, the files disappear.

mitchell@kingsley:~$ mount | grep '/tmp'
tmpfs on /tmp type tmpfs (rw,nosuid,nodev,seclabel,nr_inodes=1048576,inode64,usrquota)

The Windows temp dir does NOT do this as far as I know, so deleting files on that platform makes sense. However there is a cleanup utility that can run periodically on Windows that would clean these up.

}

$tmp_docx = $tmp_docx_base.'.docx';
@unlink($tmp_docx_base);

$full_html = "<!DOCTYPE html>\n<html><head><meta charset=\"UTF-8\"></head><body>".$html_fragment."</body></html>";
$write_ok = (file_put_contents($tmp_html, $full_html) !== false);
if(!$write_ok)
{
@unlink($tmp_html);
return false;
}

$cmd = escapeshellarg($pandoc_bin)
." -f html -t docx"
." -o ".escapeshellarg($tmp_docx)
." ".escapeshellarg($tmp_html)
." 2>&1";

$command_output = array();
$exit_code = 0;
exec($cmd, $command_output, $exit_code);

@unlink($tmp_html);

if($exit_code !== 0 || !is_file($tmp_docx))
{
@unlink($tmp_docx);
return false;
}

$date = date('YmdHi');
$file_name = blis_word_sanitize_filename_segment($file_prefix).'_'.$date.'.docx';
header('Content-Type: application/vnd.openxmlformats-officedocument.wordprocessingml.document');
header('Content-Disposition: attachment; filename="'.$file_name.'"');
header('Content-Length: '.filesize($tmp_docx));

# Prevent buffered warnings/whitespace from corrupting binary docx output.
while(ob_get_level() > 0)
{
ob_end_clean();
}

readfile($tmp_docx);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a good idea to call ob_end_clean() here before readfile(). If any PHP notices or stray whitespace were echoed earlier in the execution, they could prepend the file stream and corrupt the resulting .docx file structure.

@unlink($tmp_docx);
exit;
}

function blis_word_find_pandoc_bin()
{
    # Returns the pandoc location reported by the platform's lookup command,
    # or '' when the lookup produces nothing.
    if(PHP_OS_FAMILY !== 'Windows')
    {
        # Non-Windows: ask the shell; no output (or null) collapses to ''.
        $located = shell_exec('command -v pandoc');
        return trim((string)$located);
    }

    # Windows: 'where' may print several matches; only the first line is used.
    $lines = array();
    $status = 0;
    exec('where pandoc', $lines, $status);

    $found = ($status === 0 && isset($lines[0]));
    return $found ? trim($lines[0]) : '';
}
Loading