diff --git a/MariGold.OpenXHTML/DocxElement.cs b/MariGold.OpenXHTML/DocxElement.cs index 30806ca..8fef2a2 100644 --- a/MariGold.OpenXHTML/DocxElement.cs +++ b/MariGold.OpenXHTML/DocxElement.cs @@ -13,6 +13,8 @@ internal abstract class DocxElement protected const string whiteSpace = " "; protected readonly IOpenXmlContext context; internal EventHandler ParagraphCreated; + protected static readonly Regex newLineRegex = new Regex(@"(?:\n|\r\n)", RegexOptions.Compiled); + protected static readonly Regex whitespaceRegex = new Regex(@"[\r\n ]+", RegexOptions.Compiled); protected void RunCreated(DocxNode node, Run run) { @@ -142,21 +144,29 @@ protected void ProcessBlockElement(DocxNode node, ref Paragraph paragraph, Dicti protected void ProcessParagraph(DocxNode child, DocxNode node, DocxNode paragraphNode, ref Paragraph paragraph) { - if (!IsEmptyText(child, out string text)) + if (TryGetText(child, out string text)) { - if (paragraph == null) + if (paragraph == null && !IsEmptyText(text)) { paragraph = node.Parent.AppendChild(new Paragraph()); OnParagraphCreated(paragraphNode, paragraph); } - Run run = paragraph.AppendChild(new Run(new Text() + if (paragraph != null) { - Text = ClearHtml(text), - Space = SpaceProcessingModeValues.Preserve - })); + if (child.Previous != null && child.Previous.InnerHtml.EndsWith(whiteSpace)) + { + text = text.TrimStart(); + } + + Run run = paragraph.AppendChild(new Run(new Text() + { + Text = ClearHtml(text), + Space = SpaceProcessingModeValues.Preserve + })); - RunCreated(node, run); + RunCreated(node, run); + } } } @@ -216,67 +226,36 @@ internal string ClearHtml(string html) { return string.Empty; } - - html = WebUtility.HtmlDecode(html); - html = html.Replace(" ", whiteSpace); - html = html.Replace("&", "&"); - Regex regex = new Regex(Environment.NewLine + "\\s+"); - Match match = regex.Match(html); + html = whitespaceRegex.Replace(html, " "); - while (match.Success) - { - //match.Length - 1 for leave a single space. Otherwise the sentences will collide. - html = html.Remove(match.Index, match.Length - 1); - match = regex.Match(html); - } - - html = html.Replace(Environment.NewLine, string.Empty); + html = WebUtility.HtmlDecode(html); return html; } internal bool IsEmptyText(string html) { - if (string.IsNullOrEmpty(html)) - { - return true; - } - - html = html.Replace(Environment.NewLine, string.Empty); - - if (string.IsNullOrEmpty(html.Trim())) - { - return true; - } - - return false; + return string.IsNullOrEmpty(html.Trim()); } - internal bool IsEmptyText(DocxNode node, out string text) + internal bool TryGetText(DocxNode node, out string text) { - text = string.Empty; - - if (string.IsNullOrEmpty(node.InnerHtml)) - { - return true; - } - - text = node.InnerHtml.Replace(Environment.NewLine, string.Empty); + text = node.InnerHtml; if (!string.IsNullOrEmpty(text.Trim())) { - return false; + return true; } else if (!string.IsNullOrEmpty(text) && node.Previous != null && !node.Previous.IsText && !node.Previous.InnerHtml.EndsWith(whiteSpace) && node.Next != null && !node.Next.IsText && !node.Next.InnerHtml.StartsWith(whiteSpace)) { text = whiteSpace; - return false; + return true; } - return true; + return false; } internal abstract bool CanConvert(DocxNode node); diff --git a/MariGold.OpenXHTML/Elements/DocxBody.cs b/MariGold.OpenXHTML/Elements/DocxBody.cs index 50bf2a2..03da383 100644 --- a/MariGold.OpenXHTML/Elements/DocxBody.cs +++ b/MariGold.OpenXHTML/Elements/DocxBody.cs @@ -16,21 +16,29 @@ private void ProcessBody(DocxNode node, ref Paragraph paragraph, Dictionary(paragraphBorders); - hrParagraph.ParagraphProperties.Append(paragraphBorders); - - Run run = hrParagraph.AppendChild(new Run(new Text())); + + var rectangle = new Rectangle(); + rectangle.Style = "width:0;height:1.5pt"; + rectangle.Horizontal = true; + rectangle.HorizontalStandard = true; + rectangle.FillColor = "#a0a0a0"; + rectangle.Stroked = false; + rectangle.HorizontalAlignment = HorizontalRuleAlignmentValues.Center; + var picture = new Picture(rectangle); + + Run run = hrParagraph.AppendChild(new Run(picture)); RunCreated(node, run); } } diff --git a/MariGold.OpenXHTML/Elements/DocxOL.cs b/MariGold.OpenXHTML/Elements/DocxOL.cs index 6b2c3b7..ffa971c 100644 --- a/MariGold.OpenXHTML/Elements/DocxOL.cs +++ b/MariGold.OpenXHTML/Elements/DocxOL.cs @@ -24,7 +24,7 @@ private void DefineLevel(NumberFormatValues numberFormat, int levelIndex) Level level = new Level() { LevelIndex = gLevelId }; StartNumberingValue startNumberingValue = new StartNumberingValue() { Val = 1 }; NumberingFormat numberingFormat = new NumberingFormat() { Val = numberFormat }; - LevelText levelText = new LevelText() { Val = $"%{gLevelId + 1})" }; //Later we need a provison to configure this text. + LevelText levelText = new LevelText() { Val = $"%{gLevelId + 1}." }; //Later we need a provison to configure this text. LevelJustification levelJustification = new LevelJustification() { Val = LevelJustificationValues.Left }; PreviousParagraphProperties previousParagraphProperties = new PreviousParagraphProperties(); diff --git a/MariGold.OpenXHTML/Elements/DocxUL.cs b/MariGold.OpenXHTML/Elements/DocxUL.cs index 6778e36..f3aed85 100644 --- a/MariGold.OpenXHTML/Elements/DocxUL.cs +++ b/MariGold.OpenXHTML/Elements/DocxUL.cs @@ -103,7 +103,7 @@ private void InitNumberDefinitions(int levelIndex) Level level = new Level() { LevelIndex = levelIndex }; StartNumberingValue startNumberingValue = new StartNumberingValue() { Val = 1 }; NumberingFormat numberingFormat = new NumberingFormat() { Val = NumberFormatValues.Bullet }; - LevelText levelText = new LevelText() { Val = "·" }; + LevelText levelText = new LevelText() { Val = "" }; LevelJustification levelJustification = new LevelJustification() { Val = LevelJustificationValues.Left }; PreviousParagraphProperties previousParagraphProperties = new PreviousParagraphProperties(); @@ -122,7 +122,8 @@ private void InitNumberDefinitions(int levelIndex) { Hint = FontTypeHintValues.Default, Ascii = "Symbol", - HighAnsi = "Symbol" + HighAnsi = "Symbol", + ComplexScript = "Symbol" }; numberingSymbolRunProperties.Append(runFonts); diff --git a/MariGold.OpenXHTML/IOpenXmlContext.cs b/MariGold.OpenXHTML/IOpenXmlContext.cs index b14ee5f..3ae4b05 100644 --- a/MariGold.OpenXHTML/IOpenXmlContext.cs +++ b/MariGold.OpenXHTML/IOpenXmlContext.cs @@ -4,7 +4,7 @@ using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Wordprocessing; - internal interface IOpenXmlContext + internal interface IOpenXmlContext : IDisposable { string ImagePath{ get; set; } string BaseURL{ get; set; } diff --git a/MariGold.OpenXHTML/OpenXmlContext.cs b/MariGold.OpenXHTML/OpenXmlContext.cs index 15389d7..e1eaf47 100644 --- a/MariGold.OpenXHTML/OpenXmlContext.cs +++ b/MariGold.OpenXHTML/OpenXmlContext.cs @@ -97,6 +97,39 @@ private void SaveNumberDefinitions() } } + private void SaveStyleDefinitions() + { + if (mainPart.StyleDefinitionsPart is null) + { + _ = mainPart.AddNewPart("styleDefinitionsPart"); + } + + var styles = new Styles(); + + // Hyperlink + var hyperlink = new Style { StyleId = "Hyperlink", Type = StyleValues.Character }; + hyperlink.StyleName = new StyleName { Val = "Hyperlink" }; + hyperlink.UnhideWhenUsed = new UnhideWhenUsed(); + if (hyperlink.StyleRunProperties is null) + { + hyperlink.StyleRunProperties = new StyleRunProperties(); + } + hyperlink.StyleRunProperties.Append(new Color { Val = "0563C1", ThemeColor = ThemeColorValues.Hyperlink }); + hyperlink.StyleRunProperties.Append(new Underline { Val = UnderlineValues.Single }); + styles.Append(hyperlink); + + // Headings + for (int i = 1; i <= 6; i++) + { + var heading = new Style { StyleId = $"Heading{i}", Type = StyleValues.Paragraph }; + heading.StyleName = new StyleName { Val = $"heading {i}" }; + heading.LinkedStyle = new LinkedStyle { Val = $"Heading{i}Char" }; + styles.Append(heading); + } + + mainPart.StyleDefinitionsPart.Styles = styles; + } + internal OpenXmlContext(WordprocessingDocument document) { this.document = document; @@ -203,14 +236,9 @@ public Int16 ListNumberId public void Save() { SaveNumberDefinitions(); + SaveStyleDefinitions(); - Document.Save(); - - document.Close(); - document.Dispose(); - - document = null; - mainPart = null; + document.Save(); } public DocxElement Convert(DocxNode node) @@ -276,5 +304,13 @@ public IDocxInterchanger GetInterchanger() { return new DocxInterchanger(); } + + public void Dispose() + { + document.Dispose(); + + document = null; + mainPart = null; + } } } diff --git a/MariGold.OpenXHTML/Styles/DocxDirection.cs b/MariGold.OpenXHTML/Styles/DocxDirection.cs new file mode 100644 index 0000000..091c098 --- /dev/null +++ b/MariGold.OpenXHTML/Styles/DocxDirection.cs @@ -0,0 +1,53 @@ +using DocumentFormat.OpenXml; +using DocumentFormat.OpenXml.Wordprocessing; +using System; +using System.Collections.Generic; +using System.Text; + +namespace MariGold.OpenXHTML +{ + internal static class DocxDirection + { + internal const string direction = "direction"; + + internal const string ltr = "ltr"; + internal const string rtl = "rtl"; + + private static bool GetDirectionValue(string style, out DirectionValues direction) + { + direction = DirectionValues.Ltr; + bool assigned = false; + + switch (style.ToLower()) + { + case ltr: + assigned = true; + direction = DirectionValues.Ltr; + break; + + case rtl: + assigned = true; + direction = DirectionValues.Rtl; + break; + } + + return assigned; + } + + internal static void ApplyBidi(string style, OpenXmlElement styleElement) + { + if (GetDirectionValue(style, out DirectionValues direction) && direction == DirectionValues.Rtl) + { + styleElement.Append(new BiDi()); + } + } + + internal static void ApplyDirection(string style, OpenXmlElement styleElement) + { + if (GetDirectionValue(style, out DirectionValues direction) && direction == DirectionValues.Rtl) + { + styleElement.Append(new RightToLeftText()); + } + } + } +} diff --git a/MariGold.OpenXHTML/Styles/DocxFontStyle.cs b/MariGold.OpenXHTML/Styles/DocxFontStyle.cs index 3a84085..20af7bc 100644 --- a/MariGold.OpenXHTML/Styles/DocxFontStyle.cs +++ b/MariGold.OpenXHTML/Styles/DocxFontStyle.cs @@ -61,7 +61,8 @@ private static string CleanFonts(string fonts) internal static void ApplyFontFamily(string style, OpenXmlElement styleElement) { - styleElement.Append(new RunFonts() { Ascii = CleanFonts(style) }); + var fontFamilies = CleanFonts(style); + styleElement.Append(new RunFonts() { Ascii = fontFamilies, ComplexScript = fontFamilies }); } internal static void ApplyFontWeight(string style, OpenXmlElement styleElement) @@ -70,6 +71,7 @@ internal static void ApplyFontWeight(string style, OpenXmlElement styleElement) string.Compare(bolder, style, StringComparison.InvariantCultureIgnoreCase) == 0) { styleElement.Append(new Bold()); + styleElement.Append(new BoldComplexScript()); } } @@ -78,6 +80,7 @@ internal static void ApplyFontStyle(string style, OpenXmlElement styleElement) if (string.Compare(italic, style, StringComparison.InvariantCultureIgnoreCase) == 0) { styleElement.Append(new Italic()); + styleElement.Append(new ItalicComplexScript()); } } @@ -106,11 +109,13 @@ internal static void ApplyUnderline(OpenXmlElement styleElement) internal static void ApplyFontItalic(OpenXmlElement styleElement) { styleElement.Append(new Italic()); + styleElement.Append(new ItalicComplexScript()); } internal static void ApplyBold(OpenXmlElement styleElement) { styleElement.Append(new Bold()); + styleElement.Append(new BoldComplexScript()); } internal static void ApplyFontSize(string style, OpenXmlElement styleElement) @@ -120,20 +125,23 @@ internal static void ApplyFontSize(string style, OpenXmlElement styleElement) if (fontSize != 0) { fontSize = decimal.Round(fontSize); - styleElement.Append(new FontSize() { Val = fontSize.ToString("N0") }); + var fontSizeString = fontSize.ToString("N0"); + styleElement.Append(new FontSize() { Val = fontSizeString }); + styleElement.Append(new FontSizeComplexScript() { Val = fontSizeString }); } } internal static void ApplyFont(int size, bool isBold, OpenXmlElement styleElement) { - FontSize fontSize = new FontSize() { Val = size.ToString() }; - if (isBold) { styleElement.Append(new Bold()); + styleElement.Append(new BoldComplexScript()); } - styleElement.Append(fontSize); + var sizeString = size.ToString(); + styleElement.Append(new FontSize() { Val = sizeString }); + styleElement.Append(new FontSizeComplexScript() { Val = sizeString }); } } } diff --git a/MariGold.OpenXHTML/Styles/DocxParagraphStyle.cs b/MariGold.OpenXHTML/Styles/DocxParagraphStyle.cs index fb1d939..ce40e52 100644 --- a/MariGold.OpenXHTML/Styles/DocxParagraphStyle.cs +++ b/MariGold.OpenXHTML/Styles/DocxParagraphStyle.cs @@ -32,6 +32,59 @@ static internal void SetIndentation(Paragraph element, int indent) element.ParagraphProperties.Append(new Indentation() { Left = indent.ToString() }); } + private void CheckFonts(DocxNode node, ParagraphMarkRunProperties properties) + { + string fontFamily = node.ExtractStyleValue(DocxFontStyle.fontFamily); + string fontWeight = node.ExtractStyleValue(DocxFontStyle.fontWeight); + string fontStyle = node.ExtractStyleValue(DocxFontStyle.fontStyle); + + if (!string.IsNullOrEmpty(fontFamily)) + { + DocxFontStyle.ApplyFontFamily(fontFamily, properties); + } + + if (!string.IsNullOrEmpty(fontWeight)) + { + DocxFontStyle.ApplyFontWeight(fontWeight, properties); + } + + if (!string.IsNullOrEmpty(fontStyle)) + { + DocxFontStyle.ApplyFontStyle(fontStyle, properties); + } + } + + private void CheckFontStyle(DocxNode node, ParagraphMarkRunProperties properties) + { + string fontSize = node.ExtractStyleValue(DocxFontStyle.fontSize); + string textDecoration = node.ExtractStyleValue(DocxFontStyle.textDecoration); + + if (string.IsNullOrEmpty(textDecoration)) + { + textDecoration = node.ExtractStyleValue(DocxFontStyle.textDecorationLine); + } + + if (!string.IsNullOrEmpty(fontSize)) + { + DocxFontStyle.ApplyFontSize(fontSize, properties); + } + + if (!string.IsNullOrEmpty(textDecoration)) + { + DocxFontStyle.ApplyTextDecoration(textDecoration, properties); + } + } + + private void ProcessDirection(DocxNode node, ParagraphProperties properties) + { + string styleValue = node.ExtractStyleValue(DocxDirection.direction); + + if (!string.IsNullOrEmpty(styleValue)) + { + DocxDirection.ApplyBidi(styleValue, properties); + } + } + internal void Process(Paragraph element, DocxNode node) { ParagraphProperties properties = element.ParagraphProperties; @@ -64,7 +117,36 @@ internal void Process(Paragraph element, DocxNode node) { DocxAlignment.ApplyTextAlign(textAlign, properties); } - + + ProcessDirection(node, properties); + + #region Set Run Properties for the Paragraph Mark + var runProperties = properties.ParagraphMarkRunProperties; + + if (runProperties == null) + { + runProperties = new ParagraphMarkRunProperties(); + } + + //Order of assigning styles to run property is important. The order should not change. + CheckFonts(node, runProperties); + + string color = node.ExtractStyleValue(DocxColor.color); + + if (!string.IsNullOrEmpty(color)) + { + DocxColor.ApplyColor(color, runProperties); + } + + CheckFontStyle(node, runProperties); + + if (properties.ParagraphMarkRunProperties == null && runProperties.HasChildren) + { + properties.ParagraphMarkRunProperties = runProperties; + } + #endregion + + if (element.ParagraphProperties == null && properties.HasChildren) { element.ParagraphProperties = properties; diff --git a/MariGold.OpenXHTML/Styles/DocxRunStyle.cs b/MariGold.OpenXHTML/Styles/DocxRunStyle.cs index 5655dee..12a182f 100644 --- a/MariGold.OpenXHTML/Styles/DocxRunStyle.cs +++ b/MariGold.OpenXHTML/Styles/DocxRunStyle.cs @@ -72,6 +72,16 @@ private void ProcessVerticalAlign(DocxNode node, RunProperties properties) } } + private void ProcessDirection(DocxNode node, RunProperties properties) + { + string styleValue = node.ExtractStyleValue(DocxDirection.direction); + + if (!string.IsNullOrEmpty(styleValue)) + { + DocxDirection.ApplyDirection(styleValue, properties); + } + } + internal void Process(Run element, DocxNode node) { RunProperties properties = element.RunProperties; @@ -97,6 +107,8 @@ internal void Process(Run element, DocxNode node) ProcessVerticalAlign(node, properties); + ProcessDirection(node, properties); + if (element.RunProperties == null && properties.HasChildren) { element.RunProperties = properties; diff --git a/MariGold.OpenXHTML/WordDocument.cs b/MariGold.OpenXHTML/WordDocument.cs index 366828c..0fb242c 100644 --- a/MariGold.OpenXHTML/WordDocument.cs +++ b/MariGold.OpenXHTML/WordDocument.cs @@ -8,7 +8,7 @@ using System.IO; using MariGold.HtmlParser; - public sealed class WordDocument + public sealed class WordDocument : IDisposable { private readonly IOpenXmlContext context; @@ -85,7 +85,7 @@ public WordDocument(string fileName) context = new OpenXmlContext(WordprocessingDocument.Create(fileName, WordprocessingDocumentType.Document)); } - public WordDocument(MemoryStream stream) + public WordDocument(Stream stream) { if (stream == null) { @@ -119,5 +119,21 @@ public void Save() { context.Save(); } + + public void Close() + { + context.WordprocessingDocument.Close(); + } + + public void SaveAndClose() + { + Save(); + Close(); + } + + public void Dispose() + { + context.Dispose(); + } } }