Skip to content

Commit 3985f31

Browse files
committed
Enhance Docx to PDF conversion with multi-column support and improved image handling
1 parent b77351b commit 3985f31

5 files changed

Lines changed: 254 additions & 69 deletions

File tree

src/MiniPdf/DocxReader.cs

Lines changed: 45 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -739,6 +739,7 @@ internal static DocxDocument Read(Stream stream)
739739
var hasExplicitUnderlineDecl = false;
740740
var charSpacing = parentCharSpacing;
741741
var fontName = parentFontName;
742+
float verticalPosition = 0;
742743

743744
if (rPr != null)
744745
{
@@ -764,10 +765,16 @@ internal static DocxDocument Read(Stream stream)
764765
charSpacing = cs / 20f; // twips to points
765766

766767
fontName = ResolveRunFontName(rPr, parentFontName, defaultLatinFontName, defaultEastAsiaFontName);
768+
769+
// Vertical position (w:position w:val in half-points)
770+
var posEl = rPr.Element(W + "position");
771+
if (posEl != null && float.TryParse(posEl.Attribute(W + "val")?.Value, out var posVal))
772+
verticalPosition = posVal / 2f; // half-points to points
767773
}
768774

769775
// Collect text from <w:t>, <w:tab>, <w:br> elements
770776
bool isPageBreak = false;
777+
bool isColumnBreak = false;
771778
var text = "";
772779
foreach (var child in rElement.Elements())
773780
{
@@ -780,12 +787,14 @@ internal static DocxDocument Read(Stream stream)
780787
var brType = child.Attribute(W + "type")?.Value;
781788
if (brType == "page")
782789
isPageBreak = true;
790+
else if (brType == "column")
791+
isColumnBreak = true;
783792
else
784793
text += "\n";
785794
}
786795
}
787796

788-
if (string.IsNullOrEmpty(text) && !isPageBreak)
797+
if (string.IsNullOrEmpty(text) && !isPageBreak && !isColumnBreak)
789798
return null;
790799

791800
if (caps && !string.IsNullOrEmpty(text))
@@ -801,7 +810,7 @@ internal static DocxDocument Read(Stream stream)
801810
fontName = defaultEastAsiaFontName;
802811
}
803812

804-
return new DocxRun(text, bold, italic, fontSize, color, isPageBreak, underline, charSpacing, fontName, hasExplicitUnderlineDecl);
813+
return new DocxRun(text, bold, italic, fontSize, color, isPageBreak, underline, charSpacing, fontName, hasExplicitUnderlineDecl, isColumnBreak, verticalPosition);
805814
}
806815

807816
private static string? GetFieldInstructionType(string? instruction)
@@ -2131,7 +2140,29 @@ private static DocxPageLayout ParseSectionProperties(XElement sectPr)
21312140
if (float.TryParse(pgMar.Attribute(W + "footer")?.Value, out var fm)) footerMargin = fm * twipsToPoints;
21322141
}
21332142

2134-
return new DocxPageLayout(pageWidth, pageHeight, marginTop, marginBottom, marginLeft, marginRight, gridLinePitch, headerMargin, footerMargin);
2143+
// Parse section type (nextPage, continuous, evenPage, oddPage)
2144+
var sectionType = "nextPage";
2145+
var typeEl = sectPr.Element(W + "type");
2146+
if (typeEl != null)
2147+
{
2148+
var typeVal = typeEl.Attribute(W + "val")?.Value;
2149+
if (!string.IsNullOrEmpty(typeVal))
2150+
sectionType = typeVal;
2151+
}
2152+
2153+
// Parse column layout
2154+
int columnCount = 1;
2155+
float columnSpacing = 36f;
2156+
var colsEl = sectPr.Element(W + "cols");
2157+
if (colsEl != null)
2158+
{
2159+
if (int.TryParse(colsEl.Attribute(W + "num")?.Value, out var cn) && cn > 1)
2160+
columnCount = cn;
2161+
if (float.TryParse(colsEl.Attribute(W + "space")?.Value, out var cs) && cs > 0)
2162+
columnSpacing = cs * twipsToPoints;
2163+
}
2164+
2165+
return new DocxPageLayout(pageWidth, pageHeight, marginTop, marginBottom, marginLeft, marginRight, gridLinePitch, headerMargin, footerMargin, sectionType, columnCount, columnSpacing);
21352166
}
21362167

21372168

@@ -2364,6 +2395,10 @@ private static (Dictionary<string, DocxStyleInfo> Styles, float DefaultLineSpaci
23642395
if (defaultEastAsiaFontName == null)
23652396
defaultEastAsiaFontName = effectiveThemeEastAsiaFont;
23662397

2398+
// OOXML default line spacing is single (1.0) when not specified
2399+
if (defaultLineSpacing == 0)
2400+
defaultLineSpacing = 1.0f;
2401+
23672402
return (styles, defaultLineSpacing, defaultLineSpacingAbsolute, defaultFontName, defaultEastAsiaFontName);
23682403
}
23692404

@@ -2597,7 +2632,10 @@ internal sealed record DocxPageLayout(
25972632
float MarginRight = 72,
25982633
float GridLinePitch = 0,
25992634
float HeaderMargin = 36,
2600-
float FooterMargin = 36
2635+
float FooterMargin = 36,
2636+
string SectionType = "nextPage",
2637+
int ColumnCount = 1,
2638+
float ColumnSpacing = 36
26012639
);
26022640

26032641
/// <summary>Base type for document elements (paragraphs, tables).</summary>
@@ -2674,7 +2712,9 @@ internal sealed record DocxRun(
26742712
bool Underline = false,
26752713
float CharSpacing = 0,
26762714
string? FontName = null,
2677-
bool HasExplicitUnderlineDecl = false
2715+
bool HasExplicitUnderlineDecl = false,
2716+
bool IsColumnBreak = false,
2717+
float VerticalPosition = 0
26782718
);
26792719

26802720
/// <summary>Represents an embedded image.</summary>

0 commit comments

Comments
 (0)