Split Word Documents by Page Breaks or Section Breaks in C#

Page breaks and section breaks are two useful features for controlling page layout in MS Word and other desktop publishing programs. Page breaks are used to end a page without filling it with text. Section breaks are used to allow formatting changes (i.e., different margins, page number styles, etc.) in the same document.

Sometimes, we may want to pull the different parts that are separated by page breaks or section breaks out of the whole document storing as several individual files. This article provides two brilliant solutions in C# to split a Word document by page breaks and section breaks.

Split by Page Breaks

using System;
using Spire.Doc;
using Spire.Doc.Documents;

namespace Split_Word_Document_by_Page_Break
{
    class Program
    {
        static void Main(string[] args)
        {
            Document original = new Document();
            original.LoadFromFile("New Zealand.docx");
            Document newWord = new Document();
            Section section = newWord.AddSection();

            int index = 0;
            foreach (Section sec in original.Sections)
            {
                foreach (DocumentObject obj in sec.Body.ChildObjects)
                {
                    if (obj is Paragraph)
                    {
                        Paragraph para = obj as Paragraph;
                        section.Body.ChildObjects.Add(para.Clone());

                        foreach (DocumentObject parobj in para.ChildObjects)
                        {
                            if (parobj is Break && (parobj as Break).BreakType == BreakType.PageBreak)
                            {
                                int i = para.ChildObjects.IndexOf(parobj);
                                section.Body.LastParagraph.ChildObjects.RemoveAt(i);
                                newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
                                index++;

                                newWord = new Document();
                                section = newWord.AddSection();
                                section.Body.ChildObjects.Add(para.Clone());
                                if (section.Paragraphs[0].ChildObjects.Count == 0)
                                {
                                    section.Body.ChildObjects.RemoveAt(0);
                                }
                                else
                                {
                                    while (i >= 0)
                                    {
                                        section.Paragraphs[0].ChildObjects.RemoveAt(i);
                                        i--;
                                    }
                                }
                            }
                        }
                    }
                    if (obj is Table)
                    {
                        section.Body.ChildObjects.Add(obj.Clone());
                    }
                }
            }
            newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
        }
    }
}

Split by Section Breaks

using System;
using Spire.Doc;

namespace Split_Word_Document
{
    class Program
    {
        static void Main(string[] args)
        {
            Document document = new Document();
            document.LoadFromFile("Test.doc");
            Document newWord;
            for (int i = 0; i < document.Sections.Count; i++)
            {
                newWord = new Document();
                newWord.Sections.Add(document.Sections[i].Clone());
                newWord.SaveToFile(String.Format(@"test\out_{0}.docx", i));
            }
        }
    }
}

Note: These solutions are relied on free .NET Word Component, you can download it from here and reference the DLL file to your own project.