Split Word Documents by Page Breaks or Section Breaks in C#

Page breaks and section breaks are two useful features for controlling page layout in MS Word and other desktop publishing programs. A page break ends the current page without filling it with text. A section break allows formatting changes (e.g., different margins, page number styles, etc.) within the same document.

Sometimes we may want to extract the parts of a document that are separated by page breaks or section breaks and store them as several individual files. This article provides two solutions in C# for splitting a Word document: one by page breaks and one by section breaks.

Split by Page Breaks

using System;
using Spire.Doc;
using Spire.Doc.Documents;

namespace Split_Word_Document_by_Page_Break
{
    /// <summary>
    /// Console sample that splits "New Zealand.docx" into one .docx file per
    /// page-break-delimited part, written to the "result" folder as
    /// out-0.docx, out-1.docx, ...
    /// </summary>
    class Program
    {
        /// <summary>
        /// Walks every paragraph and table found directly in each section body of
        /// the source document, copying them into the current output document.
        /// Whenever a page break is found inside a paragraph, the content before the
        /// break closes the current part and the content after it opens the next one.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <remarks>
        /// Only page breaks that are direct children of body-level paragraphs are
        /// detected; paragraphs nested inside tables are copied with their table
        /// and are not inspected for breaks.
        /// </remarks>
        static void Main(string[] args)
        {
            Document original = new Document();
            original.LoadFromFile("New Zealand.docx");

            // Create the output folder up front so saving does not depend on it
            // already existing. This is a no-op when the folder is present.
            System.IO.Directory.CreateDirectory("result");

            Document newWord = new Document();
            Section section = newWord.AddSection();

            int index = 0;
            foreach (Section sec in original.Sections)
            {
                foreach (DocumentObject obj in sec.Body.ChildObjects)
                {
                    Paragraph para = obj as Paragraph;
                    if (para != null)
                    {
                        // Work on a clone so the source paragraph (which is being
                        // enumerated below) is never modified.
                        Paragraph copy = (Paragraph)para.Clone();
                        section.Body.ChildObjects.Add(copy);

                        // Number of leading children already stripped from `copy`;
                        // needed to translate an index in `para` into an index in `copy`
                        // when one paragraph holds more than one page break.
                        int trimmed = 0;
                        int childIndex = -1;

                        foreach (DocumentObject parobj in para.ChildObjects)
                        {
                            childIndex++;

                            Break pageBreak = parobj as Break;
                            if (pageBreak == null || pageBreak.BreakType != BreakType.PageBreak)
                            {
                                continue;
                            }

                            // Close the current part: drop the break and everything
                            // after it, so trailing content is not duplicated in both
                            // the part being saved and the part that follows.
                            // An empty paragraph may remain here when the break is the
                            // first child; it is intentionally kept.
                            int breakPosition = childIndex - trimmed;
                            while (copy.ChildObjects.Count > breakPosition)
                            {
                                copy.ChildObjects.RemoveAt(copy.ChildObjects.Count - 1);
                            }
                            newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
                            index++;

                            // Open the next part with whatever followed the break.
                            newWord = new Document();
                            section = newWord.AddSection();
                            copy = (Paragraph)para.Clone();
                            section.Body.ChildObjects.Add(copy);
                            for (int k = childIndex; k >= 0; k--)
                            {
                                copy.ChildObjects.RemoveAt(k);
                            }
                            trimmed = childIndex + 1;

                            // Nothing followed the break: do not start the next part
                            // with an empty paragraph. The break was the last child,
                            // so `copy` is not touched again for this paragraph.
                            if (copy.ChildObjects.Count == 0)
                            {
                                section.Body.ChildObjects.RemoveAt(0);
                            }
                        }
                    }
                    else if (obj is Table)
                    {
                        section.Body.ChildObjects.Add(obj.Clone());
                    }
                }
            }

            // Save whatever was collected after the last page break.
            newWord.SaveToFile(String.Format("result/out-{0}.docx", index), FileFormat.Docx);
        }
    }
}

Split by Section Breaks

using System;
using Spire.Doc;

namespace Split_Word_Document
{
    /// <summary>
    /// Console sample that splits "Test.doc" into one .docx file per section,
    /// written to the "test" folder as out_0.docx, out_1.docx, ...
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the source document and saves a clone of each of its sections
        /// as a separate document.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Document document = new Document();
            document.LoadFromFile("Test.doc");

            // Create the output folder up front so saving does not depend on it
            // already existing. This is a no-op when the folder is present.
            System.IO.Directory.CreateDirectory("test");

            for (int i = 0; i < document.Sections.Count; i++)
            {
                // Each section goes into its own fresh document; the clone keeps
                // the source document untouched.
                Document newWord = new Document();
                newWord.Sections.Add(document.Sections[i].Clone());

                // Path.Combine keeps the path valid on non-Windows platforms, and the
                // explicit FileFormat makes the written content match the .docx extension.
                string outputPath = System.IO.Path.Combine("test", String.Format("out_{0}.docx", i));
                newWord.SaveToFile(outputPath, FileFormat.Docx);
            }
        }
    }
}

Note: These solutions rely on a free .NET Word component (the Spire.Doc library referenced in the code above). You can download it from here and add a reference to its DLL file in your own project.

Advertisements

Author: janewdaisy

.NET Program Beginner. Share methods about how to use C#/VB.NET to export data, operate Word, Excel, PDF and other useful skills.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s