Class: PDFium::Page

Inherits:
Object
  • Object
show all
Defined in:
ext/pdfium_ext/page.cc,
ext/pdfium_ext/page.cc

Overview

A Page on a PDFium::Document

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.create(PDFium::Document, page_number = document.page_count) ⇒ Page

Creates a new page on a document. The page_number defaults to the Document#page_count, causing pages to be appended to the back of the document by default if no page_number is given.

Returns:



83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'ext/pdfium_ext/page.cc', line 83

/*
 * Creates a new page on a document and wraps it in a Page instance.
 *
 * Arguments (via argc/argv): the document, an optional page number, and an
 * optional options hash.  When no options hash is given, :size defaults to
 * the PDFium LETTER constant.  Otherwise the dimensions are taken from :size
 * (entries 0 and 1 are used as width and height) or from the separate
 * :width and :height options.
 *
 * The page number defaults to the document's current page count, so the new
 * page is inserted at the end of the document.
 *
 * Raises ArgumentError when either dimension is missing, and RangeError when
 * the page number is negative or greater than the current page count.
 */
static VALUE
page_create(int argc, VALUE *argv, VALUE klass)
{
    VALUE rb_document, rb_page_number, options;
    rb_scan_args(argc, argv, "11:", &rb_document, &rb_page_number, &options);
    if (NIL_P(options)){
        options=rb_hash_new();
        rb_hash_aset(options, ID2SYM(rb_intern("size")),
                     rb_const_get(RB::PDFium(), rb_intern("LETTER")) );
    }

    VALUE size, rb_width, rb_height;
    if ( !NIL_P(size = RB::get_option(options,"size")) ){
        rb_width = rb_ary_entry(size, 0);
        rb_height = rb_ary_entry(size, 1);
    } else {
        rb_width = RB::get_option(options,"width");
        rb_height = RB::get_option(options,"height");
    }

    if ( NIL_P(rb_width) || NIL_P(rb_height) ){
        rb_raise(rb_eArgError, ":height or :width must be given");
    }

    DocumentWrapper *doc_wrapper;
    Data_Get_Struct(rb_document, DocumentWrapper, doc_wrapper);

    // Resolve the insertion index: an omitted page number means "append".
    int page_number;
    if (NIL_P(rb_page_number)){
        page_number = doc_wrapper->document->GetPageCount();
    } else {
        page_number = FIX2INT(rb_page_number);
    }

    // An index equal to the page count is allowed here (it appends).
    if ( page_number < 0 || page_number > doc_wrapper->document->GetPageCount() ){
        rb_raise(rb_eRangeError, "%d is out of range: 0...%d",
                 page_number, doc_wrapper->document->GetPageCount() );
    }

    CPDF_Page* newpage = (CPDF_Page*)FPDFPage_New(doc_wrapper->document, page_number,
                                                  FIX2INT(rb_width), FIX2INT(rb_height) );

    // Hand the wrapper the resolved integer index, not the raw Ruby VALUE:
    // rb_page_number may be nil, and even a Fixnum VALUE is a tagged word
    // rather than the plain page index.
    PageWrapper *page_wrapper = new PageWrapper(doc_wrapper, page_number);
    page_wrapper->setPage(newpage);
    return Data_Wrap_Struct(klass, NULL, page_gc_free, page_wrapper);
}

.new ⇒ raises RuntimeError

Pages cannot be created by using Page.new; Page.open or Page.create should be used instead.

Returns:

  • (raises RuntimeError)


43
44
45
46
47
# File 'ext/pdfium_ext/page.cc', line 43

/*
 * Handler for Page.new.  Direct construction is not supported, so this
 * always raises RuntimeError pointing the caller at Page.open / Page.create.
 */
static VALUE page_new(VALUE klass)
{
    rb_raise(rb_eRuntimeError, "Use Page.open or Page.create");
}

.open(PDFium::Document, page_index) ⇒ Page

Opens a given page on a document

Returns:



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'ext/pdfium_ext/page.cc', line 56

/*
 * Opens an existing page of a document.
 *
 * rb_document is unwrapped to its DocumentWrapper and rb_page_number is read
 * as a zero-based page index.  Raises RangeError unless the index satisfies
 * 0 <= index < page count.  Returns a new instance of klass wrapping a
 * PageWrapper for that document and index.
 */
static VALUE
page_open(VALUE klass, VALUE rb_document, VALUE rb_page_number)
{
    DocumentWrapper *document;
    Data_Get_Struct(rb_document, DocumentWrapper, document);

    const int index = FIX2INT(rb_page_number);
    const bool in_range = index >= 0 && index < document->document->GetPageCount();
    if ( !in_range ){
        rb_raise(rb_eRangeError, "%d is out of range: 0...%d",
                 index, document->document->GetPageCount() );
    }

    PageWrapper *wrapped_page = new PageWrapper(document, index);
    return Data_Wrap_Struct(klass, NULL, page_gc_free, wrapped_page);
}

Instance Method Details

#as_image(width: nil, height: nil) ⇒ Image

Render a page as an image of width and height to the given file. The image type will be auto-detected from the file_path’s extension, and can be any of the formats supported by the FreeImage library freeimage.sourceforge.net/features.html

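If only one of height or width is given, the other is calculated to retain the appropriate page scale. If neither is given, the width defaults to twice the page's width and the height is scaled to match.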

Returns an Image instance.

Example

pdf = PDFium::Document.new( "test.pdf" )
page = pdf.pages[0]
page.as_image(height: 100, width: 75).save("pg-#{page.number}-sm.png")
page.as_image(height: 500).save("pg-#{page.number}-md.png")
page.as_image(width: 1000).save("pg-#{page.number}-lg.png")

If the above page’s #dimensions were 1000x1500, then the following images would be generated:

pg-1-sm.png -> 100x75
pg-1-md.png -> 500x750
pg-1-lg.png -> 750x1000

Returns:



263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# File 'ext/pdfium_ext/page.cc', line 263

/*
 * Builds an Image for this page from optional :width / :height keywords.
 *
 * Sizing rules:
 *   - neither given: width becomes twice the page width;
 *   - one given (or defaulted): the other is scaled from the page's
 *     width/height ratio.
 *
 * The Image is constructed with (self, options), where options carries
 * :width, :height and a :bounds BoundingBox built from
 * (0, page width, 0, page height).
 */
static VALUE
page_as_image(int argc, VALUE *argv, VALUE self)
{
    CPDF_Page *page = RB2PG(self);

    VALUE rb_options;
    rb_scan_args(argc, argv, ":", &rb_options);
    if ( NIL_P(rb_options) ){
        rb_options = rb_hash_new();
    }
    if ( TYPE(rb_options) != T_HASH ){
        rb_raise(rb_eTypeError, "wrong argument type %s (expected Hash)", rb_obj_classname(rb_options));
    }

    VALUE requested_width  = rb_hash_aref(rb_options, rb_sym_width);
    VALUE requested_height = rb_hash_aref(rb_options, rb_sym_height);

    // Zero means "not specified" for both dimensions.
    int width = 0;
    if ( !NIL_P(requested_width) ){
        width = FIX2INT(requested_width);
    }
    int height = 0;
    if ( !NIL_P(requested_height) ){
        height = FIX2INT(requested_height);
    }

    if ( width == 0 && height == 0 ){
        width = FPDF_GetPageWidth(page) * 2;
    }

    // Derive whichever dimension is still missing from the page's proportions.
    if ( width == 0 ){
        width = FPDF_GetPageWidth(page) * ( (double)height / FPDF_GetPageHeight(page) );
    }
    if ( height == 0 ){
        height = FPDF_GetPageHeight(page) * ( (double)width / FPDF_GetPageWidth(page) );
    }

    VALUE img_options = rb_hash_new();
    rb_hash_aset(img_options, rb_sym_width,  INT2FIX(width));
    rb_hash_aset(img_options, rb_sym_height, INT2FIX(height));

    VALUE bounds_args[4] = {
        rb_float_new( 0 ),
        rb_float_new( FPDF_GetPageWidth(page) ),
        rb_float_new( 0 ),
        rb_float_new( FPDF_GetPageHeight(page) )
    };
    VALUE bounds = rb_class_new_instance( 4, bounds_args, RB::BoundingBox() );
    rb_hash_aset(img_options, ID2SYM(rb_intern("bounds")), bounds);

    VALUE image_args[2] = { self, img_options };
    return rb_class_new_instance( 2, image_args, RB::Image() );
}

#each_image ⇒ Object

:nodoc:



334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
# File 'ext/pdfium_ext/page.cc', line 334

/*
 * Yields an Image for every page object whose type is PDFPAGE_IMAGE.
 *
 * Each Image is constructed with (self, options), where options holds
 * :object_index (position among all page objects) and :index (position
 * among the image objects only).  Returns self.
 */
static VALUE
page_each_image(VALUE self)
{
    PageWrapper *wrapper;
    Data_Get_Struct(self, PageWrapper, wrapper);

    const unsigned int object_count = wrapper->page()->CountObjects();

    int images_seen = 0;
    for (unsigned int i = 0; i < object_count; ++i){
        // The page is fetched from the wrapper on every pass rather than
        // cached in a local.
        CPDF_PageObject *candidate = wrapper->page()->GetObjectByIndex(i);
        if ( candidate->m_Type != PDFPAGE_IMAGE ){
            continue;
        }

        VALUE img_options = rb_hash_new();
        rb_hash_aset(img_options, ID2SYM(rb_intern("object_index")), INT2FIX(i));
        rb_hash_aset(img_options, ID2SYM(rb_intern("index")), INT2FIX(images_seen));

        VALUE ctor_args[2] = { self, img_options };
        VALUE image = rb_class_new_instance( 2, ctor_args, RB::Image() );
        rb_yield( image );
        ++images_seen;
    }
    return self;
}

#height ⇒ Float

Returns the height of the page. The height is given in points, of which there are 72 per inch (72 DPI).

Returns:

  • (Float)


154
155
156
157
158
# File 'ext/pdfium_ext/page.cc', line 154

/*
 * Returns the page height reported by FPDF_GetPageHeight as a Ruby Float.
 */
static VALUE
page_height(VALUE self)
{
    const double height = FPDF_GetPageHeight(RB2PG(self));
    return rb_float_new( height );
}

#images ⇒ ImageList

Returns ImageList which contains all the images on the page. Images are lazily loaded only when requested.

Example

pdf = PDFium::Document.new( "test.pdf" )
page = pdf.pages.first
page.images.each do | image |
    image.save("pg-#{page.number}-#{image.index}.png")
end

Returns:

  • (ImageList)


230
231
232
233
234
235
236
# File 'ext/pdfium_ext/page.cc', line 230

/*
 * Returns a new ImageList constructed with this page as its only argument.
 */
static VALUE
page_images(VALUE self)
{
    VALUE ctor_args[1] = { self };
    return rb_class_new_instance( 1, ctor_args, RB::ImageList() );
}

#number ⇒ Fixnum

Returns the page number that the page represents on the document. It is NOT zero based, meaning that the first page#number will be 1.

Warning: if pages are added/removed after the page is loaded, this value will be inaccurate.

Returns:

  • (Fixnum)


206
207
208
209
210
211
212
# File 'ext/pdfium_ext/page.cc', line 206

/*
 * Returns the page's number as a Fixnum: the wrapper's stored
 * _page_number plus one.
 */
static VALUE
page_number(VALUE self)
{
    PageWrapper *wrapper;
    Data_Get_Struct(self, PageWrapper, wrapper);

    return INT2FIX( wrapper->_page_number + 1 );
}

#text ⇒ String

Returns the text that is contained on the page as a UTF-16LE encoded string

Returns:

  • (String)


167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'ext/pdfium_ext/page.cc', line 167

/*
 * Returns the text contained on the page as a UTF-16LE encoded String.
 *
 * Loads a PDFium text page for the wrapped page, copies all of its
 * characters into a temporary buffer, closes the text page again and
 * builds the Ruby string from the characters actually written.
 *
 * Raises RuntimeError if PDFium cannot load the text page.
 */
static VALUE
page_text(VALUE self)
{
    static rb_encoding *enc = rb_enc_find("UTF-16LE");

    PageWrapper *pw;
    Data_Get_Struct(self, PageWrapper, pw);

    CPDF_Page *page = pw->page();
    IPDF_TextPage *text_page = (IPDF_TextPage*)FPDFText_LoadPage(page);
    if ( !text_page ){
        rb_raise(rb_eRuntimeError, "Unable to load text for page");
    }

    int char_count = text_page->CountChars();
    if ( char_count < 0 ){
        char_count = 0;
    }

    // FPDFText_GetText writes 16-bit units and appends a 16-bit terminator,
    // so the buffer needs room for (char_count + 1) two-byte units.
    unsigned int buff_size = ((unsigned int)char_count + 1) * 2;
    char *buffer = ALLOC_N(char, buff_size );

    // The return value is the number of units written, terminator included.
    int written = FPDFText_GetText((FPDF_TEXTPAGE)text_page, 0, char_count, (unsigned short*)buffer);

    // The text page is only needed for extraction; release it here so it
    // is not leaked on every call.
    FPDFText_ClosePage((FPDF_TEXTPAGE)text_page);

    // Only expose what was actually written, minus the terminator.
    int text_units = written > 0 ? written - 1 : 0;
    if ( text_units > char_count ){
        text_units = char_count;
    }

    VALUE ret = rb_enc_str_new(buffer, (long)text_units * 2, enc);

    xfree(buffer);

    return ret;
}

#unload ⇒ Page

Frees a large portion of the internal memory allocated to the page. When a page is parsed by the PDFium engine, various elements are cached in memory. While Ruby will eventually garbage collect the Page instance once it’s no longer in use, this method will free the memory immediately. Page#unload is safe to use since the Page will re-load itself as needed, but calling it while the page is still in use will cause additional work by the engine since it will have to repeatedly re-parse the page when it re-loads itself.

PageList#each will call this method on each page after it yields.

Returns:



322
323
324
325
326
327
328
329
# File 'ext/pdfium_ext/page.cc', line 322

/*
 * Calls unload() on the PageWrapper behind this Page and returns self.
 */
static VALUE
page_unload(VALUE self)
{
    PageWrapper *wrapper;
    Data_Get_Struct(self, PageWrapper, wrapper);

    wrapper->unload();

    return self;
}

#width ⇒ Float

Returns the width of the page. The width is given in points, of which there are 72 per inch (72 DPI).

Returns:

  • (Float)


141
142
143
144
145
# File 'ext/pdfium_ext/page.cc', line 141

static VALUE
page_width(VALUE self)
{
    return rb_float_new( FPDF_GetPageWidth(RB2PG(self)) );
}