Linux vps-61133.fhnet.fr 4.9.0-19-amd64 #1 SMP Debian 4.9.320-2 (2022-06-30) x86_64
Apache/2.4.25 (Debian)
Server IP : 93.113.207.21 & Your IP : 216.73.216.122
Domains :
Cant Read [ /etc/named.conf ]
User : www-data
Terminal
Auto Root
Create File
Create Folder
Localroot Suggester
Backdoor Destroyer
Readme
/
usr /
local /
lib /
python3.5 /
dist-packages /
pdfminer /
Delete
Unzip
Name
Size
Permission
Date
Action
__pycache__
[ DIR ]
drwxr-sr-x
2025-04-10 17:08
cmap
[ DIR ]
drwxr-sr-x
2025-04-10 17:08
__init__.py
76
B
-rw-r--r--
2025-04-10 17:07
arcfour.py
886
B
-rw-r--r--
2025-04-10 17:07
ascii85.py
1.98
KB
-rw-r--r--
2025-04-10 17:07
ccitt.py
19.53
KB
-rw-r--r--
2025-04-10 17:07
cmapdb.py
11.79
KB
-rw-r--r--
2025-04-10 17:07
converter.py
21.07
KB
-rw-r--r--
2025-04-10 17:07
encodingdb.py
3.47
KB
-rw-r--r--
2025-04-10 17:07
fontmetrics.py
56.62
KB
-rw-r--r--
2025-04-10 17:07
glyphlist.py
114.46
KB
-rw-r--r--
2025-04-10 17:07
high_level.py
6.17
KB
-rw-r--r--
2025-04-10 17:07
image.py
5.38
KB
-rw-r--r--
2025-04-10 17:07
jbig2.py
9.48
KB
-rw-r--r--
2025-04-10 17:07
latin_enc.py
7.73
KB
-rw-r--r--
2025-04-10 17:07
layout.py
28.65
KB
-rw-r--r--
2025-04-10 17:07
lzw.py
2.74
KB
-rw-r--r--
2025-04-10 17:07
pdfcolor.py
779
B
-rw-r--r--
2025-04-10 17:07
pdfdevice.py
5.84
KB
-rw-r--r--
2025-04-10 17:07
pdfdocument.py
26.63
KB
-rw-r--r--
2025-04-10 17:07
pdffont.py
29.31
KB
-rw-r--r--
2025-04-10 17:07
pdfinterp.py
28.35
KB
-rw-r--r--
2025-04-10 17:07
pdfpage.py
5.18
KB
-rw-r--r--
2025-04-10 17:07
pdfparser.py
5.18
KB
-rw-r--r--
2025-04-10 17:07
pdftypes.py
9.36
KB
-rw-r--r--
2025-04-10 17:07
psparser.py
16.76
KB
-rw-r--r--
2025-04-10 17:07
rijndael.py
45.34
KB
-rw-r--r--
2025-04-10 17:07
runlength.py
1.29
KB
-rw-r--r--
2025-04-10 17:07
settings.py
15
B
-rw-r--r--
2025-04-10 17:07
utils.py
11.97
KB
-rw-r--r--
2025-04-10 17:07
Save
Rename
"""Functions that can be used for the most common use-cases for pdfminer.six"""

import logging
import sys
from io import StringIO

from .converter import XMLConverter, HTMLConverter, TextConverter, \
    PDFPageAggregator
from .image import ImageWriter
from .layout import LAParams
from .pdfdevice import TagExtractor
from .pdfinterp import PDFResourceManager, PDFPageInterpreter
from .pdfpage import PDFPage
from .utils import open_filename


def extract_text_to_fp(inf, outfp, output_type='text', codec='utf-8',
                       laparams=None, maxpages=0, page_numbers=None,
                       password="", scale=1.0, rotation=0,
                       layoutmode='normal', output_dir=None,
                       strip_control=False, debug=False,
                       disable_caching=False, **kwargs):
    """Parses text from inf-file and writes to outfp file-like object.

    Takes loads of optional arguments but the defaults are somewhat sane.
    Beware laparams: Including an empty LAParams is not the same as passing
    None!

    :param inf: a file-like object to read PDF structure from, such as a
        file handler (using the builtin `open()` function) or a `BytesIO`.
    :param outfp: a file-like object to write the text to.
    :param output_type: May be 'text', 'xml', 'html', 'tag'. Only 'text' works
        properly.
    :param codec: Text decoding codec
    :param laparams: An LAParams object from pdfminer.layout. Default is None
        but may not layout correctly.
    :param maxpages: How many pages to stop parsing after
    :param page_numbers: zero-indexed page numbers to operate on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param scale: Scale factor (only passed to the HTML converter)
    :param rotation: Rotation added to each page's own rotation, modulo 360
    :param layoutmode: Default is 'normal', see
        pdfminer.converter.HTMLConverter
    :param output_dir: If given, creates an ImageWriter for extracted images.
    :param strip_control: Passed to the XML converter as `stripcontrol`
    :param debug: Output more logging data (sets the root logger to DEBUG)
    :param disable_caching: If true, the resource manager and page iterator
        are created with caching turned off
    :param kwargs: Accepted for backward compatibility; not used.
    :raises ValueError: if output_type is not one of the supported values.
    :return: nothing, acting as it does on two streams. Use StringIO to get
        strings.
    """
    # Reject unknown output types before doing any work. Previously this
    # surfaced later as an UnboundLocalError on `device`, after the image
    # writer and resource manager had already been constructed.
    if output_type not in ('text', 'xml', 'html', 'tag'):
        raise ValueError(
            'Output type can be text, html, xml or tag but is {}'.format(
                output_type))

    if debug:
        logging.getLogger().setLevel(logging.DEBUG)

    imagewriter = None
    if output_dir:
        imagewriter = ImageWriter(output_dir)

    rsrcmgr = PDFResourceManager(caching=not disable_caching)

    if output_type == 'text':
        # The text converter is handed the caller's stream unchanged.
        device = TextConverter(rsrcmgr, outfp, codec=codec, laparams=laparams,
                               imagewriter=imagewriter)
    else:
        # The remaining converters get the underlying binary buffer when the
        # target is sys.stdout (presumably because they emit encoded bytes).
        if outfp == sys.stdout:
            outfp = sys.stdout.buffer

        if output_type == 'xml':
            device = XMLConverter(rsrcmgr, outfp, codec=codec,
                                  laparams=laparams, imagewriter=imagewriter,
                                  stripcontrol=strip_control)
        elif output_type == 'html':
            device = HTMLConverter(rsrcmgr, outfp, codec=codec, scale=scale,
                                   layoutmode=layoutmode, laparams=laparams,
                                   imagewriter=imagewriter)
        else:  # 'tag' -- the only value left after validation above
            device = TagExtractor(rsrcmgr, outfp, codec=codec)

    interpreter = PDFPageInterpreter(rsrcmgr, device)
    for page in PDFPage.get_pages(inf,
                                  page_numbers,
                                  maxpages=maxpages,
                                  password=password,
                                  caching=not disable_caching,
                                  check_extractable=True):
        page.rotate = (page.rotate + rotation) % 360
        interpreter.process_page(page)

    device.close()


def extract_text(pdf_file, password='', page_numbers=None, maxpages=0,
                 caching=True, codec='utf-8', laparams=None):
    """Parse and return the text contained in a PDF file.

    :param pdf_file: Either a file path or a file-like object for the PDF file
        to be worked on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param page_numbers: List of zero-indexed page numbers to extract.
    :param maxpages: The maximum number of pages to parse
    :param caching: If resources should be cached
    :param codec: Text decoding codec
    :param laparams: An LAParams object from pdfminer.layout. If None, uses
        some default settings that often work well.
    :return: a string containing all of the text extracted.
    """
    if laparams is None:
        laparams = LAParams()

    with open_filename(pdf_file, "rb") as fp, StringIO() as output_string:
        # Honour `caching` for the resource manager too; it used to be
        # forwarded only to PDFPage.get_pages.
        rsrcmgr = PDFResourceManager(caching=caching)
        device = TextConverter(rsrcmgr, output_string, codec=codec,
                               laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        for page in PDFPage.get_pages(
                fp,
                page_numbers,
                maxpages=maxpages,
                password=password,
                caching=caching,
                check_extractable=True,
        ):
            interpreter.process_page(page)

        # Read the buffer before the `with` block closes the StringIO.
        return output_string.getvalue()


def extract_pages(pdf_file, password='', page_numbers=None, maxpages=0,
                  caching=True, laparams=None):
    """Extract and yield LTPage objects

    :param pdf_file: Either a file path or a file-like object for the PDF file
        to be worked on.
    :param password: For encrypted PDFs, the password to decrypt.
    :param page_numbers: List of zero-indexed page numbers to extract.
    :param maxpages: The maximum number of pages to parse
    :param caching: If resources should be cached
    :param laparams: An LAParams object from pdfminer.layout. If None, uses
        some default settings that often work well.
    :return: a generator yielding, for each processed page, the result of
        the aggregator device's `get_result()`.
    """
    if laparams is None:
        laparams = LAParams()

    with open_filename(pdf_file, "rb") as fp:
        # Honour `caching` for the resource manager too; it used to be
        # forwarded only to PDFPage.get_pages.
        resource_manager = PDFResourceManager(caching=caching)
        device = PDFPageAggregator(resource_manager, laparams=laparams)
        interpreter = PDFPageInterpreter(resource_manager, device)
        for page in PDFPage.get_pages(fp, page_numbers, maxpages=maxpages,
                                      password=password, caching=caching):
            interpreter.process_page(page)
            layout = device.get_result()
            yield layout