-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils_pptx.py
43 lines (38 loc) · 1.28 KB
/
utils_pptx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pptx
def get_pptx_info(path):
    """Return the core document properties of the presentation at *path*.

    The file is opened with ``pptx.Presentation`` and every core property
    listed below is copied, unmodified, into a plain dict keyed by the
    property name.
    """
    # Attribute names read from the presentation's core-properties object.
    # Reference: python-pptx API docs, "CoreProperties objects"
    # https://python-pptx.readthedocs.io/en/latest/api/presentation.html
    property_names = (
        'author',
        'category',
        'comments',
        'content_status',
        'created',
        'identifier',
        'keywords',
        'language',
        'last_modified_by',
        'last_printed',
        'modified',
        'revision',
        'subject',
        'title',
        'version',
    )
    core = pptx.Presentation(path).core_properties
    # Dict insertion order follows the tuple above.
    return {name: getattr(core, name) for name in property_names}
def get_pptx_text(path):
    """Collect the text of each shape in the presentation at *path*.

    Slides are walked in order and, within a slide, shapes are walked in
    order. Shapes that do not expose a ``text`` attribute are skipped.

    Returns a list of dicts, one per retained shape, with the keys:
      "page"  -- zero-based index of the slide containing the shape
      "shape" -- the shape's ``name``
      "text"  -- the shape's ``text``
    """
    presentation = pptx.Presentation(path)
    return [
        {"page": slide_no, "shape": shp.name, "text": shp.text}
        for slide_no, slide in enumerate(presentation.slides)
        for shp in slide.shapes
        if hasattr(shp, "text")
    ]