-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathhtmlToShopifyJSON.py
More file actions
69 lines (59 loc) · 3.25 KB
/
htmlToShopifyJSON.py
File metadata and controls
69 lines (59 loc) · 3.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from bs4 import BeautifulSoup
import json
def html_to_json(html):
    """Convert an HTML fragment into a rich-text JSON document string.

    The output is a ``{"type": "root", "children": [...]}`` tree (presumably
    Shopify's rich-text schema, going by the file name -- confirm against the
    consumer). Only top-level ``h1``-``h6``, ``p``, ``ul``, ``ol`` and ``li``
    elements are converted; any other top-level node is skipped.

    Inside paragraphs and list items the following inline content is kept:
    bare text, ``em``/``i`` (italic), ``strong``/``b`` (bold) and ``a``
    (link, with optional ``target`` and ``title``). Other inline tags are
    ignored.

    Args:
        html: The HTML markup to convert.

    Returns:
        The JSON document serialized with ``json.dumps(..., indent=2)``.
    """
    soup = BeautifulSoup(html, "html.parser")

    def text_node(value, **marks):
        # Build a text leaf; ``marks`` carries optional flags (bold/italic).
        node = {"type": "text", "value": value}
        node.update(marks)
        return node

    def parse_inline(parent):
        # Shared by <p> and <li> so both accept the same inline tags.
        nodes = []
        for child in parent.children:
            if child.name is None:
                # Bare string node between tags.
                nodes.append(text_node(child.strip()))
            elif child.name in ("em", "i"):
                nodes.append(
                    text_node(child.get_text(strip=True), italic=True)
                )
            elif child.name in ("strong", "b"):
                nodes.append(
                    text_node(child.get_text(strip=True), bold=True)
                )
            elif child.name == "a":
                # A missing href yields an empty url instead of a KeyError.
                link = {"type": "link", "url": child.get("href", "")}
                # Copy each optional attribute independently so ``target``
                # is kept even when ``title`` is absent.
                for attr in ("target", "title"):
                    if child.has_attr(attr):
                        link[attr] = child[attr]
                link["children"] = [text_node(child.get_text(strip=True))]
                nodes.append(link)
        return nodes

    def parse_element(element):
        # Map one block-level element to its JSON node, or None if unsupported.
        if element.name in ("h1", "h2", "h3", "h4", "h5", "h6"):
            return {
                "type": "heading",
                "children": [text_node(element.get_text(strip=True))],
                # The digit after "h" is the heading level.
                "level": int(element.name[1]),
            }
        if element.name == "p":
            return {"type": "paragraph", "children": parse_inline(element)}
        if element.name in ("ul", "ol"):
            list_type = "unordered" if element.name == "ul" else "ordered"
            # recursive=False: only direct <li> children belong to this list.
            items = [
                parse_element(li)
                for li in element.find_all("li", recursive=False)
            ]
            return {"listType": list_type, "type": "list", "children": items}
        if element.name == "li":
            return {"type": "list-item", "children": parse_inline(element)}
        return None

    root = {"type": "root", "children": []}
    for element in soup.children:
        parsed_element = parse_element(element)
        if parsed_element:
            root["children"].append(parsed_element)
    return json.dumps(root, indent=2)
# When run as a script, read test.html and print its JSON conversion.
# The guard keeps the file read from running when this module is imported.
if __name__ == "__main__":
    # Explicit encoding so the result does not depend on the platform default.
    with open("test.html", encoding="utf-8") as file:
        html_input = file.read()
    print(html_to_json(html_input))