|
import re |
|
from bs4 import BeautifulSoup |
|
|
|
def parse_html_prompt(input_str): |
|
soup = BeautifulSoup(input_str, "html.parser") |
|
|
|
|
|
p_content = soup.find("p").decode_contents().replace("<br>", "\n") |
|
p_content = re.sub(r'<span[^>]*>(.*?)</span>', r'<\1>', p_content) |
|
template = p_content.strip().replace(' <br/>', '').replace(' ', '').replace('<br/>', '') |
|
|
|
|
|
components = {} |
|
for item in soup.find_all("div", class_="component-item"): |
|
key_span = item.find("div", class_="component-key").find("span") |
|
key = key_span.get_text(strip=True) if key_span else "" |
|
value_div = item.find("div", class_="component-value") |
|
value_content = value_div.decode_contents() |
|
value_content = re.sub(r'<span[^>]*>(.*?)</span>', r'{\1}', value_content) |
|
components[key] = value_content.strip().replace(' <br/>', '').replace('<br/>', '') |
|
|
|
|
|
self_prompt = {} |
|
for item in soup.find_all("div", class_="self-info-item"): |
|
key_span = item.find("div", class_="component-key").find("span") |
|
key = key_span.get_text(strip=True) if key_span else "" |
|
value_div = item.find("div", class_="component-value") |
|
value = value_div.get_text(strip=True) if value_div else "" |
|
self_prompt[key] = value.replace(' <br/>', '').replace('<br/>', '') |
|
|
|
return { |
|
'template': template, |
|
'components': components, |
|
'self_prompt': self_prompt |
|
} |
|
|
|
|