awacke1 committed on
Commit
a06050e
•
1 Parent(s): 443263b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -2
app.py CHANGED
@@ -4,7 +4,8 @@ import os
4
  import base64
5
  import glob
6
  import json
7
- from xml.etree import ElementTree
 
8
  from datetime import datetime
9
  from dotenv import load_dotenv
10
  from openai import ChatCompletion
@@ -38,6 +39,19 @@ def get_table_download_link(file_path):
38
  href = f'<a href="data:file/htm;base64,{b64}" target="_blank" download="{os.path.basename(file_path)}">{os.path.basename(file_path)}</a>'
39
  return href
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  def read_file_content(file):
42
  if file.type == "application/json":
43
  content = json.load(file)
@@ -45,10 +59,15 @@ def read_file_content(file):
45
  elif file.type == "text/html":
46
  content = BeautifulSoup(file, "html.parser")
47
  return content.text
48
- elif file.type == "application/xml" or file.type == "text/xml":
49
  tree = ElementTree.parse(file)
50
  root = tree.getroot()
51
  return ElementTree.tostring(root, encoding='unicode')
 
 
 
 
 
52
  elif file.type == "text/plain":
53
  return file.getvalue().decode()
54
  else:
 
4
  import base64
5
  import glob
6
  import json
7
+ import re
8
+ from xml.etree import ElementTree as ET
9
  from datetime import datetime
10
  from dotenv import load_dotenv
11
  from openai import ChatCompletion
 
39
  href = f'<a href="data:file/htm;base64,{b64}" target="_blank" download="{os.path.basename(file_path)}">{os.path.basename(file_path)}</a>'
40
  return href
41
 
42
def CompressXML_Old(xml_text):
    """Reduce *xml_text* to its purely alphabetic tokens.

    The text is split on whitespace and only the tokens consisting
    entirely of ASCII letters are kept; anything containing digits,
    punctuation or markup characters (``<``, ``>``, ``=``, quotes, ...)
    is dropped.

    Args:
        xml_text: The raw text to filter.

    Returns:
        The surviving tokens joined by single spaces (an empty string
        when no token qualifies).
    """
    kept_tokens = []
    for token in xml_text.split():
        # A token survives only when the whole of it matches the pattern.
        if re.fullmatch(r'[A-Za-z ]*', token) is not None:
            kept_tokens.append(token)
    return ' '.join(kept_tokens)
47
+
48
def CompressXML(xml_text):
    """Return *xml_text* re-serialized with XML comments stripped.

    Args:
        xml_text: A string (or bytes) holding one well-formed XML document.

    Returns:
        The document serialized back to a ``str`` without any comment
        nodes.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_text* is not well-formed.
    """
    # The standard-library parser skips comments while building the tree,
    # so parsing and serializing again is enough to drop them. The previous
    # implementation tried to remove them by hand using
    # ``isinstance(elem.tag, ET.Comment)`` (``ET.Comment`` is a factory
    # function, not a type, so this raised TypeError) and
    # ``elem.getparent()`` (an lxml-only method that stdlib elements lack).
    root = ET.fromstring(xml_text)
    return ET.tostring(root, encoding='unicode')
54
+
55
  def read_file_content(file):
56
  if file.type == "application/json":
57
  content = json.load(file)
 
59
  elif file.type == "text/html":
60
  content = BeautifulSoup(file, "html.parser")
61
  return content.text
62
+ elif file.type == "application/xmlold" or file.type == "text/xmlold":
63
  tree = ElementTree.parse(file)
64
  root = tree.getroot()
65
  return ElementTree.tostring(root, encoding='unicode')
66
+ elif file.type == "application/xml" or file.type == "text/xml":
67
+ tree = ElementTree.parse(file)
68
+ root = tree.getroot()
69
+ xml_text = ElementTree.tostring(root, encoding='unicode')
70
+ return CompressXML(xml_text)
71
  elif file.type == "text/plain":
72
  return file.getvalue().decode()
73
  else: