se0kcess committed on
Commit
418a451
•
1 Parent(s): 2766df9
Files changed (1) hide show
  1. app.py +28 -15
app.py CHANGED
@@ -24,21 +24,34 @@ def get_pdf_text(pdf_docs):
24
 
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
- def get_text_file(docs):
28
- # 텍스트 파일에서 텍스트를 읽어옵니다.
29
- text_content = docs.read()
30
- return text_content
31
-
32
- def get_csv_file(docs):
33
- # CSV 파일에서 텍스트를 읽어옵니다.
34
- csv_reader = csv.reader(docs)
35
- csv_content = "\n".join(",".join(row) for row in csv_reader)
36
- return csv_content
37
-
38
- def get_json_file(docs):
39
- # JSON 파일에서 텍스트를 읽어옵니다.
40
- json_content = json.load(docs)
41
- return json.dumps(json_content, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
44
  def get_text_chunks(documents):
 
24
 
25
  # 과제
26
  # 아래 텍스트 추출 함수를 작성
27
+ def get_text_file(text_docs):
28
+ temp_dir = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
29
+ temp_filepath = os.path.join(temp_dir.name, text_docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
30
+ with open(temp_filepath, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ์„ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ฝ๋‹ˆ๋‹ค.
31
+ f.write(text_docs.getvalue()) # text ๋ฌธ์„œ์˜ ๋‚ด์šฉ์„ ์ž„์‹œ ํŒŒ์ผ์— ์”๋‹ˆ๋‹ค.
32
+ text_loader = TextLoader(temp_filepath) # TextLoader๋ฅผ ์‚ฌ์šฉํ•ด text๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
33
+ text_doc = text_loader.load() # ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.
34
+ return text_doc # ์ถ”์ถœํ•œ ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
35
+
36
+ def get_csv_file(csv_docs):
37
+ temp_dir = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
38
+ temp_filepath = os.path.join(temp_dir.name, csv_docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
39
+ with open(temp_filepath, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ์„ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ฝ๋‹ˆ๋‹ค.
40
+ f.write(csv_docs.getvalue()) # CSV ๋ฌธ์„œ์˜ ๋‚ด์šฉ์„ ์ž„์‹œ ํŒŒ์ผ์— ์”๋‹ˆ๋‹ค.
41
+ csv_loader = CSVLoader(temp_filepath) # CSVLoader๋ฅผ ์‚ฌ์šฉํ•ด csv๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
42
+ csv_doc = csv_loader.load() # ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.
43
+ return csv_doc # ์ถ”์ถœํ•œ ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
44
+
45
+ def get_json_file(json_docs):
46
+ temp_dir = tempfile.TemporaryDirectory() # ์ž„์‹œ ๋””๋ ‰ํ† ๋ฆฌ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
47
+ temp_filepath = os.path.join(temp_dir.name, json_docs.name) # ์ž„์‹œ ํŒŒ์ผ ๊ฒฝ๋กœ๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
48
+ with open(temp_filepath, "wb") as f: # ์ž„์‹œ ํŒŒ์ผ์„ ๋ฐ”์ด๋„ˆ๋ฆฌ ์“ฐ๊ธฐ ๋ชจ๋“œ๋กœ ์—ฝ๋‹ˆ๋‹ค.
49
+ f.write(json_docs.getvalue()) # JSON๋ฌธ์„œ์˜ ๋‚ด์šฉ์„ ์ž„์‹œ ํŒŒ์ผ์— ์”๋‹ˆ๋‹ค.
50
+ json_loader = JSONLoader(temp_filepath) # JSONLoader๋ฅผ ์‚ฌ์šฉํ•ด JSON๋ฅผ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
51
+ json_doc = json_loader.load() # ํ…์ŠคํŠธ๋ฅผ ์ถ”์ถœํ•ฉ๋‹ˆ๋‹ค.
52
+ return json_doc # ์ถ”์ถœํ•œ ํ…์ŠคํŠธ๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
53
+
54
+
55
 
56
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
57
  def get_text_chunks(documents):