from docx import Document import os import sys import languages def docx_replace(doc, data): paragraphs = list(doc.paragraphs) for t in doc.tables: for row in t.rows: for cell in row.cells: for paragraph in cell.paragraphs: paragraphs.append(paragraph) for key, val in data.items(): for p in paragraphs: #key_name = '${{{}}}'.format(key) # I'm using placeholders in the form ${PlaceholderName} key_name = key if key_name in p.text: #print(f'old one {p.text}') inline = p.runs # Replace strings and retain the same style. # The text to be replaced can be split over several runs so # search through, identify which runs need to have text replaced # then replace the text in those identified started = False key_index = 0 # found_runs is a list of (inline index, index of match, length of match) found_runs = list() found_all = False replace_done = False for i in range(len(inline)): # case 1: found in single run so short circuit the replace if key_name in inline[i].text and not started: found_runs.append((i, inline[i].text.find(key_name), len(key_name))) text = inline[i].text.replace(key_name, str(val)) inline[i].text = text replace_done = True found_all = True break if key_name[key_index] not in inline[i].text and not started: # keep looking ... continue # case 2: search for partial text, find first run if key_name[key_index] in inline[i].text and inline[i].text[-1] in key_name and not started: # check sequence start_index = inline[i].text.find(key_name[key_index]) check_length = len(inline[i].text) for text_index in range(start_index, check_length): if inline[i].text[text_index] != key_name[key_index]: # no match so must be false positive break if key_index == 0: started = True chars_found = check_length - start_index key_index += chars_found found_runs.append((i, start_index, chars_found)) if key_index != len(key_name): continue else: # found all chars in key_name found_all = True break # case 2: search for partial text, find subsequent run if key_name[key_index] in inline[i].text and started and not found_all: # check sequence chars_found = 0 check_length = len(inline[i].text) for text_index in range(0, check_length): if inline[i].text[text_index] == key_name[key_index]: key_index += 1 chars_found += 1 else: break # no match so must be end found_runs.append((i, 0, chars_found)) if key_index == len(key_name): found_all = True break if found_all and not replace_done: for i, item in enumerate(found_runs): index, start, length = [t for t in item] if i == 0: text = inline[index].text.replace(inline[index].text[start:start + length], str(val)) inline[index].text = text else: text = inline[index].text.replace(inline[index].text[start:start + length], '') inline[index].text = text #print(p.text) def translate_fill(document_name,output_file, src, trg): template_document = Document(document_name) variables = {} for paragraph in template_document.paragraphs: if(paragraph.text.strip() != ""): variables[paragraph.text] = languages(paragraph.text, src, trg) for t in template_document.tables: for row in t.rows: for cell in row.cells: for paragraph in cell.paragraphs: if(paragraph.text.strip() != ""): variables[paragraph.text] = languages(paragraph.text, src, trg) docx_replace(template_document, variables) template_document.save(output_file) return variables sys.modules[__name__] = translate_fill ## args = translate_fill(document_name, output_name, src, trg)