Spaces:
Sleeping
Sleeping
Update analyze.py
Browse files
- analyze.py +3 -5
analyze.py
CHANGED
@@ -11,7 +11,8 @@ Row = dict[str, Any]
|
|
11 |
T = TypeVar("T")
|
12 |
BATCH_SIZE = 1
|
13 |
MAX_TEXT_LENGTH = 500
|
14 |
-
|
|
|
15 |
|
16 |
|
17 |
class PresidioEntity(TypedDict):
|
@@ -121,16 +122,13 @@ def analyze(
|
|
121 |
def presidio_scan_entities(
|
122 |
rows: Iterable[Row], scanned_columns: list[str], columns_descriptions: list[str]
|
123 |
) -> Iterable[PresidioEntity]:
|
124 |
-
global batch_analyzer
|
125 |
cache: dict[str, list[RecognizerResult]] = {}
|
126 |
-
if batch_analyzer is None:
|
127 |
-
batch_analyser = BatchAnalyzerEngine(AnalyzerEngine())
|
128 |
rows_with_scanned_columns_only = (
|
129 |
{column_name: get_strings(row[column_name])[:MAX_TEXT_LENGTH] for column_name in scanned_columns} for row in rows
|
130 |
)
|
131 |
for indices, batch in batched(rows_with_scanned_columns_only, BATCH_SIZE, with_indices=True):
|
132 |
yield from analyze(
|
133 |
-
batch_analyzer=
|
134 |
batch=batch,
|
135 |
indices=indices,
|
136 |
scanned_columns=scanned_columns,
|
|
|
11 |
T = TypeVar("T")
|
12 |
BATCH_SIZE = 1
|
13 |
MAX_TEXT_LENGTH = 500
|
14 |
+
analyzer = AnalyzerEngine()
|
15 |
+
batch_analyzer = BatchAnalyzerEngine(analyzer)
|
16 |
|
17 |
|
18 |
class PresidioEntity(TypedDict):
|
|
|
122 |
def presidio_scan_entities(
|
123 |
rows: Iterable[Row], scanned_columns: list[str], columns_descriptions: list[str]
|
124 |
) -> Iterable[PresidioEntity]:
|
|
|
125 |
cache: dict[str, list[RecognizerResult]] = {}
|
|
|
|
|
126 |
rows_with_scanned_columns_only = (
|
127 |
{column_name: get_strings(row[column_name])[:MAX_TEXT_LENGTH] for column_name in scanned_columns} for row in rows
|
128 |
)
|
129 |
for indices, batch in batched(rows_with_scanned_columns_only, BATCH_SIZE, with_indices=True):
|
130 |
yield from analyze(
|
131 |
+
batch_analyzer=batch_analyzer,
|
132 |
batch=batch,
|
133 |
indices=indices,
|
134 |
scanned_columns=scanned_columns,
|