oroszgy's picture
feat: initial commit
f214d73 unverified
raw
history blame
No virus
513 Bytes
from typing import List, Tuple
import pandas as pd
from examples.common import NLP
from textacy.extract.keyterms.sgrank import sgrank as keywords
def process(text: str) -> pd.DataFrame:
    """Extract keyphrases from *text* and return them as a table.

    The text is run through the shared ``NLP`` pipeline and the resulting
    document is passed to textacy's ``sgrank`` with ``topn=10``. A term is
    dropped when it occurs inside a different extracted term, so of several
    overlapping phrases only the longest one is kept.

    Args:
        text: Raw input text to analyse.

    Returns:
        A DataFrame with the columns ``Keyphrase`` and ``Probability`` (the
        score reported by the ranker), one row per retained term, in the
        order the ranker returned them. Both columns are present even when
        no term is retained.
    """
    doc = NLP(text)
    terms: List[Tuple[str, float]] = keywords(doc, topn=10)
    phrases = [phrase for phrase, _ in terms]

    def is_subsumed(term: str) -> bool:
        # NOTE(review): this is a character-level substring test, so a short
        # term such as "art" is also dropped because of "smart phone".
        # Confirm whether word-level containment was intended.
        return any(other != term and term in other for other in phrases)

    rows = [
        {"Keyphrase": term, "Probability": score}
        for term, score in terms
        if not is_subsumed(term)
    ]
    # Name the columns explicitly: without this an empty `rows` list yields a
    # DataFrame with no columns, which breaks callers that select by column.
    return pd.DataFrame(rows, columns=["Keyphrase", "Probability"])