{"id":23932,"library":"keyphrase-vectorizers","title":"Keyphrase Vectorizers","description":"Set of vectorizers that extract keyphrases with part-of-speech patterns from a collection of text documents and convert them into a document-keyphrase matrix. Current version 0.0.13, requires Python >=3.7 and spaCy. Releases are intermittent.","status":"active","version":"0.0.13","language":"python","source_language":"en","source_url":"https://github.com/TimSchopf/KeyphraseVectorizers","tags":["nlp","keyphrase extraction","vectorizer","spacy","scikit-learn"],"install":[{"cmd":"pip install keyphrase-vectorizers","lang":"bash","label":"pip"}],"dependencies":[{"reason":"Required for POS tagging and keyphrase extraction; a spaCy language model (e.g., en_core_web_sm) must also be installed.","package":"spacy","optional":false},{"reason":"Used for vectorization and matrix operations.","package":"scikit-learn","optional":false}],"imports":[{"note":"The correct import is directly from the package; nested module import fails.","wrong":"from keyphrase_vectorizers.KeyphraseVectorizers import KeyphraseCountVectorizer","symbol":"KeyphraseCountVectorizer","correct":"from keyphrase_vectorizers import KeyphraseCountVectorizer"},{"note":"The wrong form misspells the module path, not the class name: the package has no 'KeyphraseVectorizer' submodule, so import the class directly from the package root.","wrong":"from keyphrase_vectorizers.KeyphraseVectorizer import KeyphraseTfidfVectorizer","symbol":"KeyphraseTfidfVectorizer","correct":"from keyphrase_vectorizers import KeyphraseTfidfVectorizer"}],"quickstart":{"code":"import spacy\nfrom keyphrase_vectorizers import KeyphraseCountVectorizer\n\n# Download spaCy model if not already present\n# spacy.cli.download('en_core_web_sm')\nnlp = spacy.load('en_core_web_sm')\n\ndocs = [\n    \"Natural language processing enables computers to understand human language.\",\n    \"Machine learning is a subset of artificial intelligence.\"\n]\n\nvectorizer = KeyphraseCountVectorizer(spacy_pipeline=nlp)\nX = 
vectorizer.fit_transform(docs)\nprint(vectorizer.get_feature_names_out())\nprint(X.toarray())","lang":"python","description":"Basic usage: load spaCy model, create vectorizer, fit on documents, and inspect extracted keyphrases."},"warnings":[{"fix":"Always load the model once and pass the nlp object: `nlp = spacy.load('en_core_web_sm'); vectorizer = KeyphraseCountVectorizer(spacy_pipeline=nlp)`","message":"The `spacy_pipeline` parameter expects a spaCy Language object (from spacy.load) or a string (e.g., 'en_core_web_sm'). Passing a string causes the pipeline to be loaded again each time the vectorizer processes documents, which degrades performance.","severity":"gotcha","affected_versions":">=0.0.11"},{"fix":"Use `workers` instead of `multiprocessing` when specifying the number of parallel processes.","message":"The parameter `multiprocessing` was renamed to `workers` in v0.0.6. Using `multiprocessing` will raise a TypeError.","severity":"deprecated","affected_versions":">=0.0.6"},{"fix":"If performance degrades, explicitly disable unnecessary pipeline components via the `spacy_exclude` parameter.","message":"In v0.0.9, the default exclusion of certain spaCy pipeline components was removed. 
This can slow down keyphrase extraction but ensures compatibility with all spaCy pipelines, especially transformer-based ones.","severity":"breaking","affected_versions":">=0.0.9"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Run `pip install keyphrase-vectorizers` to install.","cause":"The package is not installed in the active Python environment. Note that the pip distribution name uses a hyphen (`keyphrase-vectorizers`) while the import name uses an underscore (`keyphrase_vectorizers`).","error":"ModuleNotFoundError: No module named 'keyphrase_vectorizers'"},{"fix":"Pass either a preloaded spaCy Language object or a valid spaCy model name string, e.g., 'en_core_web_sm'.","cause":"Passed an unsupported type (e.g., integer) as `spacy_pipeline`.","error":"ValueError: The 'spacy_pipeline' parameter must be a string or spacy Language object."},{"fix":"Run `python -m spacy download en_core_web_sm` (or the equivalent for your language).","cause":"The required spaCy model is not installed.","error":"OSError: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory."}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}