{"id":26832,"library":"cleantext","title":"cleantext","description":"An open-source Python package to clean raw text data. Version 1.1.4 provides simple functions to normalize whitespace, remove URLs, emojis, numbers, punctuation, and more. The package is relatively stable with a low release cadence. It is widely used for quick text preprocessing in NLP pipelines.","status":"active","version":"1.1.4","language":"python","source_language":"en","source_url":"https://github.com/prasanthg3/cleantext","tags":["text-cleaning","nlp","preprocessing","data-cleaning"],"install":[{"cmd":"pip install cleantext","lang":"bash","label":"Install from PyPI"}],"dependencies":[],"imports":[{"note":"Direct import is straightforward","wrong":"","symbol":"clean","correct":"from cleantext import clean"}],"quickstart":{"code":"from cleantext import clean\n\ntext = \"Hello! Check out https://example.com 😊 I have 5 apples...\"\ncleaned = clean(\n    text,\n    extra_spaces=True,\n    lowercase=False,\n    numbers=False,\n    punct=False,\n    replace_with_url='<URL>',\n    replace_with_number='<NUM>',\n    replace_with_punct='',\n    lang='en'\n)\nprint(cleaned)","lang":"python","description":"Basic example: removes URLs, numbers, punctuation, and extra spaces, leaving the rest of the text intact."},"warnings":[{"fix":"Use `cleaned_text = ' '.join(clean(...))` if you need a single string.","message":"The 'clean' function returns a list of tokens by default (not a string). If you expect a single string, set 'lowercase=False' and then join the list, or check the output type.","severity":"gotcha","affected_versions":"all versions"},{"fix":"Remove 'fix_unicode' from the call. Unicode normalization is now handled automatically.","message":"The 'fix_unicode' parameter was removed in version 1.0.0. Using it raises an error.","severity":"deprecated","affected_versions":">=1.0.0"},{"fix":"Review your function calls and adjust parameter names. For example, 'no_urls' is now 'replace_with_url'.","message":"In version 1.0.0, the 'clean' function changed its default parameter values and removed some arguments like 'fix_unicode'. Code written for <1.0.0 may break.","severity":"breaking","affected_versions":">=1.0.0"},{"fix":"Use the 'replace_with_punct' parameter with a placeholder instead of removing punctuation entirely.","message":"The 'clean' function can remove too much. For instance, setting 'punct=False' removes all punctuation including periods in abbreviations.","severity":"gotcha","affected_versions":"all versions"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Run 'pip install cleantext' to install the package.","cause":"Package not installed.","error":"ModuleNotFoundError: No module named 'cleantext'"},{"fix":"Remove 'fix_unicode' from your function call. Unicode normalization is handled automatically.","cause":"The 'fix_unicode' parameter was removed in version 1.0.0.","error":"TypeError: clean() got an unexpected keyword argument 'fix_unicode'"},{"fix":"Use `cleaned_text = ' '.join(clean(...))` to get a single string.","cause":"The 'clean' function returns a list of tokens by default.","error":"clean() returns a list, not string"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}