{"id":27026,"library":"goose3","title":"Goose3","description":"Goose3 is an HTML content/article extractor and web scraper for Python 3 (requires Python >=3.9). It extracts the main content, title, authors, metadata (OpenGraph, schema.org), and images from news articles and web pages. The current version is 3.1.21, with irregular releases as fixes accumulate.","status":"active","version":"3.1.21","language":"python","source_language":"en","source_url":"https://github.com/goose3/goose3","tags":["web scraping","content extraction","article extraction","html parser","news"],"install":[{"cmd":"pip install goose3","lang":"bash","label":"Standard install"}],"dependencies":[],"imports":[{"note":"The old 'goose' library is a different, unmaintained project.","wrong":"from goose import Goose","symbol":"Goose","correct":"from goose3 import Goose"},{"note":"Article is typically accessed via Goose.extract() return, not imported directly by users.","symbol":"Article","correct":"from goose3.article import Article"}],"quickstart":{"code":"from goose3 import Goose\n\nurl = 'https://www.bbc.com/news/world-us-canada-68942345'\nwith Goose() as g:\n    article = g.extract(url=url)\nprint(article.title)\nprint(article.cleaned_text[:200])","lang":"python","description":"Extract article title and cleaned text from a URL."},"warnings":[{"fix":"Replace `article.getTags()` with `article.tags`, `article.getTones()` with `article.tones`.","message":"camelCase methods (e.g., `getTags()`, `getTones()`) are deprecated since v3.1.13. Use snake_case equivalents (`tags`, `tones`).","severity":"deprecated","affected_versions":">=3.1.13"},{"fix":"Use a headless browser like Selenium or Playwright to get the rendered HTML, then pass it to Goose3.","message":"Goose3 does not handle JavaScript-rendered pages. Only static HTML content is extracted.","severity":"gotcha","affected_versions":"all"},{"fix":"Upgrade Python to 3.9 or higher.","message":"Python 3.7 and 3.8 support removed in v3.1.20. Requires Python >=3.9.","severity":"breaking","affected_versions":">=3.1.20"},{"fix":"Always provide a full URL with http:// or https:// scheme.","message":"The `extract()` method can raise `requests.exceptions.MissingSchema` if the URL doesn't include a scheme (e.g., 'example.com' instead of 'https://example.com').","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Install goose3: pip install goose3 and import from goose3 import Goose.","cause":"Attempting to import from the old 'goose' library (Python 2) instead of 'goose3'.","error":"ModuleNotFoundError: No module named 'goose'"},{"fix":"Prepend 'https://' to the URL before calling extract().","cause":"URL passed to extract() is missing the scheme (http:// or https://).","error":"requests.exceptions.MissingSchema: Invalid URL 'example.com/article': No schema supplied. Perhaps you meant http://example.com/article?"},{"fix":"Check that article is not None and that the page contains the expected data.","cause":"Occasionally occurs when accessing article.tags or article.authors if extraction fails (e.g., network error or non-article page).","error":"TypeError: 'NoneType' object is not iterable"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}