{"library":"newspaper3k","title":"Newspaper3k","description":"Newspaper3k is a Python 3 library designed for simplified article discovery, extraction, and natural language processing (NLP) from news websites. It excels at extracting main content, metadata like title, author, publish date, images, and videos, as well as generating keywords and summaries. Although its last PyPI release was in 2018, it remains functional for many use cases, though a community fork (`newspaper4k`) provides more active development and modern features.","language":"python","status":"maintenance","last_verified":"Fri May 15","install":{"commands":["pip install newspaper3k"],"cli":null},"imports":["from newspaper import Article","import newspaper; newspaper.build(...)","from newspaper import Config"],"auth":{"required":false,"env_vars":[]},"quickstart":{"code":"import newspaper\nfrom newspaper import Article, Config\nimport os\n\n# Configure a user agent to avoid being blocked\nconfig = Config()\nconfig.browser_user_agent = os.environ.get('USER_AGENT', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36')\nconfig.request_timeout = 10 # Set a timeout\n\n# Ensure NLTK 'punkt' is downloaded for NLP features\ntry:\n    import nltk\n    nltk.data.find('tokenizers/punkt')\nexcept LookupError:\n    print(\"Downloading NLTK 'punkt' tokenizer...\")\n    nltk.download('punkt')\n    print(\"NLTK 'punkt' tokenizer downloaded.\")\n\nurl = 'https://www.reuters.com/world/europe/ukraine-braces-russian-attacks-east-civilians-flee-2022-04-08/'\narticle = Article(url, config=config)\n\narticle.download()\narticle.parse()\n\nprint(f\"Title: {article.title}\")\nprint(f\"Authors: {article.authors}\")\nprint(f\"Publish Date: {article.publish_date}\")\nprint(f\"Top Image: {article.top_image}\")\nprint(f\"\\nText (first 500 chars):\\n{article.text[:500]}...\")\n\narticle.nlp() # Run NLP for keywords and 
summary\nprint(f\"\\nKeywords: {article.keywords}\")\nprint(f\"Summary: {article.summary[:200]}...\")\n\n# Example for a news source\n# cnn_paper = newspaper.build('http://cnn.com', config=config)\n# print(f\"CNN has {cnn_paper.size()} articles.\")\n# for article_obj in cnn_paper.articles[:3]:\n#     print(f\"  - {article_obj.url}\")","lang":"python","description":"This quickstart demonstrates how to extract an article's content and metadata, including NLP-generated keywords and summaries. It also includes configuration for a user agent and NLTK 'punkt' tokenizer download, which is necessary for NLP features.","tag":null,"tag_description":null,"last_tested":null,"results":[]},"compatibility":{"tag":null,"tag_description":null,"last_tested":"2026-05-15","installed_version":"0.2.8","pypi_latest":"0.2.8","is_stale":false,"summary":{"python_range":"3.9–3.13","success_rate":100,"avg_install_s":11,"avg_import_s":null,"wheel_type":"sdist"},"results":[{"runtime":"python:3.10-alpine","python_version":"3.10","os_libc":"alpine (musl)","variant":"newspaper3k","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":"121.1M"},{"runtime":"python:3.10-slim","python_version":"3.10","os_libc":"slim (glibc)","variant":"newspaper3k","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":9.8,"import_time_s":null,"mem_mb":null,"disk_size":"123M"},{"runtime":"python:3.11-alpine","python_version":"3.11","os_libc":"alpine (musl)","variant":"newspaper3k","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":"114.7M"},{"runtime":"python:3.11-slim","python_version":"3.11","os_libc":"slim 
(glibc)","variant":"newspaper3k","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":9.2,"import_time_s":null,"mem_mb":null,"disk_size":"117M"},{"runtime":"python:3.12-alpine","python_version":"3.12","os_libc":"alpine (musl)","variant":"newspaper3k","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":"103.5M"},{"runtime":"python:3.12-slim","python_version":"3.12","os_libc":"slim (glibc)","variant":"newspaper3k","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":13.1,"import_time_s":null,"mem_mb":null,"disk_size":"106M"},{"runtime":"python:3.13-alpine","python_version":"3.13","os_libc":"alpine (musl)","variant":"newspaper3k","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":"103.2M"},{"runtime":"python:3.13-slim","python_version":"3.13","os_libc":"slim (glibc)","variant":"newspaper3k","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":12,"import_time_s":null,"mem_mb":null,"disk_size":"105M"},{"runtime":"python:3.9-alpine","python_version":"3.9","os_libc":"alpine (musl)","variant":"newspaper3k","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":null,"import_time_s":null,"mem_mb":null,"disk_size":"101.7M"},{"runtime":"python:3.9-slim","python_version":"3.9","os_libc":"slim (glibc)","variant":"newspaper3k","exit_code":0,"wheel_type":"sdist","failure_reason":null,"import_side_effects":"broken","install_time_s":10.9,"import_time_s":null,"mem_mb":null,"disk_size":"104M"}]}}