{"id":23979,"library":"llama-index-readers-web","title":"LlamaIndex Readers Web","description":"A collection of web-based data readers for LlamaIndex, enabling ingestion from URLs, web pages, and online documents. Currently at version 0.6.0, distributed as part of the LlamaIndex ecosystem. Release cadence is irregular, tied to LlamaIndex updates.","status":"active","version":"0.6.0","language":"python","source_language":"en","source_url":"https://github.com/run-llama/llama_index","tags":["llama-index","readers","web","scraping"],"install":[{"cmd":"pip install llama-index-readers-web","lang":"bash","label":"Install from PyPI"}],"dependencies":[{"reason":"Core LlamaIndex abstractions (Document, BaseReader) are required.","package":"llama-index-core","optional":false}],"imports":[{"note":"Wrong nesting; correct is direct import from web module.","wrong":"from llama_index.readers.web.BeautifulSoupWebReader import BeautifulSoupWebReader","symbol":"BeautifulSoupWebReader","correct":"from llama_index.readers.web import BeautifulSoupWebReader"},{"note":"Old top-level import path removed in recent versions.","wrong":"from llama_index.web import SimpleWebPageReader","symbol":"SimpleWebPageReader","correct":"from llama_index.readers.web import SimpleWebPageReader"},{"note":"Correct import path.","wrong":null,"symbol":"TrafilaturaWebReader","correct":"from llama_index.readers.web import TrafilaturaWebReader"}],"quickstart":{"code":"from llama_index.readers.web import SimpleWebPageReader\n\nreader = SimpleWebPageReader()\ndocs = reader.load_data(urls=[\"https://example.com\"])\nprint(docs[0].text[:100])","lang":"python","description":"Load a web page into a Document object using SimpleWebPageReader."},"warnings":[{"fix":"Use 'from llama_index.readers.web import ...' instead of 'from llama_index import ...'.","message":"Import paths changed in v0.10+. Readers are now under llama_index.readers.web, not top-level llama_index.web.","severity":"breaking","affected_versions":">=0.10.0"},{"fix":"Install extra dependencies: pip install llama-index-readers-web[beautifulsoup4] or pip install beautifulsoup4 requests.","message":"SimpleWebPageReader requires requests and beautifulsoup4 as dependencies; they are not installed by default.","severity":"gotcha","affected_versions":"all"},{"fix":"Install lxml if you encounter parser errors: pip install lxml.","message":"Some readers (e.g., BeautifulSoupWebReader) require additional dependencies like lxml for certain parsers.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"Upgrade to latest version (>=0.10.0) and use correct import: from llama_index.readers.web import ...","cause":"Older version of llama-index-readers-web or incorrect import path.","error":"ModuleNotFoundError: No module named 'llama_index.readers.web'"},{"fix":"Change import to: from llama_index.readers.web import SimpleWebPageReader","cause":"Using old top-level import path from before v0.10.","error":"ImportError: cannot import name 'SimpleWebPageReader' from 'llama_index'"},{"fix":"Pass a non-empty list of URL strings: reader.load_data(urls=['https://example.com'])","cause":"Called load_data() with an empty list or no urls parameter.","error":"ValueError: You must provide at least one URL."}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}