{"id":24646,"library":"sqlframe","title":"SQLFrame","description":"SQLFrame is a Python library that translates PySpark DataFrame API calls into SQL queries for multiple database engines (BigQuery, DuckDB, Postgres, Snowflake, Spark, etc.). Version 4.1.0 requires Python >=3.10 and uses sqlglot for SQL generation. Release cadence is approximately bi-weekly.","status":"active","version":"4.1.0","language":"python","source_language":"en","source_url":"https://github.com/eakmanrq/sqlframe","tags":["pyspark","sql","dataframe","sqlglot","bigquery","duckdb","snowflake","postgres","spark"],"install":[{"cmd":"pip install sqlframe","lang":"bash","label":"Core installation"},{"cmd":"pip install sqlframe[bigquery]","lang":"bash","label":"With BigQuery support"},{"cmd":"pip install sqlframe[duckdb]","lang":"bash","label":"With DuckDB support"},{"cmd":"pip install sqlframe[snowflake]","lang":"bash","label":"With Snowflake support"},{"cmd":"pip install sqlframe[postgres]","lang":"bash","label":"With Postgres support"},{"cmd":"pip install sqlframe[spark]","lang":"bash","label":"With Spark support"}],"dependencies":[{"reason":"SQL generation engine","package":"sqlglot","optional":false},{"reason":"Required for DuckDB engine","package":"duckdb","optional":true},{"reason":"Required for Snowflake engine","package":"snowflake-connector-python","optional":true},{"reason":"Required for Postgres engine","package":"psycopg2-binary","optional":true},{"reason":"Required for BigQuery engine","package":"google-cloud-bigquery","optional":true},{"reason":"Required for Spark engine","package":"pyspark","optional":true}],"imports":[{"note":"Session is directly available from sqlframe, not from a submodule","wrong":"from sqlframe.session import Session","symbol":"Session","correct":"from sqlframe import Session"},{"note":"DataFrame is top-level in sqlframe, not in sqlframe.sql","wrong":"from sqlframe.sql import DataFrame","symbol":"DataFrame","correct":"from sqlframe import 
DataFrame"},{"note":"Functions module is directly under sqlframe, not under sqlframe.sql","wrong":"from sqlframe.sql import functions as F","symbol":"functions","correct":"from sqlframe import functions as F"}],"quickstart":{"code":"import os\nfrom sqlframe import Session\n\n# Create a session for DuckDB (no external DB needed)\nengine = \"duckdb\"\nos.environ[\"SQLFRAME_ENGINE\"] = engine  # optional\nsession = Session.builder.config(\"extension\", engine).getOrCreate()\n\n# Create a DataFrame from a list of tuples\ndf = session.createDataFrame([(1, \"Alice\"), (2, \"Bob\")], schema=[\"id\", \"name\"])\ndf.show()\n\n# Apply transformations\ndf_filtered = df.filter(df.name == \"Alice\").select(df.id)\nprint(df_filtered.sql())  # print generated SQL\n","lang":"python","description":"Quickstart using DuckDB engine (no external database needed). Set up a session, create a DataFrame, apply filters, and inspect generated SQL."},"warnings":[{"fix":"Upgrade to 4.0.0+ and use Session.builder.config(\"extension\", \"duckdb\") (or other engine).","message":"In version 4.0.0, the engine configuration changed. Previously you might have set an environment variable or used a different builder pattern. Now use Session.builder.config(\"extension\", \"engine_name\").getOrCreate(). The old pattern with spark = SQLFrame(engine='duckdb') is removed.","severity":"breaking","affected_versions":"<4.0.0"},{"fix":"Use .show() to preview results, .collect() to get a list of Row objects, or .toPandas() to get a Pandas DataFrame.","message":"SQLFrame does not execute queries by default. Use .show() or .collect() to actually run the query against the engine. Calling .sql() only returns the generated SQL string.","severity":"gotcha","affected_versions":"all"},{"fix":"Change imports to from sqlframe import DataFrame, Session, functions as F.","message":"The old import path from sqlframe.sql import DataFrame is deprecated as of version 4.0.0. 
Use from sqlframe import DataFrame instead.","severity":"deprecated","affected_versions":">=4.0.0"},{"fix":"Create one session per engine: session_duckdb = Session.builder.config(\"extension\", \"duckdb\").getOrCreate() and session_bq = Session.builder.config(\"extension\", \"bigquery\").getOrCreate().","message":"When using multiple engines in the same project, you must create a separate Session for each engine. Sharing sessions across different engine types will lead to errors.","severity":"gotcha","affected_versions":"all"}],"env_vars":null,"last_verified":"2026-05-01T00:00:00.000Z","next_check":"2026-07-30T00:00:00.000Z","problems":[{"fix":"pip install sqlframe[engine] where engine is duckdb, bigquery, snowflake, postgres, or spark.","cause":"sqlframe package not installed.","error":"ModuleNotFoundError: No module named 'sqlframe'"},{"fix":"Upgrade sqlframe: pip install --upgrade sqlframe","cause":"Outdated version of sqlframe (<4.0.0) where Session was not yet introduced.","error":"AttributeError: module 'sqlframe' has no attribute 'Session'"},{"fix":"Ensure you have registered the table using session.registerDataFrame(df, \"table_name\") or using createOrReplaceTempView on the DataFrame.","cause":"Trying to run a SQL statement referencing a table that does not exist in the target database.","error":"pyspark.sql.utils.AnalysisException: Table or view not found: ..."},{"fix":"Check the engine name is one of: duckdb, spark, snowflake, postgres, bigquery. Install the corresponding extra: pip install sqlframe[bigquery]","cause":"Misspelled engine name in config or missing optional dependency for that engine.","error":"Exception: Engine 'bigquery' not supported. 
Supported engines: duckdb, spark, snowflake, postgres, bigquery"},{"fix":"Upgrade sqlframe to >=3.0.0.","cause":"Using an old version of sqlframe that does not have the .show() method (added in 3.0.0).","error":"AttributeError: 'DataFrame' object has no attribute 'show'"}],"ecosystem":"pypi","meta_description":null,"install_score":null,"install_tag":null,"quickstart_score":null,"quickstart_tag":null}