LETSQL

Create a connection object:

import letsql as ls

con = ls.connect()
Adjust connection parameters as needed.

Connection Parameters

do_connect

do_connect(self, config=None)

Creates a connection.

Parameters

Name Type Description Default
config Mapping[str, str | Path] | None Mapping of table names to files. None

Examples

>>> import letsql as ls
>>> con = ls.connect()

create_catalog

create_catalog(self, name, force=False)

create_database

create_database(self, name, catalog=None, force=False)

create_table

create_table(self, name, obj=None, *, schema=None, database=None, temp=False, overwrite=False)

Create a table in Datafusion.

Parameters

Name Type Description Default
name str Name of the table to create required
obj pd.DataFrame | pa.Table | ir.Table | None The data with which to populate the table; optional, but at least one of obj or schema must be specified None
schema sch.Schema | None The schema of the table to create; optional, but at least one of obj or schema must be specified None
database str | None The name of the database in which to create the table; if not passed, the current database is used. None
temp bool Create a temporary table False
overwrite bool If True, replace the table if it already exists, otherwise fail if the table exists False

disconnect

disconnect(self)

drop_catalog

drop_catalog(self, name, force=False)

drop_database

drop_database(self, name, catalog=None, force=False)

execute

execute(self, expr, **kwargs)

get_schema

get_schema(self, table_name, *, catalog=None, database=None)

list_catalogs

list_catalogs(self, like=None)

list_databases

list_databases(self, like=None, catalog=None)

list_tables

list_tables(self, like=None, database=None)

Return the list of table names in the current database.

Parameters

Name Type Description Default
like str | None A pattern in Python’s regex format. None
database str | None Unused in the datafusion backend. None

Returns

Name Type Description
list[str] The list of the table names that match the pattern like.

raw_sql

raw_sql(self, query)

Execute a SQL string query against the database.

Parameters

Name Type Description Default
query str | sge.Expression Raw SQL string required

read_csv

read_csv(self, path, table_name=None, **kwargs)

Register a CSV file as a table in the current database.

Parameters

Name Type Description Default
path str | Path The data source. A string or Path to the CSV file. required
table_name str | None An optional name to use for the created table. This defaults to a sequentially generated name. None
**kwargs Any Additional keyword arguments passed to Datafusion loading function. {}

Returns

Name Type Description
ir.Table The just-registered table

read_delta

read_delta(self, source_table, table_name=None, **kwargs)

Register a Delta Lake table as a table in the current database.

Parameters

Name Type Description Default
source_table str | Path The data source. Must be a directory containing a Delta Lake table. required
table_name str | None An optional name to use for the created table. This defaults to a sequentially generated name. None
**kwargs Any Additional keyword arguments passed to deltalake.DeltaTable. {}

Returns

Name Type Description
ir.Table The just-registered table

read_json

read_json(self, path, table_name=None, **kwargs)

read_parquet

read_parquet(self, path, table_name=None, **kwargs)

Register a parquet file as a table in the current database.

Parameters

Name Type Description Default
path str | Path The data source. required
table_name str | None An optional name to use for the created table. This defaults to a sequentially generated name. None
**kwargs Any Additional keyword arguments passed to Datafusion loading function. {}

Returns

Name Type Description
ir.Table The just-registered table

read_postgres

read_postgres(self, uri, *, table_name=None, database='public')

Register a table from a postgres instance into a DuckDB table.

Parameters

Name Type Description Default
uri str A postgres URI of the form postgres://user:password@host:port required
table_name str | None The table to read None
database str PostgreSQL database (schema) where table_name resides 'public'

Returns

Name Type Description
ir.Table The just-registered table.

read_sqlite

read_sqlite(self, path, *, table_name=None)

Register a table from a SQLite database into a DuckDB table.

Parameters

Name Type Description Default
path str | Path The path to the SQLite database required
table_name str | None The table to read None

Returns

Name Type Description
ir.Table The just-registered table.

Examples

>>> import letsql as ls
>>> import sqlite3
>>> ls.options.interactive = True
>>> with sqlite3.connect("/tmp/sqlite.db") as con:
...     con.execute("DROP TABLE IF EXISTS t")
...     con.execute("CREATE TABLE t (a INT, b TEXT)")
...     con.execute(
...         "INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c')"
...     )
<...>
>>> t = ls.read_sqlite(path="/tmp/sqlite.db", table_name="t")
>>> t
┏━━━━━━━┳━━━━━━━━┓
┃ a     ┃ b      ┃
┡━━━━━━━╇━━━━━━━━┩
│ int64 │ string │
├───────┼────────┤
│     1 │ a      │
│     2 │ b      │
│     3 │ c      │
└───────┴────────┘

register

register(self, source, table_name=None, **kwargs)

Register a data set with table_name located at source.

Parameters

Name Type Description Default
source str | Path | pa.Table | pa.RecordBatch | pa.Dataset | pd.DataFrame The data source(s). May be a path to a file or directory of parquet/csv files, a pandas dataframe, or a pyarrow table, dataset or record batch. required
table_name str | None The name of the table None
**kwargs Any Datafusion-specific keyword arguments {}

register_table_provider

register_table_provider(self, source, table_name=None)

register_udwf

register_udwf(self, func)

register_xgb_model

register_xgb_model(self, model_name, source)

Register an XGBoost model as a UDF in the letsql Backend.

Parameters

Name Type Description Default
model_name str The name of the model required
source str | Path The path to the JSON file containing the XGBoost model required

Returns

Name Type Description
typing.Callable A function that can be used to call the XGBoost model as an Ibis UDF

to_parquet

to_parquet(self, expr, path, *, params=None, **kwargs)

to_pyarrow

to_pyarrow(self, expr, **kwargs)

to_pyarrow_batches

to_pyarrow_batches(self, expr, *, chunk_size=1000000, **kwargs)

truncate_table

truncate_table(self, name, database=None, schema=None)

Delete all rows from a table.

Parameters

Name Type Description Default
name str Table name required
database str | None Database name None
schema str | None Schema name None