LETSQL
Create a connection object:
import letsql as ls
con = ls.connect()
Adjust connection parameters as needed.
Connection Parameters
do_connect
do_connect(self, config=None)
Creates a connection.
Parameters
Name | Type | Description | Default |
---|---|---|---|
config | Mapping[str, str | Path] | None | Mapping of table names to files. | None |
Examples
>>> import letsql as ls
>>> con = ls.connect()
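If connect forwards its arguments to do_connect, the config mapping can register file-backed tables at connection time; the path below is hypothetical:
>>> con = ls.connect({"t": "path/to/t.parquet"})  # hypothetical path mapping the table name "t" to a file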
create_catalog
create_catalog(self, name, force=False)
create_database
create_database(self, name, catalog=None, force=False)
create_table
create_table(self, name, obj=None, *, schema=None, database=None, temp=False, overwrite=False)
Create a table in DataFusion.
Parameters
Name | Type | Description | Default |
---|---|---|---|
name | str | Name of the table to create | required |
obj | pd.DataFrame | pa.Table | ir.Table | None | The data with which to populate the table; optional, but at least one of obj or schema must be specified | None |
schema | sch.Schema | None | The schema of the table to create; optional, but at least one of obj or schema must be specified | None |
database | str | None | The name of the database in which to create the table; if not passed, the current database is used. | None |
temp | bool | Create a temporary table | False |
overwrite | bool | If True, replace the table if it already exists; otherwise fail if the table exists | False |
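Examples
A minimal sketch, assuming a connection con created with ls.connect(); the table name and data are illustrative:
>>> import pandas as pd
>>> df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})  # illustrative data
>>> t = con.create_table("my_table", obj=df, overwrite=True)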
disconnect
disconnect(self)
drop_catalog
drop_catalog(self, name, force=False)
drop_database
drop_database(self, name, catalog=None, force=False)
execute
execute(self, expr, **kwargs)
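Examples
A sketch, assuming a connection con and a table registered via read_parquet (the path is hypothetical); limit is standard Ibis expression API:
>>> t = con.read_parquet("data/events.parquet")  # hypothetical path
>>> df = con.execute(t.limit(3))  # materialize the expression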
get_schema
get_schema(self, table_name, *, catalog=None, database=None)
list_catalogs
list_catalogs(self, like=None)
list_databases
list_databases(self, like=None, catalog=None)
list_tables
list_tables(self, like=None, database=None)
Return the list of table names in the current database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
like | str | None | A pattern in Python’s regex format. | None |
database | str | None | Unused in the DataFusion backend. | None |
Returns
Name | Type | Description |
---|---|---|
list[str] | The list of table names that match the pattern like. |
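Examples
A sketch, assuming a connection con with tables already registered (the pattern is illustrative):
>>> con.list_tables()  # all tables in the current database
>>> con.list_tables(like="^event")  # only names matching the regex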
raw_sql
raw_sql(self, query)
Execute a SQL string query against the database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
query | str | sge.Expression | Raw SQL string | required |
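Examples
A minimal sketch, assuming a connection con (the statement is illustrative):
>>> con.raw_sql("CREATE TABLE t AS SELECT 1 AS a")  # illustrative DDL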
read_csv
read_csv(self, path, table_name=None, **kwargs)
Register a CSV file as a table in the current database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
path | str | Path | The data source. A string or Path to the CSV file. | required |
table_name | str | None | An optional name to use for the created table. This defaults to a sequentially generated name. | None |
**kwargs | Any | Additional keyword arguments passed to the DataFusion loading function. | {} |
Returns
Name | Type | Description |
---|---|---|
ir.Table | The just-registered table |
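Examples
A sketch, assuming a connection con and a local CSV file (the path is hypothetical):
>>> t = con.read_csv("data/events.csv", table_name="events")  # hypothetical path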
read_delta
read_delta(self, source_table, table_name=None, **kwargs)
Register a Delta Lake table as a table in the current database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
source_table | str | Path | The data source. Must be a directory containing a Delta Lake table. | required |
table_name | str | None | An optional name to use for the created table. This defaults to a sequentially generated name. | None |
**kwargs | Any | Additional keyword arguments passed to deltalake.DeltaTable. | {} |
Returns
Name | Type | Description |
---|---|---|
ir.Table | The just-registered table |
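Examples
A sketch, assuming a connection con and a directory containing a Delta Lake table (the path is hypothetical):
>>> t = con.read_delta("data/events_delta", table_name="events")  # hypothetical directory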
read_json
read_json(self, path, table_name=None, **kwargs)
read_parquet
read_parquet(self, path, table_name=None, **kwargs)
Register a parquet file as a table in the current database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
path | str | Path | The data source. | required |
table_name | str | None | An optional name to use for the created table. This defaults to a sequentially generated name. | None |
**kwargs | Any | Additional keyword arguments passed to the DataFusion loading function. | {} |
Returns
Name | Type | Description |
---|---|---|
ir.Table | The just-registered table |
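Examples
A sketch, assuming a connection con and a local parquet file (the path is hypothetical):
>>> t = con.read_parquet("data/events.parquet")  # hypothetical path; omitting table_name generates one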
read_postgres
read_postgres(self, uri, *, table_name=None, database='public')
Register a table from a Postgres instance as a table in the current database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
uri | str | A postgres URI of the form postgres://user:password@host:port | required |
table_name | str | None | The table to read | None |
database | str | PostgreSQL database (schema) where table_name resides | 'public' |
Returns
Name | Type | Description |
---|---|---|
ir.Table | The just-registered table. |
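Examples
A sketch, assuming a reachable Postgres instance (the URI and table name are hypothetical):
>>> t = con.read_postgres(
...     "postgres://user:password@localhost:5432",  # hypothetical URI
...     table_name="orders",
... )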
read_sqlite
read_sqlite(self, path, *, table_name=None)
Register a table from a SQLite database as a table in the current database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
path | str | Path | The path to the SQLite database | required |
table_name | str | None | The table to read | None |
Returns
Name | Type | Description |
---|---|---|
ir.Table | The just-registered table. |
Examples
>>> import letsql as ls
>>> import sqlite3
>>> ls.options.interactive = True
>>> with sqlite3.connect("/tmp/sqlite.db") as con:
...     con.execute("DROP TABLE IF EXISTS t")
...     con.execute("CREATE TABLE t (a INT, b TEXT)")
...     con.execute(
...         "INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c')"
...     )
<...>
>>> t = ls.read_sqlite(path="/tmp/sqlite.db", table_name="t")
>>> t
┏━━━━━━━┳━━━━━━━━┓
┃ a     ┃ b      ┃
┡━━━━━━━╇━━━━━━━━┩
│ int64 │ string │
├───────┼────────┤
│     1 │ a      │
│     2 │ b      │
│     3 │ c      │
└───────┴────────┘
register
register(self, source, table_name=None, **kwargs)
Register a data set with table_name located at source.
Parameters
Name | Type | Description | Default |
---|---|---|---|
source | str | Path | pa.Table | pa.RecordBatch | pa.Dataset | pd.DataFrame | The data source(s). May be a path to a file or directory of parquet/csv files, a pandas dataframe, or a pyarrow table, dataset or record batch. | required |
table_name | str | None | The name of the table | None |
kwargs | Any | DataFusion-specific keyword arguments | {} |
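Examples
A sketch of two accepted source types, assuming a connection con (the path and table names are hypothetical):
>>> import pandas as pd
>>> con.register(pd.DataFrame({"a": [1, 2]}), table_name="from_pandas")
>>> con.register("data/events.parquet", table_name="from_parquet")  # hypothetical path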
register_table_provider
register_table_provider(self, source, table_name=None)
register_udwf
register_udwf(self, func)
register_xgb_model
register_xgb_model(self, model_name, source)
Register an XGBoost model as a UDF in the letsql backend.
Parameters
Name | Type | Description | Default |
---|---|---|---|
model_name | str | The name of the model | required |
source | str | Path | The path to the JSON file containing the XGBoost model | required |
Returns
Name | Type | Description |
---|---|---|
typing.Callable | A function that can be used to call the XGBoost model as an Ibis UDF |
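Examples
A sketch, assuming a connection con and an XGBoost model saved as JSON (the path and model name are hypothetical):
>>> predict = con.register_xgb_model("price_model", "models/price_model.json")  # hypothetical path
The returned callable can then be applied to table columns as an Ibis UDF.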
to_parquet
to_parquet(self, expr, path, *, params=None, **kwargs)
to_pyarrow
to_pyarrow(self, expr, **kwargs)
to_pyarrow_batches
to_pyarrow_batches(self, expr, *, chunk_size=1000000, **kwargs)
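A sketch, assuming a connection con and an expression expr built from a registered table; chunk_size is the documented batch-size parameter:
>>> reader = con.to_pyarrow_batches(expr, chunk_size=500_000)  # stream results in ~500k-row pyarrow batches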
truncate_table
truncate_table(self, name, database=None, schema=None)
Delete all rows from a table.
Parameters
Name | Type | Description | Default |
---|---|---|---|
name | str | Table name | required |
database | str | None | Database name | None |
schema | str | None | Schema name | None |
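Examples
A minimal sketch, assuming a connection con and an existing table t (hypothetical name):
>>> con.truncate_table("t")  # deletes all rows but keeps the table definition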