LETSQL
Create a connection object:
import letsql as ls
1= ls.connect() con
- 1
- Adjust connection parameters as needed.
Connection Parameters
do_connect
do_connect(self, config=None)
Creates a connection.
Parameters
Name | Type | Description | Default |
---|---|---|---|
config |
Mapping[str, str | Path] | None | Mapping of table names to files. | None |
Examples
>>> import letsql as ls
>>> con = ls.connect()
create_catalog
create_catalog(self, name, force=False)
create_database
create_database(self, name, catalog=None, force=False)
create_table
create_table(self, name, obj=None, *, schema=None, database=None, temp=False, overwrite=False)
Create a table in Datafusion.
Parameters
Name | Type | Description | Default |
---|---|---|---|
name |
str | Name of the table to create | required |
obj |
pd.DataFrame | pa.Table | ir.Table | None | The data with which to populate the table; optional, but at least one of obj or schema must be specified |
None |
schema |
sch.Schema | None | The schema of the table to create; optional, but at least one of obj or schema must be specified |
None |
database |
str | None | The name of the database in which to create the table; if not passed, the current database is used. | None |
temp |
bool | Create a temporary table | False |
overwrite |
bool | If True , replace the table if it already exists, otherwise fail if the table exists |
False |
disconnect
disconnect(self)
drop_catalog
drop_catalog(self, name, force=False)
drop_database
drop_database(self, name, catalog=None, force=False)
execute
execute(self, expr, **kwargs)
get_schema
get_schema(self, table_name, *, catalog=None, database=None)
list_catalogs
list_catalogs(self, like=None)
list_databases
list_databases(self, like=None, catalog=None)
list_tables
list_tables(self, like=None, database=None)
Return the list of table names in the current database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
like |
str | None | A pattern in Python’s regex format. | None |
database |
str | None | Unused in the datafusion backend. | None |
Returns
Type | Description |
---|---|
list[str] | The list of the table names that match the pattern like . |
raw_sql
raw_sql(self, query)
Execute a SQL string query
against the database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
query |
str | sge.Expression | Raw SQL string | required |
read_csv
read_csv(self, path, table_name=None, **kwargs)
Register a CSV file as a table in the current database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
path |
str | Path | The data source. A string or Path to the CSV file. | required |
table_name |
str | None | An optional name to use for the created table. This defaults to a sequentially generated name. | None |
**kwargs |
Any | Additional keyword arguments passed to Datafusion loading function. | {} |
Returns
Type | Description |
---|---|
ir.Table | The just-registered table |
read_delta
read_delta(self, source_table, table_name=None, **kwargs)
Register a Delta Lake table as a table in the current database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
source_table |
str | Path | The data source. Must be a directory containing a Delta Lake table. | required |
table_name |
str | None | An optional name to use for the created table. This defaults to a sequentially generated name. | None |
**kwargs |
Any | Additional keyword arguments passed to deltalake.DeltaTable. | {} |
Returns
Type | Description |
---|---|
ir.Table | The just-registered table |
read_json
read_json(self, path, table_name=None, **kwargs)
read_parquet
read_parquet(self, path, table_name=None, **kwargs)
Register a parquet file as a table in the current database.
Parameters
Name | Type | Description | Default |
---|---|---|---|
path |
str | Path | The data source. | required |
table_name |
str | None | An optional name to use for the created table. This defaults to a sequentially generated name. | None |
**kwargs |
Any | Additional keyword arguments passed to Datafusion loading function. | {} |
Returns
Type | Description |
---|---|
ir.Table | The just-registered table |
read_postgres
read_postgres(self, uri, *, table_name=None, database='public')
Register a table from a postgres instance into a DuckDB table.
Parameters
Name | Type | Description | Default |
---|---|---|---|
uri |
str | A postgres URI of the form postgres://user:password@host:port |
required |
table_name |
str | None | The table to read | None |
database |
str | PostgreSQL database (schema) where table_name resides |
'public' |
Returns
Type | Description |
---|---|
ir.Table | The just-registered table. |
read_sqlite
read_sqlite(self, path, *, table_name=None)
Register a table from a SQLite database into a DuckDB table.
Parameters
Name | Type | Description | Default |
---|---|---|---|
path |
str | Path | The path to the SQLite database | required |
table_name |
str | None | The table to read | None |
Returns
Type | Description |
---|---|
ir.Table | The just-registered table. |
Examples
>>> import letsql as ls
>>> import sqlite3
>>> ls.options.interactive = True
>>> with sqlite3.connect("/tmp/sqlite.db") as con:
"DROP TABLE IF EXISTS t")
... con.execute("CREATE TABLE t (a INT, b TEXT)")
... con.execute(
... con.execute("INSERT INTO t VALUES (1, 'a'), (2, 'b'), (3, 'c')"
...
... )<...>
>>> t = ls.read_sqlite(path="/tmp/sqlite.db", table_name="t")
>>> t
┏━━━━━━━┳━━━━━━━━┓
┃ a ┃ b ┃
┡━━━━━━━╇━━━━━━━━┩
│ int64 │ string │
├───────┼────────┤1 │ a │
│ 2 │ b │
│ 3 │ c │
│ └───────┴────────┘
register
register(self, source, table_name=None, **kwargs)
Register a data set with table_name
located at source
.
Parameters
Name | Type | Description | Default |
---|---|---|---|
source |
str | Path | pa.Table | pa.RecordBatch | pa.Dataset | pd.DataFrame | The data source(s). Maybe a path to a file or directory of parquet/csv files, a pandas dataframe, or a pyarrow table, dataset or record batch. | required |
table_name |
str | None | The name of the table | None |
kwargs |
Any | Datafusion-specific keyword arguments | {} |
register_table_provider
register_table_provider(self, source, table_name=None)
register_xgb_model
register_xgb_model(self, model_name, source)
Register an XGBoost model as a UDF in the letsql
Backend.
Parameters
Name | Type | Description | Default |
---|---|---|---|
model_name |
str | The name of the model | required |
source |
str | Path | The path to the JSON file containing the XGBoost model | required |
Returns
Type | Description |
---|---|
typing.Callable | A function that can be used to call the XGBoost model as an Ibis UDF |
sql
sql(self, query, schema=None, dialect=None)
to_pyarrow
to_pyarrow(self, expr, **kwargs)
to_pyarrow_batches
to_pyarrow_batches(self, expr, *, chunk_size=1000000, **kwargs)
truncate_table
truncate_table(self, name, database=None, schema=None)
Delete all rows from a table.
Parameters
Name | Type | Description | Default |
---|---|---|---|
name |
str | Table name | required |
database |
str | None | Database name | None |
schema |
str | None | Schema name | None |