chdb

chDB is an embedded OLAP SQL Engine 🚀 powered by ClickHouse

APACHE-2.0 License

Downloads
65.7K
Stars
1.7K
Committers
13

chDB

English

chDB is an embedded SQL OLAP engine powered by ClickHouse. For more details, see: chDB: ClickHouse as a Function

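  • In-process SQL OLAP engine for Python, powered by ClickHouse
  • No need to install ClickHouse
  • Supports Parquet, CSV, JSON, Arrow, ORC and 60+ other formats
  • Supports the Python DB API 2.0, example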

chDB currently supports Python 3.8+ on macOS (x86_64 and ARM64) and Linux.

pip install chdb

python3 -m chdb SQL [OutputFormat]

python3 -m chdb "SELECT 1,'abc'" Pretty

chdb can be used in several ways, including through the DB-API; examples follow.

SQL

import chdb

# Query the engine version and print it using the 'Pretty' output format.
res = chdb.query('select version()', 'Pretty')
print(res)

Query Parquet or CSV files

# See tests/format_output.py for related examples.
# Read a Parquet file and print the result in JSON format.
res = chdb.query('select * from file("data.parquet", Parquet)', 'JSON')
print(res)

# Read a CSV file and print the result in CSV format.
res = chdb.query('select * from file("data.csv", CSV)', 'CSV')
print(res)

# Report the statistics exposed by the most recent (CSV) result.
print(f"SQL read {res.rows_read()} rows, {res.bytes_read()} bytes, elapsed {res.elapsed()} seconds")

Output as a Pandas DataFrame

# See https://clickhouse.com/docs/en/interfaces/formats for the format reference.
# 'Dataframe' is passed as the output format here; the return value is not
# used in this snippet (presumably a pandas DataFrame — confirm).
chdb.query('select * from file("data.parquet", Parquet)', 'Dataframe')

Query Pandas DataFrames

import chdb.dataframe as cdf
import pandas as pd

# Two small frames that are joined on a == c below.
left = pd.DataFrame({'a': [1, 2, 3], 'b': ["one", "two", "three"]})
right = pd.DataFrame({'c': [1, 2, 3], 'd': ["", "", ""]})

# Join the two DataFrames; __tbl1__ / __tbl2__ in the SQL appear to refer to
# the tbl1 / tbl2 keyword arguments.
join_sql = "select * from __tbl1__ t1 join __tbl2__ t2 on t1.a = t2.c"
ret_tbl = cdf.query(sql=join_sql, tbl1=left, tbl2=right)
print(ret_tbl)

# Run a follow-up query on the joined result, addressed as __table__.
grouped = ret_tbl.query('select b, sum(a) from __table__ group by b')
print(grouped)
# Session example: every statement below runs through the same Session object.
from chdb import session as chs

## Create a database, a table, and a view, then read back through the view.
sess = chs.Session()
setup_statements = (
    "CREATE DATABASE IF NOT EXISTS db_xxx ENGINE = Atomic",
    "CREATE TABLE IF NOT EXISTS db_xxx.log_table_xxx (x String, y Int) ENGINE = Log;",
    "INSERT INTO db_xxx.log_table_xxx VALUES ('a', 1), ('b', 3), ('c', 2), ('d', 5);",
    "CREATE VIEW db_xxx.view_xxx AS SELECT * FROM db_xxx.log_table_xxx LIMIT 4;",
)
# Order matters: each statement depends on the objects created before it.
for statement in setup_statements:
    sess.query(statement)

print("Select from view:\n")
print(sess.query("SELECT * FROM db_xxx.view_xxx", "Pretty"))

See also: test_stateful.py

# DB-API example: open a connection, run one query, and print its
# description and first row.
from contextlib import closing

import chdb.dbapi as dbapi

print(f"chdb driver version: {dbapi.get_client_info()}")

# closing() calls .close() on exit, so the cursor and then the connection are
# released even if execute() or fetchone() raises.
with closing(dbapi.connect()) as conn1, closing(conn1.cursor()) as cur1:
    cur1.execute('select version()')
    print("description: ", cur1.description)
    print("data: ", cur1.fetchone())
# UDF example: decorate a Python function with chdb_udf, then call it by name
# from a SQL statement.
from chdb.udf import chdb_udf
from chdb import query

# NOTE(review): the int() conversions suggest the arguments arrive as
# strings — confirm against the chdb.udf documentation.
@chdb_udf()
def sum_udf(lhs, rhs):
    return int(lhs) + int(rhs)

# The SQL below invokes the decorated function as sum_udf(...).
print(query("select sum_udf(12,22)"))

See also: test_udf.py.

For more examples, see examples and tests.


  • Star

Apache 2.0 LICENSE

chDB ClickHouse chDB