Skip to content
Star -

SQL Reference

Related Topics: SQL Endpoints Tutorial (step-by-step guide) | DuckDB Integration (extensions, data sources) | Authentication (user functions)

MXCP uses DuckDB SQL syntax with additional built-in functions for authentication and access control.

MXCP endpoints use DuckDB SQL, which extends PostgreSQL syntax with analytical features:

  • PostgreSQL Compatible: Most PostgreSQL queries work unchanged
  • Column-Store Engine: Optimized for analytical queries
  • Rich Type System: Arrays, structs, maps, and more
  • Window Functions: Full analytical window function support
  • CTEs: Common Table Expressions with recursive support

When authentication is enabled, these functions provide user information:

| Function | Returns | Description |
| --- | --- | --- |
| `get_username()` | VARCHAR | Authenticated user's username |
| `get_user_email()` | VARCHAR | User's email address |
| `get_user_provider()` | VARCHAR | OAuth provider (github, atlassian, etc.) |
| `get_user_external_token()` | VARCHAR | User's OAuth token |

All functions return NULL when authentication is disabled or user is not authenticated.

-- Filter data by authenticated user
SELECT * FROM projects
WHERE owner = get_username();
-- Access external API with user's token
SELECT *
FROM read_json_auto(
'https://api.github.com/user/repos',
headers = MAP {
'Authorization': 'Bearer ' || get_user_external_token(),
'User-Agent': 'MXCP-' || get_username()
}
);
-- Audit logging
INSERT INTO audit_log (user, action, timestamp)
VALUES (get_username(), 'query_executed', NOW());
-- User-specific data
SELECT
id,
title,
CASE
WHEN owner = get_username() THEN content
ELSE '[Restricted]'
END as content
FROM documents;

HTTP transport exposes request headers to DuckDB, allowing you to audit or forward them without extra plumbing:

| Function | Returns | Description |
| --- | --- | --- |
| `get_request_header(name)` | VARCHAR | Specific header value (NULL if missing) |
| `get_request_headers_json()` | VARCHAR | All headers as JSON (NULL if no headers) |

Only available when using HTTP transport (streamable-http, sse). Returns NULL for stdio transport.

-- Forward authorization header
SELECT http_post(
'https://example.internal/audit',
headers := map {
'Authorization': get_request_header('Authorization')
},
body := json('{"status": "ok"}')
);
-- Access custom headers
SELECT get_request_header('X-Request-ID') as request_id;
-- Get all headers
SELECT get_request_headers_json() as all_headers;

Use named parameters with $ prefix:

-- Simple parameters
SELECT * FROM users WHERE id = $user_id;
-- Multiple parameters
SELECT * FROM orders
WHERE customer_id = $customer_id
AND status = $status
AND created_at > $since;
-- String parameters
SELECT * FROM products
WHERE name LIKE '%' || $search || '%';
-- Array parameters
SELECT * FROM users
WHERE id IN (SELECT unnest($user_ids));
tool:
  name: search_users
  parameters:
    - name: department
      type: string
    - name: min_age
      type: integer
  source:
    code: |
      SELECT * FROM users
      WHERE department = $department
        AND age >= $min_age
Terminal:
mxcp query "SELECT * FROM users WHERE age > $age" --param age=18
mxcp run tool search_users --param department=Engineering --param min_age=25

MXCP typically loads these DuckDB extensions:

Read from HTTP/S3 endpoints:

-- HTTP files
SELECT * FROM read_csv('https://example.com/data.csv');
-- S3 files
SELECT * FROM read_parquet('s3://bucket/data.parquet');

JSON parsing and manipulation:

-- Extract JSON field (returns JSON type)
SELECT json_extract(data, '$.name') as name FROM events;
-- Extract as string
SELECT data->>'$.name' as name FROM events;
-- Unnest JSON arrays (convert to list first)
SELECT unnest(from_json(data->'$.items', '["JSON"]')) as item FROM orders;

Parquet file support:

-- Read Parquet
SELECT * FROM read_parquet('data/*.parquet');
-- Write Parquet
COPY (SELECT * FROM users) TO 'output.parquet';

PostgreSQL connectivity:

-- Attach database
ATTACH 'postgresql://user:pass@host:5432/db' AS pg;
-- Query tables
SELECT * FROM pg.public.users;

Excel file support:

-- Read Excel
SELECT * FROM read_xlsx('report.xlsx', sheet='Data');
-- Running totals
SELECT
date,
amount,
SUM(amount) OVER (ORDER BY date) as running_total
FROM sales;
-- Ranking
SELECT
name,
score,
RANK() OVER (ORDER BY score DESC) as rank
FROM players;
-- Partitioned calculations
SELECT
department,
employee,
salary,
AVG(salary) OVER (PARTITION BY department) as dept_avg
FROM employees;
-- Simple CTE
WITH active_users AS (
SELECT * FROM users WHERE status = 'active'
)
SELECT * FROM active_users WHERE age > 25;
-- Multiple CTEs
WITH
orders_2024 AS (
SELECT * FROM orders WHERE year = 2024
),
high_value AS (
SELECT * FROM orders_2024 WHERE total > 1000
)
SELECT customer_id, COUNT(*) as high_value_orders
FROM high_value
GROUP BY customer_id;
-- Recursive CTE
WITH RECURSIVE subordinates AS (
SELECT id, name, manager_id, 1 as level
FROM employees WHERE id = $root_id
UNION ALL
SELECT e.id, e.name, e.manager_id, s.level + 1
FROM employees e
JOIN subordinates s ON e.manager_id = s.id
)
SELECT * FROM subordinates;

Transform data between wide and long formats:

  • PIVOT: Converts rows to columns (long → wide). Takes distinct values from one column and creates new columns for each value.
  • UNPIVOT: Converts columns to rows (wide → long). Takes multiple columns and stacks them into key-value pairs.
-- Pivot: Convert rows to columns
-- Input: sales(product, month, amount) with months as rows
-- Output: One row per product with jan, feb, mar as columns
PIVOT sales ON month USING SUM(amount);
-- Unpivot: Convert columns to rows
-- Input: monthly_data with jan, feb, mar columns
-- Output: Rows with (month, amount) pairs
UNPIVOT monthly_data ON jan, feb, mar INTO
NAME month
VALUE amount;

Aggregate values into arrays instead of scalar results. Useful for collecting related items or preserving ordering within groups.

-- Collect names into arrays per department
SELECT
department,
LIST(name) as employees
FROM users
GROUP BY department;
-- Collect with ordering (highest salaries first)
SELECT
department,
ARRAY_AGG(salary ORDER BY salary DESC) as salaries
FROM employees
GROUP BY department;
| Type | Example |
| --- | --- |
| VARCHAR | `'hello'` |
| INTEGER | `42` |
| DOUBLE | `3.14` |
| BOOLEAN | `true`, `false` |
| DATE | `DATE '2024-01-01'` |
| TIMESTAMP | `TIMESTAMP '2024-01-01 12:00:00'` |

DuckDB supports nested data structures for representing complex data:

-- Arrays: ordered collections of same-type values
SELECT [1, 2, 3] as numbers;
SELECT array_agg(name) as names FROM users;
-- Structs: named fields (like objects/records)
SELECT {'name': 'Alice', 'age': 30} as person;
SELECT struct_pack(name := 'Alice', age := 30);
-- Maps: key-value pairs with dynamic keys
SELECT MAP {'key1': 'value1', 'key2': 'value2'};

Convert between types explicitly. Use TRY_CAST when the conversion might fail to avoid errors.

-- Explicit cast
SELECT CAST(age AS VARCHAR) as age_str FROM users;
-- Cast shorthand (PostgreSQL-style)
SELECT age::VARCHAR as age_str FROM users;
-- Try cast (returns NULL on failure instead of error)
SELECT TRY_CAST(value AS INTEGER) FROM data;

Always use parameters for user input:

-- Good: Parameterized
SELECT * FROM users WHERE id = $user_id;
-- Bad: String concatenation (SQL injection risk)
SELECT * FROM users WHERE id = ''' + user_id + ''';

Always limit large result sets:

SELECT * FROM logs
ORDER BY timestamp DESC
LIMIT 100;
-- Clear and readable
WITH recent_orders AS (
SELECT * FROM orders WHERE date > $since
),
order_totals AS (
SELECT customer_id, SUM(amount) as total
FROM recent_orders
GROUP BY customer_id
)
SELECT * FROM order_totals WHERE total > 1000;
-- Use equality for indexed columns
SELECT * FROM users WHERE id = $user_id;
-- Avoid functions on indexed columns
-- Bad: WHERE LOWER(email) = 'test@example.com'
-- Good: WHERE email = 'test@example.com'