# Built-in Spark Scalar Functions

> **INFO:** Unless otherwise specified, the functions listed here are available in both PySpark (the `pyspark.sql.functions` module) and Spark SQL.

## Array
| Function | Supported | Note |
|---|---|---|
array | ✅ | |
array_append | ✅ | |
array_compact | ✅ | |
array_concat | ✅ | This is Sail exclusive (not available in PySpark or Spark SQL). This is a synonym of `concat`. |
array_contains | ✅ | |
array_contains_all | ✅ | This is Sail exclusive (not available in PySpark or Spark SQL). |
array_distinct | ✅ | |
array_except | ✅ | |
array_insert | ✅ | |
array_intersect | ✅ | |
array_join | ✅ (partial) | |
array_max | ✅ | |
array_min | ✅ | |
array_position | ✅ | |
array_prepend | ✅ | |
array_remove | ✅ | |
array_repeat | ✅ | |
array_size | ✅ | |
array_union | ✅ | |
arrays_overlap | ✅ | |
arrays_zip | ✅ | |
flatten | ✅ | |
get | ✅ | |
sequence | ✅ (partial) | |
slice | ✅ | |
sort_array | ✅ | |
shuffle | 🚧 | |

## Bitwise
| Function | Supported | Note |
|---|---|---|
| & | ✅ | |
| ^ | ✅ | |
| << | ✅ | |
| >> | ✅ | |
| >>> | ✅ | |
| \| | ✅ | |
| ~ | ✅ | |
bit_count | ✅ | |
bit_get | ✅ | |
bitwise_not | ✅ | |
getbit | ✅ | |
shiftleft | ✅ | |
shiftright | ✅ | |
shiftrightunsigned | ✅ | |

## Collection
| Function | Supported | Note |
|---|---|---|
array_sort | ✅ (partial) | Sorting is supported only when no lambda (comparator) function is provided. |
cardinality | ✅ | |
concat | ✅ | |
element_at | ✅ | |
reverse | ✅ | |
size | ✅ | |
try_element_at | ✅ | |
aggregate | 🚧 | |
exists | 🚧 | |
filter | 🚧 | |
forall | 🚧 | |
map_filter | 🚧 | |
map_zip_with | 🚧 | |
reduce | 🚧 | |
transform | 🚧 | |
transform_keys | 🚧 | |
transform_values | 🚧 | |
zip_with | 🚧 | |

## Conditional
| Function | Supported | Note |
|---|---|---|
coalesce | ✅ | |
ifnull | ✅ | |
nanvl | ✅ | |
nullif | ✅ | |
nullifzero | ✅ | |
nvl | ✅ | |
nvl2 | ✅ | |
when | ✅ | |
zeroifnull | ✅ | |

## Conversion
| Function | Supported | Note |
|---|---|---|
bigint | ✅ | |
binary | ✅ | |
boolean | ✅ | |
cast | ✅ | |
date | ✅ | |
decimal | ✅ | |
double | ✅ | |
float | ✅ | |
int | ✅ | |
smallint | ✅ | |
string | ✅ | |
timestamp | ✅ | |
tinyint | ✅ | |

## CSV
| Function | Supported | Note |
|---|---|---|
from_csv | ✅ | |
schema_of_csv | 🚧 | |
to_csv | 🚧 | |

## Datetime
| Function | Supported | Note |
|---|---|---|
add_days | ✅ | |
add_months | ✅ | |
add_years | ✅ | |
convert_timezone | ✅ | |
curdate | ✅ | |
current_date | ✅ | |
current_timestamp | ✅ | |
current_timezone | ✅ | |
date_add | ✅ | |
date_diff | ✅ | |
date_format | ✅ | |
date_from_unix_date | ✅ | |
date_part | ✅ | |
date_sub | ✅ | |
date_trunc | ✅ | |
dateadd | ✅ | |
datediff | ✅ | |
datepart | ✅ | |
day | ✅ | |
dayname | ✅ | |
dayofmonth | ✅ | |
dayofweek | ✅ | |
dayofyear | ✅ | |
extract | ✅ | |
from_unixtime | ✅ | |
from_utc_timestamp | ✅ | |
hour | ✅ | |
last_day | ✅ | |
localtimestamp | ✅ | |
make_date | ✅ | |
make_dt_interval | ✅ | |
make_interval | ✅ | |
make_timestamp | ✅ | |
make_timestamp_ltz | ✅ | |
make_timestamp_ntz | ✅ | |
make_ym_interval | ✅ | |
minute | ✅ | |
month | ✅ | |
monthname | ✅ | |
months_between | ✅ | |
next_day | ✅ | |
now | ✅ | |
quarter | ✅ | |
second | ✅ | |
timestamp_micros | ✅ | |
timestamp_millis | ✅ | |
timestamp_seconds | ✅ | |
to_date | ✅ | |
to_timestamp | ✅ | |
to_timestamp_ltz | ✅ | |
to_timestamp_ntz | ✅ | |
to_unix_timestamp | ✅ | |
to_utc_timestamp | ✅ | |
trunc | ✅ | |
try_to_timestamp | ✅ | |
unix_date | ✅ | |
unix_micros | ✅ | |
unix_millis | ✅ | |
unix_seconds | ✅ | |
unix_timestamp | ✅ | |
weekday | ✅ | |
weekofyear | ✅ | |
year | ✅ | |
session_window | 🚧 | |
timestamp_add | 🚧 | |
timestamp_diff | 🚧 | |
try_make_interval | 🚧 | |
try_make_timestamp | 🚧 | |
try_make_timestamp_ltz | 🚧 | |
try_make_timestamp_ntz | 🚧 | |
window | 🚧 | |
window_time | 🚧 | |

## Hash
| Function | Supported | Note |
|---|---|---|
crc32 | ✅ | |
hash | ✅ | |
md5 | ✅ | |
sha | ✅ | |
sha1 | ✅ | |
sha2 | ✅ | |
xxhash64 | ✅ | |

## JSON
| Function | Supported | Note |
|---|---|---|
get_json_object | ✅ | |
json_array_length | ✅ | |
json_object_keys | ✅ | |
to_json | ✅ | |
from_json | 🚧 | |
json_tuple | 🚧 | |
schema_of_json | 🚧 | |

## Map
| Function | Supported | Note |
|---|---|---|
create_map | ✅ | |
map | ✅ | This is Spark SQL only (not available in PySpark). |
map_concat | ✅ | |
map_contains_key | ✅ | |
map_entries | ✅ | |
map_from_arrays | ✅ | |
map_from_entries | ✅ | |
map_keys | ✅ | |
map_values | ✅ | |
str_to_map | ✅ | |

## Math
| Function | Supported | Note |
|---|---|---|
| - | ✅ | |
| * | ✅ | |
| / | ✅ | |
| % | ✅ | |
| + | ✅ | |
abs | ✅ | |
acos | ✅ | |
acosh | ✅ | |
asin | ✅ | |
asinh | ✅ | |
atan | ✅ | |
atan2 | ✅ | |
atanh | ✅ | |
bin | ✅ | |
bround | ✅ | |
cbrt | ✅ | |
ceil | ✅ | |
ceiling | ✅ | |
conv | ✅ | |
cos | ✅ | |
cosh | ✅ | |
cot | ✅ | |
csc | ✅ | |
degrees | ✅ | |
div | ✅ | This is Spark SQL only (not available in PySpark). |
e | ✅ | |
exp | ✅ | |
expm1 | ✅ | |
factorial | ✅ | |
floor | ✅ | |
greatest | ✅ | |
hex | ✅ | |
hypot | ✅ | |
least | ✅ | |
ln | ✅ | |
log | ✅ | |
log10 | ✅ | |
log1p | ✅ | |
log2 | ✅ | |
mod | ✅ | This is Spark SQL only (not available in PySpark). |
negate | ✅ | |
negative | ✅ | |
pi | ✅ | |
pmod | ✅ | |
positive | ✅ | |
pow | ✅ | |
power | ✅ | |
radians | ✅ | |
rand | ✅ | |
randn | ✅ | |
random | ✅ | |
random_poisson | ✅ | |
rint | ✅ | |
round | ✅ | |
sec | ✅ | |
sign | ✅ | |
signum | ✅ | |
sin | ✅ | |
sinh | ✅ | |
sqrt | ✅ | |
tan | ✅ | |
tanh | ✅ | |
try_add | ✅ | |
try_divide | ✅ | |
try_mod | ✅ | |
try_multiply | ✅ | |
try_subtract | ✅ | |
unhex | ✅ | |
width_bucket | ✅ | |
uniform | 🚧 | |

## Misc
| Function | Supported | Note |
|---|---|---|
aes_decrypt | ✅ | |
aes_encrypt | ✅ | |
assert_true | ✅ | |
bitmap_bit_position | ✅ | |
bitmap_bucket_number | ✅ | |
bitmap_count | ✅ | |
current_catalog | ✅ | |
current_database | ✅ | |
current_schema | ✅ | |
current_user | ✅ | |
monotonically_increasing_id | ✅ | |
raise_error | ✅ | |
session_user | ✅ | |
try_aes_decrypt | ✅ | |
try_aes_encrypt | ✅ | This is Sail exclusive (not available in PySpark or Spark SQL). |
typeof | ✅ | |
user | ✅ | |
uuid | ✅ | |
version | ✅ | |
from_avro | 🚧 | This is Spark SQL only (not available in PySpark). |
from_protobuf | 🚧 | This is Spark SQL only (not available in PySpark). |
hll_sketch_estimate | 🚧 | |
hll_union | 🚧 | |
input_file_block_length | 🚧 | |
input_file_block_start | 🚧 | |
input_file_name | 🚧 | |
schema_of_avro | 🚧 | This is Spark SQL only (not available in PySpark). |
spark_partition_id | 🚧 | |
to_avro | 🚧 | This is Spark SQL only (not available in PySpark). |
to_protobuf | 🚧 | This is Spark SQL only (not available in PySpark). |
java_method | ❌ | This is specific to the Java implementation. |
reflect | ❌ | This is specific to the Java implementation. |
try_reflect | ❌ | This is specific to the Java implementation. |

## Predicate
| Function | Supported | Note |
|---|---|---|
| ! | ✅ | |
| != | ✅ | |
| < | ✅ | |
| <= | ✅ | |
| <=> | ✅ | |
| = | ✅ | |
| == | ✅ | |
| > | ✅ | |
| >= | ✅ | |
and | ✅ | This is Spark SQL only (not available in PySpark). |
equal_null | ✅ | |
ilike | ✅ | |
in | ✅ | This is Spark SQL only (not available in PySpark). |
isnan | ✅ | |
isnotnull | ✅ | |
isnull | ✅ | |
like | ✅ | |
not | ✅ | This is Spark SQL only (not available in PySpark). |
or | ✅ | This is Spark SQL only (not available in PySpark). |
regexp | ✅ | |
regexp_like | ✅ | |
rlike | ✅ | |

## String
| Function | Supported | Note |
|---|---|---|
ascii | ✅ | |
base64 | ✅ | |
bit_length | ✅ | |
btrim | ✅ | |
char | ✅ | |
char_length | ✅ | |
character_length | ✅ | |
chr | ✅ | |
concat_ws | ✅ | |
contains | ✅ | |
decode | ✅ | |
elt | ✅ | |
encode | ✅ | |
endswith | ✅ | |
find_in_set | ✅ | |
format_number | ✅ | |
format_string | ✅ | |
initcap | ✅ | |
instr | ✅ | |
is_valid_utf8 | ✅ | |
lcase | ✅ | |
left | ✅ | |
len | ✅ | This is Spark SQL only (not available in PySpark). |
length | ✅ | |
levenshtein | ✅ | |
locate | ✅ | |
lower | ✅ | |
lpad | ✅ | |
ltrim | ✅ | |
luhn_check | ✅ | This is Spark SQL only (not available in PySpark). |
make_valid_utf8 | ✅ | |
mask | ✅ | |
octet_length | ✅ | |
overlay | ✅ | |
position | ✅ | |
printf | ✅ | |
randstr | ✅ | |
regexp_count | ✅ | |
regexp_extract | ✅ | |
regexp_instr | ✅ | |
regexp_replace | ✅ | |
regexp_substr | ✅ | |
repeat | ✅ | |
replace | ✅ | |
right | ✅ | |
rpad | ✅ | |
rtrim | ✅ | |
soundex | ✅ | |
space | ✅ | This is Spark SQL only (not available in PySpark). |
split | ✅ | |
split_part | ✅ | |
startswith | ✅ | |
strpos | ✅ | |
substr | ✅ | |
substring | ✅ | |
substring_index | ✅ | |
to_binary | ✅ | |
to_number | ✅ | |
translate | ✅ | |
trim | ✅ | |
try_to_binary | ✅ | |
try_to_number | ✅ | |
try_validate_utf8 | ✅ | |
ucase | ✅ | |
unbase64 | ✅ | |
upper | ✅ | |
validate_utf8 | ✅ | |
collate | 🚧 | |
collation | 🚧 | |
regexp_extract_all | 🚧 | |
sentences | 🚧 | |
to_char | 🚧 | |
to_varchar | 🚧 | |

## Struct
| Function | Supported | Note |
|---|---|---|
named_struct | ✅ | |
struct | ✅ | |

## URL
| Function | Supported | Note |
|---|---|---|
parse_url | ✅ | |
try_parse_url | ✅ | |
try_url_decode | ✅ | |
url_decode | ✅ | |
url_encode | ✅ | |

## Variant
| Function | Supported | Note |
|---|---|---|
is_variant_null | 🚧 | |
parse_json | 🚧 | |
schema_of_variant | 🚧 | |
schema_of_variant_agg | 🚧 | |
to_variant_object | 🚧 | |
try_parse_json | 🚧 | |
try_variant_get | 🚧 | |
variant_explode | 🚧 | |
variant_explode_outer | 🚧 | |
variant_get | 🚧 | |

## XML
| Function | Supported | Note |
|---|---|---|
from_xml | 🚧 | |
schema_of_xml | 🚧 | |
to_xml | 🚧 | |
xpath | 🚧 | |
xpath_boolean | 🚧 | |
xpath_double | 🚧 | |
xpath_float | 🚧 | |
xpath_int | 🚧 | |
xpath_long | 🚧 | |
xpath_number | 🚧 | |
xpath_short | 🚧 | |
xpath_string | 🚧 |
