Documentation Index
Fetch the complete documentation index at: https://mintlify.com/apache/iceberg/llms.txt
Use this file to discover all available pages before exploring further.
The Expressions class provides factory methods for creating filter expressions and predicates in Apache Iceberg.
Overview
Expressions are used to:
- Filter data during scans
- Define partition predicates
- Specify row-level conditions
- Create aggregations
All expressions are immutable and can be safely reused.
Logical Operators
and()
Combines two or more expressions with AND logic.
Expression and(Expression left, Expression right)
Expression and(Expression left, Expression right, Expression... expressions)
Example:
import static org.apache.iceberg.expressions.Expressions.*;
Expression expr = and(
greaterThan("age", 18),
lessThan("age", 65)
);
// Multiple AND
Expression multi = and(
equal("status", "active"),
greaterThan("score", 100),
notNull("email")
);
or()
Combines two expressions with OR logic.
Expression or(Expression left, Expression right)
Example:
Expression expr = or(
equal("category", "electronics"),
equal("category", "computers")
);
not()
Negates an expression.
Expression not(Expression child)
Example:
Expression expr = not(equal("deleted", true));
Comparison Predicates
equal()
Tests for equality.
<T> UnboundPredicate<T> equal(String name, T value)
<T> UnboundPredicate<T> equal(UnboundTerm<T> expr, T value)
Example:
Expression expr = equal("status", "active");
Expression numExpr = equal("count", 42);
notEqual()
Tests for inequality.
<T> UnboundPredicate<T> notEqual(String name, T value)
<T> UnboundPredicate<T> notEqual(UnboundTerm<T> expr, T value)
Example:
Expression expr = notEqual("status", "deleted");
lessThan()
Tests if value is less than the given value.
<T> UnboundPredicate<T> lessThan(String name, T value)
<T> UnboundPredicate<T> lessThan(UnboundTerm<T> expr, T value)
Example:
Expression expr = lessThan("age", 30);
Expression dateExpr = lessThan("created_at", timestamp);
lessThanOrEqual()
Tests if value is less than or equal to the given value.
<T> UnboundPredicate<T> lessThanOrEqual(String name, T value)
<T> UnboundPredicate<T> lessThanOrEqual(UnboundTerm<T> expr, T value)
greaterThan()
Tests if value is greater than the given value.
<T> UnboundPredicate<T> greaterThan(String name, T value)
<T> UnboundPredicate<T> greaterThan(UnboundTerm<T> expr, T value)
Example:
Expression expr = greaterThan("price", 99.99);
greaterThanOrEqual()
Tests if value is greater than or equal to the given value.
<T> UnboundPredicate<T> greaterThanOrEqual(String name, T value)
<T> UnboundPredicate<T> greaterThanOrEqual(UnboundTerm<T> expr, T value)
String Predicates
startsWith()
Tests if string starts with a prefix.
UnboundPredicate<String> startsWith(String name, String value)
UnboundPredicate<String> startsWith(UnboundTerm<String> expr, String value)
Example:
Expression expr = startsWith("email", "admin@");
notStartsWith()
Tests if string does not start with a prefix.
UnboundPredicate<String> notStartsWith(String name, String value)
UnboundPredicate<String> notStartsWith(UnboundTerm<String> expr, String value)
Example:
Expression expr = notStartsWith("username", "test_");
Null Predicates
isNull()
Tests if value is null.
<T> UnboundPredicate<T> isNull(String name)
<T> UnboundPredicate<T> isNull(UnboundTerm<T> expr)
Example:
Expression expr = isNull("deleted_at");
notNull()
Tests if value is not null.
<T> UnboundPredicate<T> notNull(String name)
<T> UnboundPredicate<T> notNull(UnboundTerm<T> expr)
Example:
Expression expr = notNull("email");
isNaN()
Tests if value is NaN (for floating point types).
<T> UnboundPredicate<T> isNaN(String name)
<T> UnboundPredicate<T> isNaN(UnboundTerm<T> expr)
notNaN()
Tests if value is not NaN.
<T> UnboundPredicate<T> notNaN(String name)
<T> UnboundPredicate<T> notNaN(UnboundTerm<T> expr)
Set Predicates
in()
Tests if value is in a set of values.
<T> UnboundPredicate<T> in(String name, T... values)
<T> UnboundPredicate<T> in(String name, Iterable<T> values)
<T> UnboundPredicate<T> in(UnboundTerm<T> expr, T... values)
<T> UnboundPredicate<T> in(UnboundTerm<T> expr, Iterable<T> values)
Example:
Expression expr = in("status", "pending", "approved", "completed");
List<String> categories = Arrays.asList("A", "B", "C");
Expression listExpr = in("category", categories);
notIn()
Tests if value is not in a set of values.
<T> UnboundPredicate<T> notIn(String name, T... values)
<T> UnboundPredicate<T> notIn(String name, Iterable<T> values)
<T> UnboundPredicate<T> notIn(UnboundTerm<T> expr, T... values)
<T> UnboundPredicate<T> notIn(UnboundTerm<T> expr, Iterable<T> values)
Example:
Expression expr = notIn("status", "deleted", "archived");
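Transform Terms
bucket()
Bucket transform. Hashes the column value into one of numBuckets buckets; the resulting term is compared against a bucket number.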
<T> UnboundTerm<T> bucket(String name, int numBuckets)
Example:
Expression expr = equal(bucket("id", 16), 5);
year()
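Year transform for dates and timestamps. The transformed value is the number of years since 1970, not the calendar year.
<T> UnboundTerm<T> year(String name)
Example:
Expression expr = equal(year("created_at"), 2024); // NOTE: compared against years since 1970; calendar year 2024 is ordinal 2024 - 1970 = 54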
month()
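Month transform for dates and timestamps. The transformed value is the number of months since 1970-01, not the month of the year.
<T> UnboundTerm<T> month(String name)
Example:
Expression expr = equal(month("event_date"), 6); // NOTE: ordinal 6 is 1970-07, not June of every year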
day()
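Day transform for dates and timestamps. The transformed value is the number of days since 1970-01-01, not the day of the month.
<T> UnboundTerm<T> day(String name)
Example:
Expression expr = greaterThan(day("timestamp"), 15); // NOTE: ordinal 15 is 1970-01-16, so this matches days after that date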
hour()
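Hour transform for timestamps. The transformed value is the number of hours since 1970-01-01 00:00:00, not the hour of the day.
<T> UnboundTerm<T> hour(String name)
Example:
Expression expr = equal(hour("event_time"), 14); // NOTE: ordinal 14 is 1970-01-01 14:00, not 2 PM of every day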
truncate()
Truncate transform.
<T> UnboundTerm<T> truncate(String name, int width)
Example:
// Truncate string to 10 characters
Expression expr = equal(truncate("name", 10), "John Smith");
Literals
lit()
Creates a literal from a value.
<T> Literal<T> lit(T value)
Example:
Literal<Long> numLit = lit(42L);
Literal<String> strLit = lit("hello");
Literal<Boolean> boolLit = lit(true);
Timestamp Literals
// Microseconds
Literal<Long> micros(long micros)
// Milliseconds
Literal<Long> millis(long millis)
// Nanoseconds
Literal<Long> nanos(long nanos)
Example:
long now = System.currentTimeMillis();
Literal<Long> timestamp = millis(now);
Aggregates
count()
Count non-null values.
<T> UnboundAggregate<T> count(String name)
countNull()
Count null values.
<T> UnboundAggregate<T> countNull(String name)
countStar()
Count all rows.
<T> UnboundAggregate<T> countStar()
max()
Maximum value.
<T> UnboundAggregate<T> max(String name)
min()
Minimum value.
<T> UnboundAggregate<T> min(String name)
Always True/False
alwaysTrue()
Expression that always evaluates to true.
alwaysFalse()
Expression that always evaluates to false.
Examples
Basic Filtering
import org.apache.iceberg.Table;
import org.apache.iceberg.TableScan;
import static org.apache.iceberg.expressions.Expressions.*;
// Simple equality filter
TableScan scan = table.newScan()
.filter(equal("category", "electronics"));
// Range filter
TableScan rangeScan = table.newScan()
.filter(and(
greaterThanOrEqual("price", 10.0),
lessThan("price", 100.0)
));
Complex Filters
import org.apache.iceberg.expressions.Expression;
// Multiple conditions
Expression filter = and(
equal("status", "active"),
or(
equal("category", "A"),
equal("category", "B")
),
greaterThan("score", 80),
notNull("email")
);
TableScan scan = table.newScan().filter(filter);
Date and Time Filtering
import java.time.Instant;
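// Filter by year. NOTE: year() and month() produce ordinals counted from 1970, so the literals in these examples
// must be ordinals (e.g. 2024 - 1970 = 54 for calendar year 2024) to match the intended calendar values.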
Expression yearFilter = equal(year("event_date"), 2024);
// Filter by month and year
Expression monthFilter = and(
equal(year("event_date"), 2024),
equal(month("event_date"), 6)
);
// Filter by timestamp range
long startTime = Instant.parse("2024-01-01T00:00:00Z").toEpochMilli();
long endTime = Instant.parse("2024-12-31T23:59:59Z").toEpochMilli();
Expression timeRange = and(
greaterThanOrEqual("timestamp", millis(startTime)),
lessThan("timestamp", millis(endTime))
);
String Filtering
// Prefix matching
Expression prefixFilter = startsWith("email", "admin@");
// Exclude test users
Expression excludeTest = notStartsWith("username", "test_");
// IN clause
Expression statusFilter = in(
"status",
"pending",
"approved",
"processing"
);
Partition Filtering
// Filter by partitioned column
Expression partFilter = and(
equal("date", "2024-01-15"),
equal("region", "us-west")
);
TableScan scan = table.newScan()
.filter(partFilter);
Null Handling
// Find records with missing data
Expression missingData = or(
isNull("email"),
isNull("phone")
);
// Find complete records
Expression completeData = and(
notNull("email"),
notNull("phone"),
notNull("address")
);
Dynamic Filter Building
import java.util.Arrays;
import java.util.List;
/**
 * Builds a filter on the "status" column from the given list of values.
 *
 * @param statuses the status values to match
 * @return alwaysTrue() when the list is empty, an equality predicate when it
 *     holds exactly one value, and an IN predicate otherwise
 */
public Expression buildFilter(List<String> statuses) {
  // An empty list means "no restriction", so match every row.
  if (statuses.isEmpty()) {
    return alwaysTrue();
  }
  // A single value is expressed as a plain equality predicate.
  if (statuses.size() == 1) {
    return equal("status", statuses.get(0));
  }
  return in("status", statuses);
}
// Usage
List<String> activeStatuses = Arrays.asList("pending", "processing");
Expression filter = buildFilter(activeStatuses);
Transform Filtering
// Bucket + range filter
Expression bucketFilter = and(
equal(bucket("user_id", 16), 5),
greaterThan("score", 100)
);
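// Time-based partitioning. NOTE: year(), month() and day() produce ordinals counted from the 1970 epoch
// (years since 1970, months since 1970-01, days since 1970-01-01), not calendar fields.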
Expression timePartFilter = and(
equal(year("timestamp"), 2024),
equal(month("timestamp"), 1),
greaterThan(day("timestamp"), 15)
);
See Also