This chapter covers three essential text processing modules: regular expressions for pattern matching, structured logging for debugging, and StringBuilder for efficient string concatenation.
13.1 Regular Expressions
The stdlib/regex.nano module provides POSIX regex pattern matching.
Compiling Patterns
from "stdlib/regex.nano" import compile, matches, Regex
# Returns true only when the whole of `email` has the shape name@host.tld
# (alphanumeric name and host, lowercase alphabetic TLD).
#
# The pattern is anchored with ^ and $ so the result stays correct even if
# `matches` performs a substring search (POSIX regexec semantics); without
# the anchors, input with extra characters around a valid address could pass.
fn validate_email(email: string) -> bool {
    let pattern: Regex = (compile "^[a-zA-Z0-9]+@[a-zA-Z0-9]+\\.[a-z]+$")
    return (matches pattern email)
}

shadow validate_email {
    assert (validate_email "user@example.com")
    assert (not (validate_email "invalid"))
    # Junk surrounding an otherwise valid address must be rejected.
    assert (not (validate_email "!!user@example.com!!"))
}
**Key points:**
- `compile(pattern)` - Returns an opaque `Regex` handle (GC-managed, no manual cleanup needed)
- Backslashes must be escaped inside string literals: write `\\` to produce a single regex backslash
Matching Patterns
from "stdlib/regex.nano" import compile, matches, Regex
# Checks one compiled pattern against two inputs that should match and one
# that should not; returns true when all three behave as expected.
fn test_patterns() -> bool {
    let greeting: Regex = (compile "hello.*world")

    let with_filler: bool = (matches greeting "hello beautiful world")  # expected: true
    let adjacent: bool = (matches greeting "hello world")               # expected: true
    let unrelated: bool = (matches greeting "goodbye world")            # expected: false

    # A hit on the unrelated input means the pattern is too permissive.
    if unrelated {
        return false
    }
    return (and with_filler adjacent)
}

shadow test_patterns {
    assert (test_patterns)
}
Finding Matches
from "stdlib/regex.nano" import compile, find, Regex

# Compiles `pattern_str` and returns the index of its first match in `text`,
# or -1 when there is no match.
fn find_position(text: string, pattern_str: string) -> int {
    # The Regex handle is GC-managed, so nothing is freed here.
    let pattern: Regex = (compile pattern_str)
    return (find pattern text)
}

shadow find_position {
    assert (== (find_position "hello world" "world") 6)
    assert (== (find_position "no match" "xyz") -1)
}
**Returns:** Index of first match, or -1 if not found
Finding All Matches
from "stdlib/regex.nano" import compile, find_all, Regex

# Returns how many times `pattern_str` matches inside `text`.
fn count_matches(text: string, pattern_str: string) -> int {
    # The Regex handle is GC-managed, so nothing is freed here.
    let pattern: Regex = (compile pattern_str)
    # find_all returns one array entry per match; the count is its length.
    let positions: array<int> = (find_all pattern text)
    return (array_length positions)
}

shadow count_matches {
    assert (== (count_matches "aaa" "a") 3)
    assert (== (count_matches "hello world" "o") 2)
}
Capture Groups
from "stdlib/regex.nano" import compile, groups, Regex

# Splits an address of the form user@host.tld into its captured pieces.
# Returns the array produced by `groups`: index 0 is the full match and
# indices 1..3 are the three parenthesised groups.
fn extract_parts(text: string) -> array<string> {
    # The Regex handle is GC-managed, so nothing is freed here.
    let pattern: Regex = (compile "([a-z]+)@([a-z]+)\\.([a-z]+)")
    return (groups pattern text)
}

shadow extract_parts {
    let parts: array<string> = (extract_parts "user@example.com")
    # parts[0] = full match, parts[1..3] = groups
    assert (== (array_length parts) 4)
    assert (== (at parts 0) "user@example.com")
    assert (== (at parts 1) "user")
    assert (== (at parts 2) "example")
    assert (== (at parts 3) "com")
}
Replacing Text
from "stdlib/regex.nano" import compile, replace, replace_all, Regex

# Replaces every run of decimal digits in `text` with a single "X".
fn clean_text(text: string) -> string {
    # The Regex handle is GC-managed, so nothing is freed here.
    let pattern: Regex = (compile "[0-9]+")
    return (replace_all pattern text "X")
}

shadow clean_text {
    assert (== (clean_text "abc123def456") "abcXdefX")
}
**Functions:**
- `replace(regex, text, replacement)` - Replace first match
- `replace_all(regex, text, replacement)` - Replace all matches
Splitting Strings
from "stdlib/regex.nano" import compile, split, Regex

# Splits `text` on commas, discarding any whitespace that follows each comma.
fn split_by_comma(text: string) -> array<string> {
    # [[:space:]] is the POSIX character class for whitespace; the \s
    # shorthand is not defined by POSIX and is not portable across regex
    # implementations. The Regex handle is GC-managed, so nothing is freed.
    let pattern: Regex = (compile ",[[:space:]]*")
    return (split pattern text)
}

shadow split_by_comma {
    let parts: array<string> = (split_by_comma "a, b, c")
    assert (== (array_length parts) 3)
    # The whitespace after each comma belongs to the separator, not the item.
    assert (== (at parts 1) "b")
}
Common Patterns
**Email validation:**
# Local part (letters, digits, . _ % + -), "@", a domain of letters/digits/./-,
# then a literal dot and a TLD of at least two letters. The pattern is not
# anchored; add ^ and $ when the whole input must be an address.
let email_pattern: Regex = (compile "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}")
**Phone numbers:**
# Three digits optionally wrapped in parentheses, then groups of three and
# four digits; each group may be preceded by one "-", "." or space.
let phone_pattern: Regex = (compile "\\(?[0-9]{3}\\)?[-. ]?[0-9]{3}[-. ]?[0-9]{4}")
**URLs:**
# "http://" or "https://" followed by a dotted host name ending in a TLD of
# at least two letters. Paths, ports and query strings are not part of the
# pattern.
let url_pattern: Regex = (compile "https?://[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}")
**Integers:**
# Optional leading minus sign followed by one or more decimal digits.
let int_pattern: Regex = (compile "-?[0-9]+")
**Floats:**
# Optional minus sign, digits, a literal dot, then digits. Digits are required
# on both sides of the dot, so "5." and ".5" do not match.
let float_pattern: Regex = (compile "-?[0-9]+\\.[0-9]+")
13.2 Structured Logging
The stdlib/log.nano module provides hierarchical logging with categories.
Log Levels
from "stdlib/log.nano" import log_trace, log_debug, log_info
from "stdlib/log.nano" import log_warn, log_error, log_fatal
# Emits one message at each of the six log levels under the same category.
fn demonstrate_levels() -> void {
    let category: string = "cat"

    # TRACE
    (log_trace category "Detailed trace info")
    # DEBUG
    (log_debug category "Debug information")
    # INFO (default)
    (log_info category "Normal operation")
    # WARN
    (log_warn category "Warning message")
    # ERROR
    (log_error category "Error occurred")
    # FATAL
    (log_fatal category "Fatal error")
}

shadow demonstrate_levels {
    (demonstrate_levels)
}
**Log levels (least to most severe):**
1. **TRACE** - Verbose debugging
2. **DEBUG** - Development debugging
3. **INFO** - Normal operations (default threshold)
4. **WARN** - Potential problems
5. **ERROR** - Failures that don't halt
6. **FATAL** - Critical errors
Using Categories
from "stdlib/log.nano" import log_info, log_error
# Logs a login under "auth", then either rejects a negative id (logged under
# "validation", returns false) or proceeds to the data fetch (logged under
# "database", returns true).
fn process_user_request(user_id: int) -> bool {
    let login_note: string = (+ "User login: " (int_to_string user_id))
    (log_info "auth" login_note)

    let id_is_valid: bool = (>= user_id 0)
    if id_is_valid {
        (log_info "database" "Fetching user data")
        return true
    } else {
        (log_error "validation" "Invalid user ID")
        return false
    }
}

shadow process_user_request {
    assert (process_user_request 123)
    assert (not (process_user_request -1))
}
**Categories help organize logs by:**
- Component: `"auth"`, `"database"`, `"network"`
- Module: `"parser"`, `"compiler"`, `"runtime"`
- Feature: `"payment"`, `"search"`, `"upload"`
Logging Without Categories
from "stdlib/log.nano" import log, log_err
# Shows the two category-free helpers: `log` and `log_err`.
fn simple_logging() -> void {
    let startup_note: string = "Application started"
    let failure_note: string = "Something went wrong"

    (log startup_note)
    (log_err failure_note)
}

shadow simple_logging {
    (simple_logging)
}
**Convenience functions:**
- `log(message)` - Log at INFO level, no category
- `log_err(message)` - Log at ERROR level, no category
Output Format
[LEVEL] category: message
**Examples:**
[INFO] app: Application started
[DEBUG] parser: Parsing token at position 42
[ERROR] database: Connection timeout
[WARN] cache: Memory usage at 90%
Best Practices
**✅ DO:**
from "stdlib/log.nano" import log_info, log_error, log_debug
# Three logging habits worth copying; always returns true.
fn good_logging() -> bool {
    # 1. Record significant state changes at INFO.
    (log_info "app" "Processing batch of 100 items")

    # 2. Keep fine-grained detail at DEBUG rather than a higher level.
    (log_debug "details" "Processing item 42")

    # 3. Put the cause and relevant numbers into error messages.
    (log_error "database" "Failed to connect: timeout after 30s")

    return true
}

shadow good_logging {
    assert (good_logging)
}
**❌ DON'T:**
# Anti-pattern fragments for illustration only; they are not a complete
# program (`password` is assumed to be defined by surrounding code).

# Don't log everything at ERROR: routine events belong at INFO or below.
(log_error "app" "Normal operation") # Wrong level!
# Don't include sensitive data: the secret ends up in the log output.
(log_info "auth" (+ "Password: " password)) # Security risk!
# Don't log in tight loops: this issues one log call per iteration.
for i in (range 0 1000000) {
(log_debug "loop" "Iteration") # Performance hit!
}
13.3 StringBuilder
The stdlib/StringBuilder.nano module provides efficient string building.
Why Use StringBuilder?
**Problem:** Naive concatenation is O(n²)
# ❌ Slow: every pass builds a brand-new, longer string
let mut result: string = ""
for step in (range 0 1000) {
    # Copies everything accumulated so far, so the loop is O(n²) overall.
    set result (+ result "x")
}
**Solution:** StringBuilder is O(n)
from "stdlib/StringBuilder.nano" import StringBuilder_new, StringBuilder_append
from "stdlib/StringBuilder.nano" import StringBuilder_to_string
# ✅ Fast: each append is amortized O(1), so the whole loop is O(n)
let mut builder: StringBuilder = (StringBuilder_new)
for step in (range 0 1000) {
    # O(1) on average
    set builder (StringBuilder_append builder "x")
}
# Materialize the final string once, after all appends are done.
let result: string = (StringBuilder_to_string builder)
Creating StringBuilders
from "stdlib/StringBuilder.nano" import StringBuilder_new, StringBuilder_with_capacity
from "stdlib/StringBuilder.nano" import StringBuilder
# Demonstrates both constructors and returns the default-capacity builder.
fn create_builders() -> StringBuilder {
    # StringBuilder_new uses the default capacity (256).
    let default_builder: StringBuilder = (StringBuilder_new)

    # StringBuilder_with_capacity takes the capacity explicitly.
    let roomy_builder: StringBuilder = (StringBuilder_with_capacity 1024)

    return default_builder
}

shadow create_builders {
    let fresh: StringBuilder = (create_builders)
    assert (== fresh.length 0)
}
Appending Strings
from "stdlib/StringBuilder.nano" import StringBuilder_new, StringBuilder_append
from "stdlib/StringBuilder.nano" import StringBuilder_to_string
# Builds "Hello, <name>!" by threading a builder through three appends.
fn build_greeting(name: string) -> string {
    let opening: StringBuilder = (StringBuilder_append (StringBuilder_new) "Hello, ")
    let with_name: StringBuilder = (StringBuilder_append opening name)
    let finished: StringBuilder = (StringBuilder_append with_name "!")
    return (StringBuilder_to_string finished)
}

shadow build_greeting {
    assert (== (build_greeting "Alice") "Hello, Alice!")
}
Appending Other Types
from "stdlib/StringBuilder.nano" import StringBuilder_new, StringBuilder_append
from "stdlib/StringBuilder.nano" import StringBuilder_append_int, StringBuilder_append_float
from "stdlib/StringBuilder.nano" import StringBuilder_to_string
# Renders "Name: <name>, Age: <age>, Score: <score>", using the typed append
# helpers for the int and float fields.
fn format_data(name: string, age: int, score: float) -> string {
    let mut out: StringBuilder = (StringBuilder_new)

    # Name field
    set out (StringBuilder_append out "Name: ")
    set out (StringBuilder_append out name)

    # Age field (int helper)
    set out (StringBuilder_append out ", Age: ")
    set out (StringBuilder_append_int out age)

    # Score field (float helper)
    set out (StringBuilder_append out ", Score: ")
    set out (StringBuilder_append_float out score)

    let rendered: string = (StringBuilder_to_string out)
    return rendered
}

shadow format_data {
    let line: string = (format_data "Alice" 30 95.5)
    assert (str_contains line "Alice")
}
StringBuilder Operations
from "stdlib/StringBuilder.nano" import StringBuilder_new, StringBuilder_append
from "stdlib/StringBuilder.nano" import StringBuilder_length, StringBuilder_clear
from "stdlib/StringBuilder.nano" import StringBuilder_to_string
# Walks through length and clear on a builder holding "hello"; returns true
# when every internal assertion passes.
fn builder_operations() -> bool {
    let mut buf: StringBuilder = (StringBuilder_append (StringBuilder_new) "hello")

    # Length after appending five characters
    let size_after_append: int = (StringBuilder_length buf)
    assert (== size_after_append 5)

    # Clearing leaves the builder empty
    set buf (StringBuilder_clear buf)
    let size_after_clear: int = (StringBuilder_length buf)
    assert (== size_after_clear 0)

    return true
}

shadow builder_operations {
    assert (builder_operations)
}
Practical Example: HTML Generation
from "stdlib/StringBuilder.nano" import StringBuilder_new, StringBuilder_append
from "stdlib/StringBuilder.nano" import StringBuilder_to_string
# Assembles a minimal HTML document around `title` and `body`.
# Both values are inserted verbatim, with no escaping.
fn build_html(title: string, body: string) -> string {
    let mut page: StringBuilder = (StringBuilder_new)

    # Document prologue
    set page (StringBuilder_append page "<!DOCTYPE html>\n")
    set page (StringBuilder_append page "<html>\n")

    # <head> with the title
    set page (StringBuilder_append page "<head><title>")
    set page (StringBuilder_append page title)
    set page (StringBuilder_append page "</title></head>\n")

    # <body> with the content
    set page (StringBuilder_append page "<body>")
    set page (StringBuilder_append page body)
    set page (StringBuilder_append page "</body>\n")

    # Document epilogue
    set page (StringBuilder_append page "</html>")

    let markup: string = (StringBuilder_to_string page)
    return markup
}

shadow build_html {
    let markup: string = (build_html "Test" "Content")
    assert (str_contains markup "<title>Test</title>")
}
Performance Guidelines
**Use StringBuilder when:**
- ✅ Building strings in loops
- ✅ Concatenating 10+ strings
- ✅ Generating templates/reports
- ✅ Building large outputs
**Use simple concatenation when:**
- ✅ Joining 2-3 strings once
- ✅ Simple formatting
- ✅ Readability is more important
13.4 Combined Example: Log Parser
from "stdlib/regex.nano" import compile, matches, groups, Regex
from "stdlib/log.nano" import log_info, log_error
from "stdlib/StringBuilder.nano" import StringBuilder_new, StringBuilder_append
from "stdlib/StringBuilder.nano" import StringBuilder_to_string

# One log line in the "[LEVEL] category: message" format, split into parts.
struct LogEntry {
    # Text between the square brackets, e.g. "INFO"
    level: string,
    # Text between the brackets and the colon, e.g. "app"
    category: string,
    # Everything after "category: "
    message: string
}
# Parses one "[LEVEL] category: message" line into a LogEntry.
# When the line does not yield the full match plus three groups, an error is
# logged under "parser" and an entry with three empty strings is returned.
fn parse_log_line(line: string) -> LogEntry {
    # Pattern: [LEVEL] category: message
    let pattern: Regex = (compile "\\[([A-Z]+)\\] ([a-z]+): (.+)")
    let parts: array<string> = (groups pattern line)

    # Index 0 is the full match, so a successful parse has 4 entries.
    if (< (array_length parts) 4) {
        (log_error "parser" "Invalid log line format")
        return LogEntry { level: "", category: "", message: "" }
    }

    return LogEntry {
        level: (at parts 1),
        category: (at parts 2),
        message: (at parts 3)
    }
}

shadow parse_log_line {
    let ok: LogEntry = (parse_log_line "[ERROR] database: Connection timeout")
    assert (== ok.level "ERROR")
    assert (== ok.category "database")
    assert (== ok.message "Connection timeout")

    # A line that does not fit the format yields the empty entry.
    let bad: LogEntry = (parse_log_line "not a log line")
    assert (== bad.level "")
    assert (== bad.category "")
    assert (== bad.message "")
}
# Renders a LogEntry back into "[LEVEL] category: message" form.
fn format_log_entry(entry: LogEntry) -> string {
    # "[LEVEL] "
    let with_open: StringBuilder = (StringBuilder_append (StringBuilder_new) "[")
    let with_level: StringBuilder = (StringBuilder_append with_open entry.level)
    let with_close: StringBuilder = (StringBuilder_append with_level "] ")

    # "category: "
    let with_category: StringBuilder = (StringBuilder_append with_close entry.category)
    let with_colon: StringBuilder = (StringBuilder_append with_category ": ")

    # "message"
    let complete: StringBuilder = (StringBuilder_append with_colon entry.message)
    return (StringBuilder_to_string complete)
}

shadow format_log_entry {
    let sample: LogEntry = LogEntry {
        level: "INFO",
        category: "app",
        message: "Started"
    }
    assert (== (format_log_entry sample) "[INFO] app: Started")
}
Summary
In this chapter, you learned:
- ✅ Regular expressions: compile, match, find, replace, split
- ✅ Structured logging: 6 levels, categories, formatting
- ✅ StringBuilder: efficient string building, O(n) performance
- ✅ Combined usage for text processing tasks
Quick Reference
| Module | Key Functions |
|---|---|
| **regex** | compile, matches, find, find_all, groups, replace, replace_all, split |
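| **log** | log_trace, log_debug, log_info, log_warn, log_error, log_fatal, log, log_err |
| **StringBuilder** | StringBuilder_new, StringBuilder_with_capacity, StringBuilder_append, StringBuilder_append_int, StringBuilder_append_float, StringBuilder_to_string, StringBuilder_length, StringBuilder_clear |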
---
**Previous:** Chapter 12: System & Runtime
**Next:** Chapter 14: Data Formats