Regular Expressions #

This file contains the formal definition of regular expressions and basic lemmas. Note that these are regular expressions in the sense of formal language theory, which differ from the regexes used in computer science, such as those of the POSIX standard.

TODO #

Show that regular expressions and DFAs/NFAs are equivalent.

source

inductive RegularExpression (α : Type u) :

Type u

This is the definition of regular expressions. The names used here mirror the definition of a Kleene algebra (https://en.wikipedia.org/wiki/Kleene_algebra).

0 (zero) matches nothing
1 (epsilon) matches only the empty string
char a matches only the string 'a'
star P matches any finite concatenation of strings which match P
P + Q (plus P Q) matches anything which matches P or Q
P * Q (comp P Q) matches x ++ y if x matches P and y matches Q

zero: {α : Type u} → RegularExpression α
epsilon: {α : Type u} → RegularExpression α
char: {α : Type u} → α → RegularExpression α
plus: {α : Type u} → RegularExpression α → RegularExpression α → RegularExpression α
comp: {α : Type u} → RegularExpression α → RegularExpression α → RegularExpression α
star: {α : Type u} → RegularExpression α → RegularExpression α

source

instance RegularExpression.instInhabited {α : Type u_1} :

Inhabited (RegularExpression α)

Equations

RegularExpression.instInhabited = { default := RegularExpression.zero }

source

instance RegularExpression.instAdd {α : Type u_1} :

Add (RegularExpression α)

Equations

RegularExpression.instAdd = { add := RegularExpression.plus }

source

instance RegularExpression.instMul {α : Type u_1} :

Mul (RegularExpression α)

Equations

RegularExpression.instMul = { mul := RegularExpression.comp }

source

instance RegularExpression.instOne {α : Type u_1} :

One (RegularExpression α)

Equations

RegularExpression.instOne = { one := RegularExpression.epsilon }

source

instance RegularExpression.instZero {α : Type u_1} :

Zero (RegularExpression α)

Equations

RegularExpression.instZero = { zero := RegularExpression.zero }

source

instance RegularExpression.instPowNat {α : Type u_1} :

Pow (RegularExpression α) ℕ

Equations

RegularExpression.instPowNat = { pow := fun (n : RegularExpression α) (r : ℕ) => npowRec r n }

source

@[simp]

theorem RegularExpression.zero_def {α : Type u_1} :

RegularExpression.zero = 0

source

@[simp]

theorem RegularExpression.one_def {α : Type u_1} :

RegularExpression.epsilon = 1

source

@[simp]

theorem RegularExpression.plus_def {α : Type u_1} (P : RegularExpression α) (Q : RegularExpression α) :

P.plus Q = P + Q

source

@[simp]

theorem RegularExpression.comp_def {α : Type u_1} (P : RegularExpression α) (Q : RegularExpression α) :

P.comp Q = P * Q

source

def RegularExpression.matches' {α : Type u_1} :

RegularExpression α → Language α

matches' P provides a language which contains all strings that P matches

Equations

RegularExpression.zero.matches' = 0
RegularExpression.epsilon.matches' = 1
(RegularExpression.char a).matches' = {[a]}
(P.plus Q).matches' = P.matches' + Q.matches'
(P.comp Q).matches' = P.matches' * Q.matches'
P.star.matches' = KStar.kstar P.matches'

source

@[simp]

theorem RegularExpression.matches'_zero {α : Type u_1} :

RegularExpression.matches' 0 = 0

source

@[simp]

theorem RegularExpression.matches'_epsilon {α : Type u_1} :

RegularExpression.matches' 1 = 1

source

@[simp]

theorem RegularExpression.matches'_char {α : Type u_1} (a : α) :

(RegularExpression.char a).matches' = {[a]}

source

@[simp]

theorem RegularExpression.matches'_add {α : Type u_1} (P : RegularExpression α) (Q : RegularExpression α) :

(P + Q).matches' = P.matches' + Q.matches'

source

@[simp]

theorem RegularExpression.matches'_mul {α : Type u_1} (P : RegularExpression α) (Q : RegularExpression α) :

(P * Q).matches' = P.matches' * Q.matches'

source

@[simp]

theorem RegularExpression.matches'_pow {α : Type u_1} (P : RegularExpression α) (n : ℕ) :

(P ^ n).matches' = P.matches' ^ n

source

@[simp]

theorem RegularExpression.matches'_star {α : Type u_1} (P : RegularExpression α) :

P.star.matches' = KStar.kstar P.matches'

source

def RegularExpression.matchEpsilon {α : Type u_1} :

RegularExpression α → Bool

matchEpsilon P is true if and only if P matches the empty string

Equations

RegularExpression.zero.matchEpsilon = false
RegularExpression.epsilon.matchEpsilon = true
(RegularExpression.char a).matchEpsilon = false
(P.plus Q).matchEpsilon = (P.matchEpsilon || Q.matchEpsilon)
(P.comp Q).matchEpsilon = (P.matchEpsilon && Q.matchEpsilon)
P.star.matchEpsilon = true

source

def RegularExpression.deriv {α : Type u_1} [DecidableEq α] :

RegularExpression α → α → RegularExpression α

P.deriv a matches x if and only if P matches a :: x. This is the Brzozowski derivative of P with respect to a.

Equations

RegularExpression.zero.deriv x = 0
RegularExpression.epsilon.deriv x = 0
(RegularExpression.char a₁).deriv x = if a₁ = x then 1 else 0
(P.plus Q).deriv x = P.deriv x + Q.deriv x
(P.comp Q).deriv x = if P.matchEpsilon = true then P.deriv x * Q + Q.deriv x else P.deriv x * Q
P.star.deriv x = P.deriv x * P.star

source

@[simp]

theorem RegularExpression.deriv_zero {α : Type u_1} [DecidableEq α] (a : α) :

RegularExpression.deriv 0 a = 0

source

@[simp]

theorem RegularExpression.deriv_one {α : Type u_1} [DecidableEq α] (a : α) :

RegularExpression.deriv 1 a = 0

source

@[simp]

theorem RegularExpression.deriv_char_self {α : Type u_1} [DecidableEq α] (a : α) :

(RegularExpression.char a).deriv a = 1

source

@[simp]

theorem RegularExpression.deriv_char_of_ne {α : Type u_1} {a : α} {b : α} [DecidableEq α] (h : a ≠ b) :

(RegularExpression.char a).deriv b = 0

source

@[simp]

theorem RegularExpression.deriv_add {α : Type u_1} [DecidableEq α] (P : RegularExpression α) (Q : RegularExpression α) (a : α) :

(P + Q).deriv a = P.deriv a + Q.deriv a

source

@[simp]

theorem RegularExpression.deriv_star {α : Type u_1} [DecidableEq α] (P : RegularExpression α) (a : α) :

P.star.deriv a = P.deriv a * P.star

source

def RegularExpression.rmatch {α : Type u_1} [DecidableEq α] :

RegularExpression α → List α → Bool

P.rmatch x is true if and only if P matches x. This is a computable definition equivalent to matches'.

Equations

x.rmatch [] = x.matchEpsilon
x.rmatch (a :: as) = (x.deriv a).rmatch as

source

@[simp]

theorem RegularExpression.zero_rmatch {α : Type u_1} [DecidableEq α] (x : List α) :

RegularExpression.rmatch 0 x = false

source

theorem RegularExpression.one_rmatch_iff {α : Type u_1} [DecidableEq α] (x : List α) :

RegularExpression.rmatch 1 x = true ↔ x = []

source

theorem RegularExpression.char_rmatch_iff {α : Type u_1} [DecidableEq α] (a : α) (x : List α) :

(RegularExpression.char a).rmatch x = true ↔ x = [a]

source

theorem RegularExpression.add_rmatch_iff {α : Type u_1} [DecidableEq α] (P : RegularExpression α) (Q : RegularExpression α) (x : List α) :

(P + Q).rmatch x = true ↔ P.rmatch x = true ∨ Q.rmatch x = true

source

theorem RegularExpression.mul_rmatch_iff {α : Type u_1} [DecidableEq α] (P : RegularExpression α) (Q : RegularExpression α) (x : List α) :

(P * Q).rmatch x = true ↔ ∃ (t : List α) (u : List α), x = t ++ u ∧ P.rmatch t = true ∧ Q.rmatch u = true

source

@[irreducible]

theorem RegularExpression.star_rmatch_iff {α : Type u_1} [DecidableEq α] (P : RegularExpression α) (x : List α) :

P.star.rmatch x = true ↔ ∃ (S : List (List α)), x = S.join ∧ ∀ t ∈ S, t ≠ [] ∧ P.rmatch t = true

source

@[simp]

theorem RegularExpression.rmatch_iff_matches' {α : Type u_1} [DecidableEq α] (P : RegularExpression α) (x : List α) :

P.rmatch x = true ↔ x ∈ P.matches'

source

instance RegularExpression.instDecidablePredListMemLanguageMatches' {α : Type u_1} [DecidableEq α] (P : RegularExpression α) :

DecidablePred fun (x : List α) => x ∈ P.matches'

Equations

P.instDecidablePredListMemLanguageMatches' x = decidable_of_iff (P.rmatch x = true) ⋯

source

def RegularExpression.map {α : Type u_1} {β : Type u_2} (f : α → β) :

RegularExpression α → RegularExpression β

Map the alphabet of a regular expression.

Equations

RegularExpression.map f RegularExpression.zero = 0
RegularExpression.map f RegularExpression.epsilon = 1
RegularExpression.map f (RegularExpression.char a) = RegularExpression.char (f a)
RegularExpression.map f (P.plus Q) = RegularExpression.map f P + RegularExpression.map f Q
RegularExpression.map f (P.comp Q) = RegularExpression.map f P * RegularExpression.map f Q
RegularExpression.map f P.star = (RegularExpression.map f P).star

source

@[simp]

theorem RegularExpression.map_pow {α : Type u_1} {β : Type u_2} (f : α → β) (P : RegularExpression α) (n : ℕ) :

RegularExpression.map f (P ^ n) = RegularExpression.map f P ^ n

source

@[simp]

theorem RegularExpression.map_id {α : Type u_1} (P : RegularExpression α) :

RegularExpression.map id P = P

source

@[simp]

theorem RegularExpression.map_map {α : Type u_1} {β : Type u_2} {γ : Type u_3} (g : β → γ) (f : α → β) (P : RegularExpression α) :

RegularExpression.map g (RegularExpression.map f P) = RegularExpression.map (g ∘ f) P

source

@[simp]

theorem RegularExpression.matches'_map {α : Type u_1} {β : Type u_2} (f : α → β) (P : RegularExpression α) :

(RegularExpression.map f P).matches' = (Language.map f) P.matches'

The language of the map is the map of the language.

Documentation

Mathlib.Computability.RegularExpressions

Regular Expressions #

TODO #