(* Application of the AST zipper design pattern to CRQL (Core RDF Query Language). This is a companion document to a paper submitted to ESWC'16: "Bridging the gap between formal languages and natural languages with zippers". Creator: Sébastien Ferré Creation: April 2015 Modified: December 2015 *) (* RDF types (simplified) *) type uri = string type rdf_class = uri type rdf_prop = uri type rdf_literal = Plain of string * string | Typed of string * uri type rdf_id = string type rdf_node = URI of uri | Literal of rdf_literal | Blank of rdf_id (* AST datatype *) type s = Select of np and np = Something | Some of rdf_class | Node of rdf_node | That of np * vp | NAnd of np * np | NOr of np * np and vp = IsA of rdf_class | Has of rdf_prop * np | IsOf of rdf_prop * np | Equal of rdf_node | Matches of string | Leq of rdf_literal | Geq of rdf_literal | True | And of vp * vp | Or of vp * vp | Not of vp | Option of vp let np_ex = That (Some ("dbo:Film"), And (Has ("dbo:director", Node (URI "dbr:Steven_Spielberg")), Has ("dbo:releaseDate", That (Something, Geq (Typed ("2010-01-01","xsd:date")))))) (* AST context datatypes: derived from AST datatypes *) type s' = Root and np' = Select' of s' | That1' of np' * vp | Has2' of rdf_prop * vp' | IsOf2' of rdf_prop * vp' | NAnd1' of np' * np | NAnd2' of np * np' | NOr1' of np' * np | NOr2' of np * np' and vp' = That2' of np * np' | And1' of vp' * vp | And2' of vp * vp' | Or1' of vp' * vp | Or2' of vp * vp' | Not' of vp' | Option' of vp' let np'_ex = IsOf2' ("dbo:genre", That2' (Something, Select' Root)) (* AST zipper *) type zipper = S of s * s' | NP of np * np' | VP of vp * vp' let zipper_ex = NP (np_ex, np'_ex) (* initial AST zipper *) let rec s0 = Select np0 and np0 = Something and vp0 = True let zipper0 = S (s0,Root) (* AST zipper transformations *) type transf = zipper -> zipper let id : transf = fun zip -> zip let down : transf = function | S (Select np, s') -> NP (np, Select' s') | NP (That (np,vp), np') -> NP (np, That1' (np', vp)) | NP (NAnd (np1,np2), np') -> NP (np1, NAnd1' (np',np2)) | NP (NOr (np1,np2), np') -> NP (np1, NOr1' (np',np2)) | VP (Has (prop,np), vp') -> NP (np, Has2' (prop, vp')) | VP (IsOf (prop,np), vp') -> NP (np, IsOf2' (prop, vp')) | VP (And (vp1,vp2), vp') -> VP (vp1, And1' (vp',vp2)) | VP (Or (vp1,vp2), vp') -> VP (vp1, Or1' (vp',vp2)) | VP (Not vp, vp') -> VP (vp, Not' vp') | VP (Option vp, vp') -> VP (vp, Option' vp') | zip -> zip let up : transf = function | NP (np, Select' s') -> S (Select np, s') | NP (np, Has2' (prop,vp')) -> VP (Has (prop,np), vp') | NP (np, IsOf2' (prop,vp')) -> VP (IsOf (prop,np), vp') | NP (np, NAnd1' (np',np2)) -> NP (NAnd (np,np2), np') | NP (np, NAnd2' (np1,np')) -> NP (NAnd (np1,np), np') | NP (np, NOr1' (np',np2)) -> NP (NOr (np,np2), np') | NP (np, NOr2' (np1,np')) -> NP (NOr (np1,np), np') | NP (np, That1' (np',vp)) -> NP (That (np,vp), np') | VP (vp, That2' (np,np')) -> NP (That (np,vp), np') | VP (vp, And1' (vp',vp2)) -> VP (And (vp,vp2), vp') | VP (vp, And2' (vp1,vp')) -> VP (And (vp1,vp), vp') | VP (vp, Or1' (vp',vp2)) -> VP (Or (vp,vp2), vp') | VP (vp, Or2' (vp1,vp')) -> VP (Or (vp1,vp), vp') | VP (vp, Not' vp') -> VP (Not vp, vp') | VP (vp, Option' vp') -> VP (Option vp, vp') | zip -> zip let right : transf = function | NP (np, That1' (np',vp)) -> VP (vp, That2' (np,np')) | NP (np1, NAnd1' (np',np2)) -> NP (np2, NAnd2' (np1,np')) | NP (np1, NOr1' (np',np2)) -> NP (np2, NOr2' (np1,np')) | VP (vp1, And1' (vp',vp2)) -> VP (vp2, And2' (vp1,vp')) | VP (vp1, Or1' (vp',vp2)) -> VP (vp2, Or2' (vp1,vp')) | zip -> zip let left : transf = function | VP (vp, That2' (np,np')) -> NP (np, That1' (np',vp)) | NP (np2, NAnd2' (np1,np')) -> NP (np1, NAnd1' (np',np2)) | NP (np2, NOr2' (np1,np')) -> NP (np1, NOr1' (np',np2)) | VP (vp2, And2' (vp1,vp')) -> VP (vp1, And1' (vp',vp2)) | VP (vp2, Or2' (vp1,vp')) -> VP (vp1, Or1' (vp',vp2)) | zip -> zip let insert_np (np : np) : transf = function | NP (Something, np') -> NP (np, np') | zip -> zip let insert_vp (vp : vp) : transf = function | VP (True, vp') -> VP (vp, vp') | zip -> zip let delete : transf = function | S (_, s') -> S (s0, s') | NP (_, NAnd1' (np',np2)) -> NP (np2, np') | NP (_, NAnd2' (np1,np')) -> NP (np1, np') | NP (_, NOr1' (np',np2)) -> NP (np2, np') | NP (_, NOr2' (np1,np')) -> NP (np1, np') | NP (_, np') -> NP (np0, np') | VP (_, That2' (np,np')) -> NP (np,np') | VP (_, And1' (vp',vp2)) -> VP (vp2, vp') | VP (_, And2' (vp1,vp')) -> VP (vp1, vp') | VP (_, Or1' (vp',vp2)) -> VP (vp2, vp') | VP (_, Or2' (vp1,vp')) -> VP (vp1, vp') | VP (_, vp') -> VP (vp0, vp') let insert_that : transf = function | NP (np,np') -> VP (vp0, That2' (np,np')) | zip -> zip let insert_and : transf = function | NP (np, np') -> NP (np0, NAnd2' (np,np')) | VP (vp, vp') -> VP (vp0, And2' (vp,vp')) | zip -> zip let insert_or : transf = function | NP (np, np') -> NP (np0, NOr2' (np,np')) | VP (vp, vp') -> VP (vp0, Or2' (vp,vp')) | zip -> zip let toggle_not : transf = function | VP (Not vp, vp') -> VP (vp, vp') | VP (vp, vp') -> VP (Not vp, vp') | zip -> zip let toggle_option : transf = function | VP (Option vp, vp') -> VP (vp, vp') | VP (vp, vp') -> VP (Option vp, vp') | zip -> zip (* edition trace to reach a sentence *) let (/) t1 t2 = fun zip -> t2 (t1 zip) (* infix operator for composition of transformations *) let rec trace_s : s -> transf = function | Select np -> down / trace_np np / up and trace_np : np -> transf = function | Something -> id | Some c -> insert_np (Some c) | Node n -> insert_np (Node n) | That (np,vp) -> trace_np np / insert_that / trace_vp vp / up | NAnd (np1,np2) -> trace_np np1 / insert_and / trace_np np2 / up | NOr (np1,np2) -> trace_np np1 / insert_or / trace_np np2 / up and trace_vp : vp -> transf = function | IsA c -> insert_vp (IsA c) | Has (p,np) -> insert_vp (Has (p,np0)) / down / trace_np np / up | IsOf (p,np) -> insert_vp (IsOf (p,np0)) / down / trace_np np / up | Equal n -> insert_vp (Equal n) | Matches s -> insert_vp (Matches s) | Leq l -> insert_vp (Leq l) | Geq l -> insert_vp (Geq l) | True -> id | And (vp1,vp2) -> trace_vp vp1 / insert_and / trace_vp vp2 / up | Or (vp1,vp2) -> trace_vp vp1 / insert_or / trace_vp vp2 / up | Not vp -> trace_vp vp / toggle_not | Option vp -> trace_vp vp / toggle_option let rec trace : zipper -> transf = fun zip -> let left_zip = left zip in if left_zip <> zip then trace left_zip / right else let up_zip = up zip in if up_zip <> zip then trace up_zip / down else match zip with | S (s, Root) -> trace_s s | _ -> assert false (* every other zipper should define UP or LEFT *) let trace_ex = trace zipper_ex (* formalization to SPARQL *) (* normalization of a zipper into a sentence, to use before translating to SPARQL. *) let rec norm : zipper -> s = function | S (s, Root) -> s | NP (np1, NOr1' (np',np2)) -> norm (NP (np1, np')) | NP (np2, NOr2' (np1,np')) -> norm (NP (np2, np')) | VP (vp1, Or1' (vp',vp2)) -> norm (VP (vp1, vp')) | VP (vp2, Or2' (vp1,vp')) -> norm (VP (vp2, vp')) | VP (vp, Not' vp') -> norm (VP (vp, vp')) | VP (vp, Option' vp') -> norm (VP (vp, vp')) | zip -> norm (up zip) (* representation of SPARQL queries by strings *) type sparql = string type term = string type pattern = string let (+) = (^) (* using '+' for string concatenation *) (* utilities for SPARQL variables *) let counter = ref 0 (* counter for variables *) let var_list = ref "" (* record of list of generated variables *) let new_var () = (* utility function to return fresh SPARQL variables *) incr counter; let v = "?x" ^ string_of_int !counter in var_list := !var_list + " " + v; v (* utility functions for producing SPARQL terms *) let sparql_uri uri = uri let sparql_string s = "\"" + String.escaped s + "\"" let sparql_literal = function | Plain (s,lang) -> sparql_string s + "@" + lang | Typed (s,dt) -> sparql_string s + "^^" + dt let sparql_node = function | URI uri -> sparql_uri uri | Literal lit -> sparql_literal lit | Blank id -> "_:" + id (* utility functions for producing SPARQL patterns *) let sparql_triple s p o = s + " " + p + " " + o let sparql_join p1 p2 = if p1="" then p2 else if p2="" then p1 else p1 + " . " + p2 let sparql_union p1 p2 = if p1="" then p2 else if p2="" then p1 else "{ " + p1 + " } UNION { " + p2 + " }" (* main functions for translatings ASTs to SPARQL *) let rec sparql_s : s -> sparql = function | Select np -> let np = sparql_np np in "SELECT " + !var_list + " WHERE { " + np (fun x -> "") + " }" and sparql_np : np -> ((term -> pattern) -> pattern) = function | Something -> let x_i = new_var () in (fun d -> d x_i) | Some c -> let x_i = new_var () in let c = sparql_uri c in (fun d -> sparql_join (sparql_triple x_i "a" c) (d x_i)) | Node n -> let n = sparql_node n in (fun d -> d n) | That (np,vp) -> let np = sparql_np np in let vp = sparql_vp vp in (fun d -> np (fun x -> sparql_join (d x) (vp x))) | NAnd (np1,np2) -> let np1 = sparql_np np1 in let np2 = sparql_np np2 in (fun d -> sparql_join (np1 d) (np2 d)) | NOr (np1,np2) -> let np1 = sparql_np np1 in let np2 = sparql_np np2 in (fun d -> sparql_union (np1 d) (np2 d)) and sparql_vp : vp -> (term -> pattern) = function | IsA c -> let c = sparql_uri c in (fun x -> sparql_triple x "a" c) | Has (p,np) -> let p = sparql_uri p in let np = sparql_np np in (fun x -> np (fun y -> sparql_triple x p y)) | IsOf (p,np) -> let p = sparql_uri p in let np = sparql_np np in (fun x -> np (fun y -> sparql_triple y p x)) | Equal n -> let n = sparql_node n in (fun x -> "FILTER (" + x + " = " + n + ")") | Matches s -> let s = sparql_string s in (fun x -> "FILTER (REGEX(str(" + x + ")," + s + "))") | Leq l -> let l = sparql_literal l in (fun x -> "FILTER (" + x + " <= " + l + ")") | Geq l -> let l = sparql_literal l in (fun x -> "FILTER (" + x + " >= " + l + ")") | True -> (fun x -> "") | And (vp1,vp2) -> let vp1 = sparql_vp vp1 in let vp2 = sparql_vp vp2 in (fun x -> sparql_join (vp1 x) (vp2 x)) | Or (vp1,vp2) -> let vp1 = sparql_vp vp1 in let vp2 = sparql_vp vp2 in (fun x -> sparql_union (vp1 x) (vp2 x)) | Not vp -> let vp = sparql_vp vp in (fun x -> "FILTER NOT EXISTS { " + vp x + " }") | Option vp -> let vp = sparql_vp vp in (fun x -> "OPTIONAL { " + vp x + " }") (* main function for translating a zipper to SPARQL *) let sparql (zip : zipper) : sparql = let s = norm zip in sparql_s s let sparql_ex = sparql zipper_ex (* verbalization to English *) let (++) s1 s2 = s1 ^ " " ^ s2 let noun_uri = function | "dbo:Film" -> "film" | "dbo:genre" -> "genre" | "dbo:releaseDate" -> "release date" | "dbo:director" -> "director" | "dbr:Steven_Spielberg" -> "Steven Spielberg" | uri -> uri (* to be extended by extracting local name from URI, or accessing labels from store *) let noun_string s = "\"" ^ String.escaped s ^ "\"" let noun_literal : rdf_literal -> string * uri = function | Plain (s,lang) -> noun_string s, "xsd:string" | Typed ("2010-01-01","xsd:date") -> "January 1st, 2010", "xsd:date" | Typed (s,dt) -> s, dt (* to be extended for each datatype, e.g. January 1st, 2010 for 2010-01-01 *) let noun_node = function | URI uri -> noun_uri uri | Literal lit -> fst (noun_literal lit) | Blank id -> "the" ++ id let english_a_an noun = if List.mem noun.[0] ['a'; 'e'; 'i'; 'o'] then "an" ++ noun else "a" ++ noun let is neg = if neg then "is not" else "is" let leq neg dt = match dt with | "xsd:date" -> if neg then "after" else "before" | _ -> if neg then "greater than" else "lesser or equal to" let geq neg dt = match dt with | "xsd:date" -> if neg then "before" else "after" | _ -> if neg then "lesser than" else "greater or equal to" let rec english_s : s -> string = function | Select np -> let np = english_np np in "Give me" ++ np and english_np : np -> string = function | Something -> "something" | Some c -> let c = noun_uri c in english_a_an c | Node n -> noun_node n | That (np,vp) -> let np = english_np np in let vp = english_vp vp in np ++ (vp false) | NAnd (np1,np2) -> let np1 = english_np np1 in let np2 = english_np np2 in np1 ++ "and" ++ np2 | NOr (np1,np2) -> let np1 = english_np np1 in let np2 = english_np np2 in np1 ++ "or" ++ np2 and english_vp : vp -> (bool -> string) = function | IsA c -> let c = noun_uri c in (fun neg -> "that" ++ is neg ++ english_a_an c) | Has (p,np) -> let p = noun_uri p in let np = english_np np in (fun neg -> "whose" ++ p ++ is neg ++ np) | IsOf (p,np) -> let p = noun_uri p in let np = english_np np in (fun neg -> "that" ++ is neg ++ "the" ++ p ++ "of" ++ np) | Equal n -> let n = noun_node n in (fun neg -> "that" ++ is neg ++ n) | Matches s -> let s = noun_string s in (fun neg -> "that" ++ (if neg then "does not match" else "matches") ++ s) | Leq l -> let l, dt = noun_literal l in (fun neg -> leq neg dt ++ l) | Geq l -> let l, dt = noun_literal l in (fun neg -> geq neg dt ++ l) | True -> (fun neg -> "that" ++ (if neg then "does not" else "does") ++ "...") | And (vp1,vp2) -> let vp1 = english_vp vp1 in let vp2 = english_vp vp2 in (fun neg -> (vp1 neg) ++ (if neg then "or" else "and") ++ (vp2 neg)) | Or (vp1,vp2) -> let vp1 = english_vp vp1 in let vp2 = english_vp vp2 in (fun neg -> (vp1 neg) ++ (if neg then "and" else "or") ++ (vp2 neg)) | Not vp -> let vp = english_vp vp in (fun neg -> vp (not neg)) | Option vp -> let vp = english_vp vp in (fun neg -> "optionally" ++ vp neg) let english (zip : zipper) : string = let s = norm zip in english_s s let english_ex = english zipper_ex