#
# LGR XML Schema 1.0
#
default namespace = "urn:ietf:params:xml:ns:lgr-1.0"
#
# SIMPLE TYPES
#
# RFC 5646 language tag (e.g., "de", "und-Latn"). Any token is
# accepted; this datatype carries no further restriction.
language-tag = xsd:token

# Scope to which the LGR applies. For the "domain" scope type, the
# value should be a fully qualified domain name. The value must
# be at least one character long.
scope-value = xsd:token { minLength = "1" }
## a single code point: 4 to 6 uppercase hexadecimal digits
code-point = xsd:token { pattern = "[0-9A-F]{4,6}" }

## a space-separated sequence of two or more code points
code-point-sequence = xsd:token {
    pattern = "[0-9A-F]{4,6}( [0-9A-F]{4,6})+"
}

## a single code point, a sequence of code points, or the
## empty string
code-point-literal = code-point | code-point-sequence | ""

## a single code point or a sequence of code points (never empty)
non-empty-code-point-literal = code-point | code-point-sequence
## code point set represented in shorthand form: a space-separated
## list whose items are either a single code point or a range
## written as two code points joined by "-"
code-point-set-shorthand = xsd:token {
    pattern = "([0-9A-F]{4,6}|[0-9A-F]{4,6}-[0-9A-F]{4,6})"
            ~ "( ([0-9A-F]{4,6}|[0-9A-F]{4,6}-[0-9A-F]{4,6}))*"
}
# Davies & Freytag Standards Track [Page 71]
# RFC 7940 Label Generation Rulesets in XML August 2016
## dates used by the information fields of the meta section,
## written as "YYYY-MM-DD"; only the digit layout is checked
date-pattern = xsd:token {
    pattern = "\d{4}-\d\d-\d\d"
}
# NOTE(review): xsd:NMTOKEN by itself appears to permit a leading
# "_", so the "MUST NOT start with a '_'" rules stated below are
# presumably left to the application to check -- confirm.

## variant type
## A variant type MUST be non-empty and MUST NOT start with a "_".
## xsd:NMTOKEN is used here because space-separated lists of
## these values are needed.
variant-type = xsd:NMTOKEN

## list of variant types for action triggers
## The list MUST NOT be empty, and its entries MUST NOT start
## with a "_".
variant-type-list = xsd:NMTOKENS
## Reference to a rule name. Used by the "when" and "not-when"
## attributes, as well as by the "by-ref" attribute of the "rule"
## element.
rule-ref = xsd:IDREF

## A space-separated list of tags. Tags should generally follow
## xsd:Name syntax; xsd:NMTOKENS is used here because there is no
## native XSD datatype for a space-separated list of xsd:Name.
tags = xsd:NMTOKENS

## The value space of a "from-tag" attribute. Lexically and
## semantically it is closest to xsd:IDREF, but tags are not unique
## in the document, so the facilities a validator provides for
## xsd:IDREF cannot be used. xsd:NMTOKEN is chosen over the
## stricter xsd:Name so as to be consistent with "tags" above.
tag-ref = xsd:NMTOKEN

## An identifier type (used by "name" attributes).
identifier = xsd:ID

## Used in the "by-ref" attribute of "class" to reference another
## class having the same "name" attribute value.
class-ref = xsd:IDREF
## Pattern of the "count" attribute: "n", "n+", or "n:m", where
## n and m are each one or more digits.
count-pattern = xsd:token {
    pattern = "\d+(\+|:\d+)?"
}
# Davies & Freytag Standards Track [Page 72]
# RFC 7940 Label Generation Rulesets in XML August 2016
## Pattern of the "ref" attribute: a space-separated list of "id"
## attribute values of "reference" elements. A reference id must
## be declared in a "reference" element before it can be used in
## a "ref" attribute.
ref-pattern = xsd:token {
    pattern = "[\-_.:0-9A-Z]+( [\-_.:0-9A-Z]+)*"
}
#
# STRUCTURES
#
## Representation of one code point or of a sequence of code
## points. Besides its attributes, the element may contain any
## number of "var" children.
char = element char {
    attribute cp { code-point-literal },
    attribute comment { text }?,
    attribute when { rule-ref }?,
    attribute not-when { rule-ref }?,
    attribute tag { tags }?,
    attribute ref { ref-pattern }?,
    variant*
}
## Representation of a range of code points, delimited by the
## single code points "first-cp" and "last-cp". The element has
## no child content.
range = element range {
    attribute first-cp { code-point },
    attribute last-cp { code-point },
    attribute comment { text }?,
    attribute when { rule-ref }?,
    attribute not-when { rule-ref }?,
    attribute tag { tags }?,
    attribute ref { ref-pattern }?
}
## Representation of a variant code point or sequence ("var"
## element). "cp" may be a single code point, a sequence of code
## points, or the empty string.
variant = element var {
    attribute cp { code-point-literal },
    # Refer to the shared "variant-type" definition rather than
    # spelling out its datatype, so that the rules documented on
    # "variant-type" visibly apply to this attribute and any future
    # tightening of that definition is picked up here as well.
    attribute type { variant-type }?,
    attribute when { rule-ref }?,
    attribute not-when { rule-ref }?,
    attribute comment { text }?,
    attribute ref { ref-pattern }?
}
# Davies & Freytag Standards Track [Page 73]
# RFC 7940 Label Generation Rulesets in XML August 2016
#
# Classes
#
## A "class" element that refers to the name of another "class"
## (or of a set-operator such as "union") defined elsewhere.
## When used as a matcher (appearing under a "rule" element),
## the "count" attribute may be present.
class-invocation = element class { class-invocation-content }

class-invocation-content =
    attribute by-ref { class-ref },
    attribute count { count-pattern }?,
    attribute comment { text }?
## Defines a new class (a set of code points) from a Unicode
## property, from the code points sharing a tag value, or from
## code point literals.
class-declaration = element class { class-declaration-content }

class-declaration-content =
    # "name" MUST be present on a "top-level" class declaration,
    # i.e., one appearing directly under the "rules" element.
    # Anywhere else it MUST be absent.
    attribute name { identifier }?,
    # "count" may be present when the class is used as a matcher
    # (appearing in a "rule" element, but not nested inside a
    # set-operator or class). Otherwise, it MUST be absent.
    attribute count { count-pattern }?,
    attribute comment { text }?,
    attribute ref { ref-pattern }?,
    (
        # the class is defined by property
        # (e.g., property="sc:Latn"), OR
        attribute property { xsd:NMTOKEN }
        # by tagged code points, OR
        | attribute from-tag { tag-ref }
        # by a text node holding the shorthand notation,
        # e.g., "0061 0062-0063"
        | code-point-set-shorthand
    )
# Davies & Freytag Standards Track [Page 74]
# RFC 7940 Label Generation Rulesets in XML August 2016
# A "class" element whose content is either that of an invocation
# or that of a declaration.
class-invocation-or-declaration = element class {
    class-invocation-content | class-declaration-content
}

# Either form of "class" element, or a set-operator.
class-or-set-operator-nested =
    class-invocation-or-declaration | set-operator

class-or-set-operator-declaration =
    # a "class" element or set-operator (effectively defining a
    # class) placed directly in the "rules" element.
    class-declaration | set-operator
#
# set-operators
#
# The "complement" set-operator: takes exactly one nested class
# or set-operator.
complement-operator = element complement {
    attribute name { identifier }?,
    attribute comment { text }?,
    attribute ref { ref-pattern }?,
    # "count" MUST only be used when this set-operator acts as a
    # matcher (i.e., nested in a "rule" element but not inside a
    # set-operator or class)
    attribute count { count-pattern }?,
    class-or-set-operator-nested
}
# The "union" set-operator: takes two or more nested classes or
# set-operators.
union-operator = element union {
    attribute name { identifier }?,
    attribute comment { text }?,
    attribute ref { ref-pattern }?,
    # "count" MUST only be used when this set-operator acts as a
    # matcher (i.e., nested in a "rule" element but not inside a
    # set-operator or class)
    attribute count { count-pattern }?,
    # two or more child elements are required: the first one ...
    class-or-set-operator-nested,
    # ... followed by at least one more
    class-or-set-operator-nested+
}
# Davies & Freytag Standards Track [Page 75]
# RFC 7940 Label Generation Rulesets in XML August 2016
# The "intersection" set-operator: takes exactly two nested
# classes or set-operators.
intersection-operator = element intersection {
    attribute name { identifier }?,
    attribute comment { text }?,
    attribute ref { ref-pattern }?,
    # "count" MUST only be used when this set-operator acts as a
    # matcher (i.e., nested in a "rule" element but not inside a
    # set-operator or class)
    attribute count { count-pattern }?,
    # first and second operand
    class-or-set-operator-nested,
    class-or-set-operator-nested
}
# The "difference" set-operator: takes exactly two nested classes
# or set-operators.
difference-operator = element difference {
    attribute name { identifier }?,
    attribute comment { text }?,
    attribute ref { ref-pattern }?,
    # "count" MUST only be used when this set-operator acts as a
    # matcher (i.e., nested in a "rule" element but not inside a
    # set-operator or class)
    attribute count { count-pattern }?,
    # first and second operand
    class-or-set-operator-nested,
    class-or-set-operator-nested
}
# The "symmetric-difference" set-operator: takes exactly two
# nested classes or set-operators.
symmetric-difference-operator = element symmetric-difference {
    attribute name { identifier }?,
    attribute comment { text }?,
    attribute ref { ref-pattern }?,
    # "count" MUST only be used when this set-operator acts as a
    # matcher (i.e., nested in a "rule" element but not inside a
    # set-operator or class)
    attribute count { count-pattern }?,
    # first and second operand
    class-or-set-operator-nested,
    class-or-set-operator-nested
}
## Operators that transform one or more classes into a new class.
set-operator =
    complement-operator
    | union-operator
    | intersection-operator
    | difference-operator
    | symmetric-difference-operator
# Davies & Freytag Standards Track [Page 76]
# RFC 7940 Label Generation Rulesets in XML August 2016
#
# Match operators (matchers)
#
# The "any" matcher: an element with no content that accepts only
# the optional "count" and "comment" attributes.
any-matcher = element any {
    attribute count { count-pattern }?,
    attribute comment { text }?
}
# The "choice" matcher: contains two or more match operators.
choice-matcher = element choice {
    ## "count" MUST only be used when the choice-matcher contains
    ## no nested "start", "end", "anchor", "look-behind", or
    ## "look-ahead" operators and no nested rule-matchers
    ## containing any of these elements
    attribute count { count-pattern }?,
    attribute comment { text }?,
    # two or more match operators: the first one ...
    match-operator-choice,
    # ... followed by at least one more
    match-operator-choice+
}
char-matcher =
    # "char" element for use as a matcher. Compared with the "char"
    # definition used for data, it has no "tag", "when", or
    # "not-when" attribute and no variants, its "cp" must not be
    # empty, and it may carry a "count" attribute.
    element char {
        attribute cp { non-empty-code-point-literal },
        # "count" may be present when the element is used as a
        # matcher (appearing in a "rule" element). Otherwise, it
        # MUST be absent.
        attribute count { count-pattern }?,
        attribute comment { text }?,
        attribute ref { ref-pattern }?
    }
# The "start", "end", and "anchor" matchers: each is an element
# with no content and a single optional "comment" attribute.
start-matcher = element start { attribute comment { text }? }

end-matcher = element end { attribute comment { text }? }

anchor-matcher = element anchor { attribute comment { text }? }
# Davies & Freytag Standards Track [Page 77]
# RFC 7940 Label Generation Rulesets in XML August 2016
# The "look-ahead" and "look-behind" matchers: each wraps
# non-positional match operators and accepts an optional
# "comment" attribute.
look-ahead-matcher = element look-ahead {
    attribute comment { text }?,
    match-operators-non-pos
}

look-behind-matcher = element look-behind {
    attribute comment { text }?,
    match-operators-non-pos
}
## A non-positional match operator that may be used as a direct
## child element of the choice-matcher.
match-operator-choice = (
    any-matcher
    | choice-matcher
    | start-matcher
    | end-matcher
    | char-matcher
    | class-or-set-operator-nested
    | rule-matcher
)
## Non-positional match operators contain no "anchor",
## "look-behind", or "look-ahead" elements: an optional "start",
## then any number of other matchers, then an optional "end".
match-operators-non-pos = (
    start-matcher?,
    (
        any-matcher
        | choice-matcher
        | char-matcher
        | class-or-set-operator-nested
        | rule-matcher
    )*,
    end-matcher?
)
## Positional match operators have an "anchor" element, which may
## be preceded by a "look-behind" element, followed by a
## "look-ahead" element, or both.
match-operators-pos =
    look-behind-matcher?,
    anchor-matcher,
    look-ahead-matcher?

# Match operators are either all non-positional or positional.
match-operators = match-operators-non-pos | match-operators-pos
# Davies & Freytag Standards Track [Page 78]
# RFC 7940 Label Generation Rulesets in XML August 2016
#
# Rules
#
# A top-level rule: unlike a "rule" used as a matcher, it must
# have a "name" attribute, and it always contains match operators.
rule-declaration-top = element rule {
    attribute name { identifier },
    attribute comment { text }?,
    attribute ref { ref-pattern }?,
    match-operators
}
## A "rule" element used as a matcher: it either refers to another
## rule via "by-ref" or contains other match operators itself.
rule-matcher =
    element rule {
        ## "count" MUST only be used when the rule-matcher contains
        ## no nested "start", "end", "anchor", "look-behind", or
        ## "look-ahead" operators and no nested rule-matchers
        ## containing any of these elements
        attribute count { count-pattern }?,
        attribute comment { text }?,
        attribute ref { ref-pattern }?,
        (
            attribute by-ref { rule-ref }
            | match-operators
        )
    }
#
# Actions
#
# The "action" element: a required "disp" attribute, at most one of
# "match" / "not-match", and at most one of "any-variant" /
# "all-variants" / "only-variants". The element has no content.
action-declaration = element action {
    attribute comment { text }?,
    attribute ref { ref-pattern }?,
    # dispositions are often named after variant types or vice versa
    attribute disp { variant-type },
    (
        attribute match { rule-ref }
        | attribute not-match { rule-ref }
    )?,
    (
        attribute any-variant { variant-type-list }
        | attribute all-variants { variant-type-list }
        | attribute only-variants { variant-type-list }
    )?
}
# Davies & Freytag Standards Track [Page 79]
# RFC 7940 Label Generation Rulesets in XML August 2016
# DOCUMENT STRUCTURE
# Entry point of the grammar: the document element is "lgr".
start = lgr

# An "lgr" element holds, in this order, an optional "meta"
# section, a mandatory "data" section, and an optional "rules"
# section.
lgr = element lgr {
    meta-section?,
    data-section,
    rules-section?
}
## Meta section - information recorded with an LGR that generally
## does not affect machine processing (except for "unicode-version").
## However, if any "class-declaration" uses the "property" attribute,
## a "unicode-version" element MUST be present.
##
## The child elements are combined with "&" (interleave), so they
## may appear in any order. "language" and "scope" may repeat; every
## other child may appear at most once.
meta-section = element meta {
    element version {
        attribute comment { text }?,
        text
    }?
    & element date { date-pattern }?
    & element language { language-tag }*
    & element scope {
        # type may be "domain" or an application-defined value
        attribute type { xsd:NCName },
        scope-value
    }*
    & element validity-start { date-pattern }?
    & element validity-end { date-pattern }?
    & element unicode-version {
        # three dot-separated groups of digits
        xsd:token {
            pattern = "\d+\.\d+\.\d+"
        }
    }?
    & element description {
        # this SHOULD be a valid MIME type
        attribute type { text }?,
        text
    }?
    # The two lines below are page header/footer text from the
    # paginated source document. They used to sit here as bare text,
    # which made this definition unparseable; kept as comments.
    # Davies & Freytag Standards Track [Page 80]
    # RFC 7940 Label Generation Rulesets in XML August 2016
    & element references {
        element reference {
            attribute id {
                xsd:token {
                    # limit "id" attribute to uppercase letters,
                    # digits, and a few punctuation marks; use of
                    # integers is RECOMMENDED
                    # ("*" alone would admit the empty string;
                    # minLength = "1" rules that out)
                    pattern = "[\-_.:0-9A-Z]*"
                    minLength = "1"
                }
            },
            attribute comment { text }?,
            text
        }*
    }?
}
# The "data" element holds one or more "char" or "range" elements,
# in any mix and order.
data-section = element data { (char | range)+ }

## Note that action declarations are strictly order dependent.
## class-or-set-operator-declaration and rule-declaration-top are
## weakly order dependent: they must precede the first use of
## their identifier via "by-ref".
rules-section = element rules {
    (
        class-or-set-operator-declaration
        | rule-declaration-top
        | action-declaration
    )*
}
Davies & Freytag Standards Track [Page 81]
RFC 7940 Label Generation Rulesets in XML August 2016
Acknowledgements
This format builds upon the work on documenting IDN tables by many
different registry operators. Notably, a comprehensive language
table for Chinese, Japanese, and Korean was developed by the "Joint
Engineering Team" [RFC3743]; this table is the basis of many registry
policies. Also, a set of guidelines for Arabic script registrations
[RFC5564] was published by the Arabic-language community.
Contributions that have shaped this document have been provided by
Francisco Arias, Julien Bernard, Mark Davis, Martin Duerst, Paul
Hoffman, Sarmad Hussain, Barry Leiba, Alexander Mayrhofer, Alexey
Melnikov, Nicholas Ostler, Thomas Roessler, Audric Schiltknecht,
Steve Sheng, Michel Suignard, Andrew Sullivan, Wil Tan, and John
Yunker.
Authors' Addresses
Kim Davies
Internet Corporation for Assigned Names and Numbers
12025 Waterfront Drive
Los Angeles, CA 90094
United States of America
Phone: +1 310 301 5800
Email: kim.davies@icann.org
URI: http://www.icann.org/
Asmus Freytag
ASMUS, Inc.
Email: asmus@unicode.org
Davies & Freytag Standards Track [Page 82]
gemini://gemini.bortzmeyer.org/rfc-mirror/rfc7940.txt -- Leo's gemini proxy
-- Connecting to gemini.bortzmeyer.org:1965...
-- Connected
-- Sending request
-- Meta line: 20 text/plain
-- Response ended
-- Page fetched on Mon May 6 12:15:34 2024