<!DOCTYPE TEI.2 SYSTEM "catalog/teilite.dtd" [


<!ENTITY % ISOlat1 PUBLIC "ISO 8879-1986//ENTITIES Added Latin 1//EN">
<!ENTITY % ISOlat2 PUBLIC "ISO 8879-1986//ENTITIES Added Latin 2//EN">
<!ENTITY % ISOnum
     PUBLIC "ISO 8879-1986//ENTITIES Numeric and Special Graphic//EN">
<!ENTITY % ISOpub  PUBLIC "ISO 8879-1986//ENTITIES Publishing//EN">
%ISOlat1; %ISOlat2; %ISOpub; %ISOnum;
<!--
<!ENTITY CIMIext1 SYSTEM "cimiext1.sgm">
-->
]>
<?STYLESPEC "teilite" "catalog/teilite.ssh">
<!-- TEI Doc. No:  TEI U 5                                   -->
<!-- Title:  Encoding texts in MR form:  a TEI tutorial      -->
<!-- Drafted:  28 Feb 91 MSM in haste                        -->
<!-- ******************************************************  -->
<!-- Revision History (add lines at top)                     -->
<!--  8 Jun 95 : CMSMcQ : changes from David Barnard         -->
<!--  3 Jun 95 : CMSMcQ : changes for CETH version           -->
<!-- 30 May 95 : LB: retitled                                -->
<!-- 16 May 95 : CMSMcQ : MSM's med and hard stuff           -->
<!-- 15 May 95 : CMSMcQ : MSM's easy stuff                   -->
<!-- 12 May 95 : completed first bash at xptrs etc           -->
<!-- 4 may 95: added sections on graphics and most ptr stuff -->
<!-- 1 May 95 : all easy and med changes complete            -->
<!-- 15 Apr 95 : LB trivial changes for Lite conformance     -->
<!-- 31 Mar 95 : MSM : cosmetic changes (reformat long lines) -->
<!--  1 dec 94 : LB: retag in teilite                        -->
<!-- 29 Jun 94 : LB : revise again                           -->
<!-- 28 jun 94 : LB : extensive revision of initial sections -->
<!-- 26 jun 94 : LB : expand material on names, dates etc.   -->
<!-- 25 jun 94 : LB : replace gloss lists by taglists        -->
<!-- 93-07-28 : MSM : add refs and taglist                   -->
<!-- 93-07-27 : MSM : complete revisions?                    -->
<!-- 93-07-27 : MSM : revisions (still going ...)            -->
<!-- 93-07-27 : MSM : revisions!                             -->
<!-- 93-07-26 : MSM : cosmetic changes                       -->
<!-- 93-07-26 : MSM : revise, include SH material            -->
<!-- 93-07-20 : MSM : change tags to ODD set                 -->
<!-- 93-07-20 : MSM : made file                              -->
<!-- Date     Who    What                                    -->
<!-- 20 Jul 93 CMSMcQ made TEI U1 from ED W21                -->
<!-- ******************************************************* -->
<TEI.2>
<TEIHEADER>
<FILEDESC>
<TITLESTMT><TITLE>TEI U5: Encoding for Interchange: an introduction to
the TEI</TITLE>
</TITLESTMT>
<PUBLICATIONSTMT>
<P>Not for publication or redistribution
</P>
</PUBLICATIONSTMT>
<SOURCEDESC><BIBL>TEI U1:  An Introduction to TEI Tagging (derived
from TEI ED W21:  Living with the Guidelines)</BIBL>
</SOURCEDESC>
</FILEDESC>
<PROFILEDESC><LANGUSAGE>
<LANGUAGE ID="en">English</LANGUAGE>
<LANGUAGE ID="sgml">some examples of SGML  use <MENTIONED>SGML</MENTIONED>
as their <IDENT>lang</IDENT> value</LANGUAGE></LANGUSAGE></PROFILEDESC>
<REVISIONDESC>
<CHANGE><DATE>8 June 1995</DATE>
<RESPSTMT><RESP>ed.</RESP>
<NAME>CMSMcQ</NAME></RESPSTMT>
<ITEM>install on TEI web server (changing DTD subset slightly)
</ITEM></CHANGE>
<CHANGE><DATE>7 June 1995</DATE>
<RESPSTMT><RESP>ed.</RESP>
<NAME>CMSMcQ</NAME></RESPSTMT>
<ITEM>Bring TeX and Script spelling corrections, etc. into SGML form.
</ITEM></CHANGE>
<CHANGE><DATE>2-3 June 1995</DATE>
<RESPSTMT><RESP>ed.</RESP>
<NAME>CMSMcQ</NAME></RESPSTMT>
<ITEM>Spellcheck, final (! ha!) changes, format, and print.  Many 
changes made only in TeX and Script versions.
</ITEM></CHANGE>
<CHANGE><DATE>29-30 May 1995</DATE>
<RESPSTMT><RESP></RESP>
<NAME>LB</NAME></RESPSTMT>
<ITEM>Last (ha!) pass. Cut down intro section. Moved divgen again.
Revised interp and index sections extensively and generally hacked.
</ITEM></CHANGE>
<CHANGE><DATE>24-25 May 1995</DATE>
<RESPSTMT><RESP></RESP>
<NAME>CMSMcQ</NAME></RESPSTMT>
<ITEM>changes as agreed with LB at ExCommittee meeting:  interp
section, rev. editorial tags, add def of TEI Lite, add section on
Making It Work with software, resettle divGen and index, begin
continuous pass through working from LB's notes</ITEM></CHANGE>
<CHANGE><DATE>15 May 1995</DATE>
<RESPSTMT><RESP></RESP>
<NAME>CMSMcQ</NAME></RESPSTMT>
<ITEM>begin last push prior to publication</ITEM></CHANGE>
<CHANGE><DATE>1 Dec 94 </DATE>
<RESPSTMT><RESP></RESP>
<NAME>LB</NAME></RESPSTMT>
<ITEM>retagged using TEI Lite
</ITEM>
</CHANGE>
<CHANGE><DATE>23 Jun 94 </DATE>
<RESPSTMT><RESP></RESP><NAME>LB</NAME></RESPSTMT>
<ITEM> change to use ODD-style tagdescs
</ITEM>
</CHANGE>
<CHANGE>
<DATE>1993-07-20</DATE>
<RESPSTMT><RESP>draft</RESP><NAME>CMSMcQ</NAME></RESPSTMT>
<ITEM>made file from old ED W21</ITEM>
</CHANGE></REVISIONDESC>
</TEIHEADER>
<TEXT>
<FRONT>
<TITLEPAGE>
<DOCTITLE>
<TITLEPART>
TEI Lite: An Introduction to Text Encoding for Interchange
</TITLEPART></DOCTITLE>
<DOCAUTHOR>Lou Burnard</DOCAUTHOR>
<DOCAUTHOR>C. M. Sperberg-McQueen</DOCAUTHOR>


<DOCIMPRINT>Document No:  TEI U 5</DOCIMPRINT>
<DOCDATE>June 1995</DOCDATE>
</TITLEPAGE>
<!-- not allowed here !  LB 23 jun 95 -->
<!-- <divgen type='toc'>-->
<!---->
</FRONT>
<BODY>
<div1 type="section"><!-- added to get around a stylesheet bug (bosak) -->
<P>This document provides an introduction to the
recommendations of the Text Encoding Initiative (TEI), by describing a
manageable subset of the full TEI encoding scheme. The scheme
documented here can  be used to encode a wide variety of commonly
encountered textual features, in such a way as to maximize the
usability of electronic transcriptions and to facilitate their
interchange among scholars using different computer systems. It is
also fully compatible with the full TEI scheme, as defined by TEI
document P3, <TITLE>Guidelines for Electronic Text Encoding and
Interchange</TITLE>, published in Chicago and Oxford in May 
1994.<NOTE PLACE="inline"
>Copies of the current version of this text may be found via the World
Wide Web at
<XREF><CODE>http://www-tei.uic.edu/orgs/tei/intros/teiu5.tei</CODE></XREF>
and <XREF><CODE>ftp://info.ox.ac.uk/pub/ota/TEI/doc/teiu5.tei</CODE></XREF>,
and at other sites mirroring these.
The document is also available in HTML form at
<XREF><CODE>http://www-tei.uic.edu/orgs/tei/intros/teiu5.html</CODE></XREF>
and <XREF><CODE>http://info.ox.ac.uk/~archive/teiu5.split.html</CODE></XREF>.
Copies of the formal SGML document type definition for 
the tag set described here may be found at the same locations,
under the file name <ident>teilite.dtd</ident>:
<XREF><CODE>ftp://www-tei.uic.edu/orgs/tei/p3/dtd/teilite.dtd</CODE></XREF>
and <XREF><CODE>ftp://info.ox.ac.uk/pub/ota/TEI/dtd/teilite.dtd</CODE></XREF>.
</NOTE></P>
</div1>
<!-- condense -->
<DIV1 TYPE="Section"><HEAD>Introduction</HEAD>
<P>The Text Encoding Initiative (TEI) Guidelines are addressed to
anyone who wants to interchange information stored in an electronic
form. They emphasize the interchange of textual information, but other
forms of information such as images and sound are also addressed. The
Guidelines are equally applicable in the creation of new resources and
in the interchange of existing ones.
</P>
<P>The Guidelines provide a means of making explicit certain features
of a text in such a way as to aid the processing of that text by
computer programs running on different machines. This process of
making explicit we call <TERM>markup</TERM> or <TERM>encoding</TERM>.
Any textual representation on a computer uses some form of markup; the
TEI came into being partly because of the enormous variety of mutually
incomprehensible encoding schemes currently besetting scholarship, and
partly because of the expanding range of scholarly uses now being
identified for texts in electronic form.
</P>
<P>The TEI Guidelines use the <TERM>Standard Generalized Markup
Language</TERM> (SGML) to define their encoding scheme. SGML is an
international standard (ISO 8879), used increasingly throughout the
information processing industries, which makes possible a formal
definition of an encoding scheme, in terms of <TERM>elements</TERM>
and
<TERM>attributes</TERM>, and rules governing their appearance within
a text. The TEI's use of SGML is ambitious in its complexity and
generality, but it is fundamentally no different from that of any
other SGML markup scheme, and so any general-purpose SGML-aware
software is able to process TEI-conformant texts.
</P>
<P>The TEI is sponsored by the Association for Computers and the
Humanities, the Association for Computational Linguistics, and the
Association for Literary and Linguistic Computing.  Funding has been
provided in part from the U.S. National Endowment for the Humanities,
Directorate General XIII of the Commission of the European
Communities, the Andrew W. Mellon Foundation, and the Social Science
and Humanities Research Council of Canada. Its Guidelines were
published in May 1994, after six years of development involving many
hundreds of scholars from different academic disciplines worldwide.
</P>
<P>At the outset of its work, the overall goals of the TEI were
defined by the closing statement of a planning conference held at
Vassar College, N.Y., in November, 1987; these <SOCALLED>Poughkeepsie
Principles</SOCALLED> were further elaborated in a series of design
documents.  The Guidelines, say these design documents,
<!-- <NOTE PLACE="foot"> <TITLE LEVEL="U">TEI ED P1:   Design
Principles for Text Encoding Guidelines</TITLE> (Chicago, Oxford: 
Text Encoding Initiative, 1989); like other TEI technical documents,
available from the TEI.</NOTE> --> should:
<LIST TYPE="bullets">
<ITEM>suffice to represent the textual features needed for research;</ITEM>
<ITEM>be simple, clear, and concrete;</ITEM>
<ITEM>be easy for researchers to use without special-purpose
software;</ITEM>
<ITEM>allow the rigorous definition and efficient processing of
texts;</ITEM>
<ITEM>provide for user-defined extensions;</ITEM>
<ITEM>conform to existing and emergent standards.</ITEM>
</LIST></P>
<P>The world of scholarship is large and diverse. For the Guidelines
to have wide acceptability, it was important to ensure that:
<LIST TYPE="ordered">
<ITEM>the common core of textual features be easily shared;</ITEM>
<ITEM>additional specialist features be easy to add to (or remove
from) a text;</ITEM>
<ITEM>multiple parallel encodings of the same feature be
possible;</ITEM>
<ITEM>the richness of markup be user-defined, with a very
small minimal requirement;</ITEM>
<ITEM>adequate documentation of the text and its encoding be
provided.</ITEM>
</LIST></P>
<P>The present document describes a manageable selection from the
extensive set of SGML elements and recommendations resulting from
those design goals, which is called <TITLE>TEI Lite</TITLE>.</P>
<P>In selecting from the  several hundred SGML elements defined by
the full TEI scheme, we have tried to identify a useful <SOCALLED>starter
set</SOCALLED>, comprising the elements which almost every user should
know about.  Experience working with TEI Lite will be invaluable in
understanding the full TEI DTD and in knowing which optional parts of
the full DTD are necessary for work with particular types of text.
</P>
<P>Our goals in defining this subset may be summarized as follows:
<LIST>
<ITEM>it should include most of the TEI <SOCALLED>core</SOCALLED>
tag set, since this contains elements relevant to virtually all text
types and all kinds of text-processing work;</ITEM>
<!-- deleted this item: there ARE no elements defined in the prose
tagset! -->
<!-- <ITEM>it should include most of the elements defined in the TEI
base tag set for prose;</ITEM> -->
<ITEM>it should be able to handle adequately a reasonably wide
variety of texts, at the level of detail found in existing practice
(as demonstrated in, for example, the holdings of the Oxford Text
Archive);</ITEM>
<ITEM>it should be useful for the production of new documents as well
as encoding of existing ones;</ITEM>
<ITEM>it should be usable with a wide range of existing SGML
software;</ITEM>
<ITEM>it should be derivable from the full TEI DTD using the
extension mechanisms described in the TEI Guidelines;</ITEM>
<ITEM>it should be as small and simple as is consistent with the
other goals.</ITEM></LIST> </P>
<P>The reader may judge our success in meeting these goals for himself or
herself. At the time of writing, our confidence that we have at least
partially done so is borne out by the use of TEI Lite in practice for the
encoding of real texts.  The Oxford Text Archive uses TEI Lite when it
translates texts from its holdings from their original markup schemes
into SGML; the Electronic Text Centers at the University of Virginia
and the University of Michigan have used TEI Lite to encode their
holdings. And the Text Encoding Initiative itself uses TEI Lite, in
its current technical documentation &mdash; including this document.  </P>
<P>Although we have tried to make this document self-contained, as
suits a tutorial text, the reader should be aware that it does not
cover every detail of the TEI encoding scheme. All of the elements
described here are fully documented in the TEI Guidelines themselves,
which should be consulted for authoritative reference information on
these, and on the many others which are not described here.  Some
basic knowledge of SGML is assumed.</P>
</DIV1>
<DIV1><HEAD>A Short Example</HEAD>
<P>We begin with a short example, intended to show what happens when
a passage of prose is typed into a computer by someone with little
sense of the purpose of mark-up, or the potential of electronic texts.
 In an ideal world, such output might be generated by a very accurate
optical scanner.  It attempts to be faithful to the appearance of the
printed text, by retaining the original line breaks, by introducing
blanks to represent the layout of the original headings and page
breaks, and so forth. Where characters not available on the keyboard
are needed (such as the accented letter <MENTIONED>a</MENTIONED> in
<MENTIONED>fa&agrave;l</MENTIONED> or the long dash), it attempts to
mimic their appearance.</P>

<P><EG>                          CHAPTER 38

READER, I married him. A quiet wedding we had: he and I, the par-
son and clerk, were alone present. When we got back from church, I
went into the kitchen of the manor-house, where Mary was cooking
the dinner, and John cleaning the knives, and I said --
  'Mary, I have been married to Mr Rochester this morning.' The
housekeeper and her husband were of that decent, phlegmatic
order of people, to whom one may at any time safely communicate a
remarkable piece of news without incurring the danger of having
one's ears pierced by some shrill ejaculation and subsequently stunned
by a torrent of wordy wonderment. Mary did look up, and she did
stare at me; the ladle with which she was basting a pair of chickens
roasting at the fire, did for some three minutes hang suspended in air,
and for the same space of time John's knives also had rest from the
polishing process; but Mary, bending again over the roast, said only --
   'Have you, miss? Well, for sure!'
   A short time after she pursued, 'I seed you go out with the master,
but I didn't know you were gone to church to be wed'; and she
basted away. John, when I turned to him, was grinning from ear to
ear.
   'I telled Mary how it would be,' he said: 'I knew what Mr Ed-
ward' (John was an old servant, and had known his master when he
was the cadet of the house, therefore he often gave him his Christian
name) -- 'I knew what Mr Edward would do; and I was certain he
would not wait long either: and he's done right, for aught I know. I
wish you joy, miss!' and he politely pulled his forelock.
   'Thank you, John. Mr Rochester told me to give you and Mary
this.'
   I put into his hand a five-pound note.  Without waiting to hear
more, I left the kitchen. In passing the door of that sanctum some time
after, I caught the words --
   'She'll happen do better for him nor ony o' t' grand ladies.' And
again, 'If she ben't one o' th' handsomest, she's noan faa\l, and varry
good-natured; and i' his een she's fair beautiful, onybody may see
that.'
   I wrote to Moor House and to Cambridge immediately, to say what
I had done: fully explaining also why I had thus acted. Diana and

                            474

                 JANE EYRE                      475

Mary approved the step unreservedly. Diana announced that she
would just give me time to get over the honeymoon, and then she
would come and see me.
   'She had better not wait till then, Jane,' said Mr Rochester, when I
read her letter to him; 'if she does, she will be too late, for our honey-
moon will shine our life long: its beams will only fade over your
grave or mine.'
   How St John received the news I don't know: he never answered
the letter in which I communicated it: yet six months after he wrote
to me, without, however, mentioning Mr Rochester's name or allud-
ing to my marriage. His letter was then calm, and though very serious,
kind. He has maintained a regular, though not very frequent correspond-
ence ever since: he hopes I am happy, and trusts I am not of those who
live without God in the world, and only mind earthly things.
</EG>
<!---->
</P>
<P>This transcription suffers from a number of shortcomings:
<LIST TYPE="bullets">
<ITEM>the page numbers and running titles are intermingled with the
text in a way which makes it difficult for software to disentangle
them;</ITEM>
<ITEM>no distinction is made between single quotation marks and
apostrophes, so it is difficult to know exactly which passages are in
direct speech;
</ITEM>
<ITEM>the preservation of the copy text's hyphenation means that
simple-minded search programs will not find the broken words;
</ITEM>
<ITEM>the accented letter in <MENTIONED>fa&agrave;l</MENTIONED> and
the long dash have been rendered by ad hoc keying conventions which
follow no standard pattern and will be processed correctly only if the
transcriber remembers to mention them in the documentation;</ITEM>
<ITEM>paragraph divisions are marked only by the use of white space,
and hard carriage returns have been introduced at the end of each
line. Consequently, if the size of type used to print the text
changes, reformatting will be problematic.
</ITEM>
</LIST></P>
<P>We now present the same passage, as it might be encoded using the
TEI Guidelines. As we shall see, there are many ways in which this
encoding could be extended, but as a minimum, the TEI approach allows
us to represent the following distinctions:
<LIST TYPE="bullets">
<ITEM>Paragraph divisions are now marked explicitly.</ITEM>
<ITEM>Apostrophes are distinguished from quotation marks.</ITEM>
<ITEM>Entity references are used for the accented letter and the long
dash.</ITEM>
<ITEM>Page divisions have been marked with an empty <GI>pb</GI>
element alone.</ITEM>
<ITEM>To simplify searching and processing, the lineation of the
original has not been retained and words broken by typographic
accident at the end of a line have been re-assembled without comment.
If the original lineation were of interest, as it might be for an
important printing, it could easily be recorded, though it has not
been here.</ITEM>
<ITEM>For convenience of proof reading, a new line has been
introduced at the start of each paragraph, but the indentation is
removed.
</ITEM>
</LIST>
<EG><![CDATA [<pb n='474'>
<div1 type=chapter n='38'>

<p>Reader, I married him.  A quiet wedding we had: he and I,
the parson and clerk, were alone present.  When we got back
from church, I went into the kitchen of the manor-house,
where Mary was cooking the dinner, and John cleaning the
knives, and I said &mdash;

<p><q>Mary, I have been married to Mr Rochester this
morning.</q> The housekeeper and her husband were of that
decent, phlegmatic order of people, to whom one may at any
time safely communicate a remarkable piece of news without
incurring the danger of having one's ears pierced by some
shrill ejaculation and subsequently stunned by a torrent of
wordy wonderment.  Mary did look up, and she did stare at
me; the ladle with which she was basting a pair of chickens
roasting at the fire, did for some three minutes hang
suspended in air, and for the same space of time John's
knives also had rest from the polishing process; but Mary,
bending again over the roast, said only &mdash;

<p><q>Have you, miss? Well, for sure!</q>

<p>A short time after she pursued, <q>I seed you go out with
the master, but I didn't know you were gone to church to be
wed</q>; and she basted away.  John, when I turned to him,
was grinning from ear to ear.  <q>I telled Mary how it would
be,</q> he said: <q>I knew what Mr Edward</q> (John was an
old servant, and had known his master when he was the cadet
of the house, therefore he often gave him his Christian
name) &mdash; <q>I knew what Mr Edward would do; and I was
certain he would not wait long either: and he's done right,
for aught I know.  I wish you joy, miss!</q> and he politely
pulled his forelock.

<p><q>Thank you, John.  Mr Rochester told me to give you and
Mary this.</q>

<p>I put into his hand a five-pound note.  Without waiting
to hear more, I left the kitchen.  In passing the door of
that sanctum some time after, I caught the words &mdash;

<p><q>She'll happen do better for him nor ony o' t' grand
ladies.</q> And again, <q>If she ben't one o' th'
handsomest, she's noan fa&agrave;l, and varry good-natured;
and i' his een she's fair beautiful, onybody may see
that.</q>

<p>I wrote to Moor House and to Cambridge immediately, to
say what I had done: fully explaining also why I had thus
acted.  Diana and <pb n='475'> Mary approved the step
unreservedly.  Diana announced that she would just give me
time to get over the honeymoon, and then she would come and
see me.

<p><q>She had better not wait till then, Jane,</q> said Mr
Rochester, when I read her letter to him; <q>if she does,
she will be too late, for our honeymoon will shine our life
long: its beams will only fade over your grave or mine.</q>

<p>How St John received the news I don't know: he never
answered the letter in which I communicated it: yet six
months after he wrote to me, without, however, mentioning Mr
Rochester's name or alluding to my marriage.  His letter was
then calm, and though very serious, kind.  He has maintained
a regular, though not very frequent correspondence ever
since: he hopes I am happy, and trusts I am not of those who
live without God in the world, and only mind earthly things.
]]>
</EG>
</P>
<P>The decision to focus on Bront&euml;'s text, rather than on the
printing of it in this particular edition, is one aspect of a
fundamental encoding issue: that of selectivity. An encoding makes
explicit only those textual features of importance to the encoder.  It
is not difficult to think of ways in which the encoding of even this
short passage might readily be extended. For example:
<LIST>
<ITEM>a regularized form of the passages in dialect could be
provided;</ITEM>
<ITEM>footnotes glossing or commenting on any passage could be added;</ITEM>
<ITEM>pointers linking parts of this text to others could be added;</ITEM>
<ITEM>proper names of various kinds could be distinguished from the
surrounding text;</ITEM>
<ITEM>detailed bibliographic information about the text's provenance
and context could be prefixed to it;</ITEM>
<ITEM>a linguistic analysis of the passage into sentences, clauses,
words, etc., could be provided, each unit being associated with
appropriate category codes;</ITEM>
<ITEM>the text could be segmented into narrative or discourse units;</ITEM>
<ITEM>systematic analysis or interpretation of the text could be
included in the encoding, with potentially complex alignment or
linkage between the text and the analysis, or between the text and one
or more translations of it;</ITEM>
<ITEM>passages in the text could be linked to images or sound held on
other media.</ITEM>
</LIST></P>
<P>The TEI-recommended way of carrying all of these out is described
in the remainder of this document. The TEI scheme as a whole also
provides for an enormous range of other possibilities, of which we
cite only a few:
<LIST>
<ITEM>detailed analysis of the components of names;</ITEM>
<ITEM>detailed meta-information providing thesaurus-style information
about the text's origins or topics;</ITEM>
<ITEM>information about the printing history or manuscript variations
exhibited by a particular series of versions of the text.</ITEM>
</LIST> For recommendations on these and many other possibilities, the
full Guidelines should be consulted.
</P></DIV1>
<DIV1><HEAD>The Structure of a TEI Text</HEAD>
<P>All TEI-conformant texts contain (a) a <TERM>TEI header</TERM>
(marked up as a <GI>teiHeader</GI> element) and (b) the transcription
of the text proper (marked up as a <GI>text</GI> element).
</P>
<P>The TEI header provides information analogous to that provided by
the title page of a printed text.  It has up to four parts: a
bibliographic description of the machine-readable text, a description
of the way it has been encoded, a non-bibliographic description of the
text (a <TERM>text profile</TERM>), and a revision history. The
header is described in more detail in section <PTR TARGET="teihead">.
</P>
<P>A TEI text may be <TERM>unitary</TERM> (a single work) or
<TERM>composite</TERM> (a collection of single works, such as an
anthology). In either case, the text may have an optional <TERM>front</TERM>
or <TERM>back</TERM>. In between is the <TERM>body</TERM> of the
text, which, in the case of a composite text, may consist of
<TERM>group</TERM>s, each containing more groups or texts.
</P>
<P>A unitary text will be encoded using an overall structure like
this:
<EG><![CDATA [<TEI.2>
    <teiHeader> [ TEI Header information ]  </teiHeader>
    <text>
        <front> [ front matter ... ]   </front>
        <body>  [ body of text ... ]   </body>
        <back>  [ back matter ...  ]   </back>
    </text>
</TEI.2>
]]></EG></P>
<P>A composite text also has an optional front and back. In between
occur one or more groups of texts, each with its own optional front
and back matter. A composite text will thus be encoded using an
overall structure like this:
<EG><![CDATA [<TEI.2>
    <teiHeader> [ header information for the composite ] </teiHeader>
    <text>
        <front> [ front matter for the composite  ]      </front>
        <group>
           <text>
              <front> [ front matter of first text ] </front>
              <body>  [ body of first text  ]          </body>
              <back>  [ back matter of first text ]    </back>
           </text>
           <text>
              <front> [ front matter of second text]  </front>
              <body>  [ body of second text  ]          </body>
              <back>  [ back matter of second text ]    </back>
           </text>
           [ more texts or groups of texts here ]
        </group>
        <back>      [ back matter for the composite  ]      </back>
    </text>
</TEI.2>
]]>
</EG> </P>
<P>It is also possible to define a composite of TEI texts, each with
its own header. Such a collection is known as a <TERM>TEI corpus</TERM>,
and may itself have a header:
<EG><![CDATA [<teiCorpus>
  <teiHeader>   [header information for the corpus]</teiHeader>
  <TEI.2>
     <teiHeader>[header information for first text]</teiHeader>
     <text>     [first text in corpus]             </text>
  </TEI.2>
  <TEI.2>
    <teiHeader>[header information for second text]</teiHeader>
    <text>     [second text in corpus]             </text>
  </TEI.2>
</teiCorpus>
]]>
</EG> It is not, however, possible to create a composite of corpora --
that is, a number of <GI>teiCorpus</GI> elements combined together
and treated as a single object. This is a restriction of the current
version of the TEI Guidelines.</P>
<P>In the remainder of this document, we discuss chiefly simple text
structures. The discussion in each case consists of a short list of
relevant TEI <TERM>elements</TERM> with a brief definition of each,
followed by definitions for any <TERM>attributes</TERM> specific to
that element. In most cases, short examples are also given.
<!---->
</P></DIV1>
<DIV1><HEAD>Encoding the Body</HEAD>
<P>As indicated above, a simple TEI document at the textual level
consists of the following elements:
<LIST TYPE="gloss">
<LABEL><GI>front</GI></LABEL>
<ITEM>contains any prefatory matter (headers, title page, prefaces,
dedications, etc.) found before the start of a text proper.</ITEM>
<LABEL><GI>group</GI></LABEL>
<ITEM>contains a number of unitary texts or groups of texts.</ITEM>
<LABEL><GI>body</GI></LABEL>
<ITEM>contains the whole body of a single unitary text, excluding any
front or back matter.</ITEM>
<LABEL><GI>back</GI></LABEL>
<ITEM>contains any appendixes, etc., following the main part of a
text.</ITEM>
</LIST>    Elements specific to front and back matter are described
below in section <PTR TARGET="fronbac">. In this section we discuss
the elements making up the body of a text. </P>
<DIV2 ID="divs" TYPE="div2"><HEAD>Text Division Elements</HEAD>
<P>The body of a prose text may be just a series of paragraphs, or
these paragraphs may be grouped together into chapters, sections,
subsections, etc. In the former case, each paragraph is tagged using
the <GI>p</GI> tag. In the latter case, the <GI>body</GI> may be
divided either into a series of <GI>div1</GI> elements, or into a
series of
<GI>div</GI> elements, either of which may be further subdivided, as
discussed below:
<LIST TYPE="gloss">
<LABEL><GI>p</GI></LABEL>
<ITEM>marks paragraphs in prose.</ITEM>
<LABEL><GI>div</GI></LABEL>
<ITEM>contains a subdivision of the front, body, or back of a text.</ITEM>
<LABEL><GI>div1</GI></LABEL>
<ITEM>contains a first-level subdivision of the front, body, or back
of a text (the largest, if
<GI>div0</GI> is not used, the second largest if it is).</ITEM>
</LIST>
</P>
<P>When structural subdivisions smaller than a <GI>div1</GI> are
necessary, a <GI>div1</GI> may be divided into <GI>div2</GI>
elements, a <GI>div2</GI> into smaller <GI>div3</GI> elements, etc.,
down to the level of <GI>div7</GI>.  If more than seven levels of
structural division are present, one must either modify the TEI tag
set to accept
<GI TEI="no">div8</GI>, etc., or else use the unnumbered <GI>div</GI>
element:  a <GI>div</GI> may be subdivided by smaller <GI>div</GI>
elements, without limit to the depth of nesting.
</P>
<P>All these <TERM>division</TERM> elements take the following three
attributes:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>This indicates the conventional name for this category of text
division.  Its value will typically be <Q REND="inline" TYPE="inline">Book</Q>,
<Q REND="inline" TYPE="inline">Chapter</Q>,
<Q REND="inline" TYPE="inline">Poem</Q>, etc.  Other possible values
include <Q REND="inline" TYPE="inline">Group</Q> for groups of poems,
etc., treated as a single unit, <Q REND="inline" TYPE="inline">Sonnet</Q>,
<Q REND="inline" TYPE="inline">Speech</Q>, and <Q REND="inline" TYPE="inline"
>Song</Q>.  Note that whatever value is supplied for the <IDENT>type</IDENT>
attribute  of the first <GI>div</GI>, <GI>div1</GI>, <GI>div2</GI>,
etc., in a text is assumed to apply to all subsequent <GI>div</GI>s,
<GI>div1</GI>s (etc.) within the same <GI>body</GI>. This implies
that a value must be given for the first division element of each
type, or whenever the value changes. </ITEM>
<LABEL><IDENT>id</IDENT></LABEL>
<ITEM>This specifies a unique identifier for the division, which may
be used for cross references or other links to it, such as a
commentary, as further discussed in section <PTR TARGET="xr">. It is
often useful to provide an <IDENT>id</IDENT> attribute for every
major structural unit in a text, and to derive the ID values in some
systematic way, for example by appending a section number to a short
code for the title of the work in question, as in the examples below.</ITEM>
<LABEL><IDENT>n</IDENT></LABEL>
<ITEM>The <IDENT>n</IDENT> attribute specifies a mnemonic short name
or number for the division, which can be used to identify it in
preference to the ID.  If a conventional form of reference or
abbreviation for the parts of a work already exists (such as the
book/chapter/verse pattern of Biblical citations), the <IDENT>n</IDENT>
attribute is the place to record it.
</ITEM>
</LIST> The attributes <IDENT>id</IDENT> and <IDENT>n</IDENT>,
indeed, are so widely useful that they are allowed on any element in
any TEI DTD:  they are <TERM>global attributes</TERM>.  Other global
attributes defined in the TEI Lite scheme are discussed in section
<PTR TARGET="xatts">.
</P>
<P>The value of every <IDENT>id</IDENT> attribute must be unique
within a document. One simple way of ensuring that this is so is to
make it reflect the hierarchic structure of the document. For example,
Smith's
<TITLE>Wealth of Nations</TITLE> as first published consists of five
books, each of which is divided into chapters, while some chapters are
further subdivided into parts. We might define <IDENT>id</IDENT>
values for this structure as follows:
<EG><![CDATA [<div1 id=WN1 n='I' type='book'>
  <div2 id=WN101 n='I.1' type='chapter'>
   ... </div2>
  <div2 id=WN102 n='I.2' type='chapter'>
   ... </div2>
   ...
  <div2 id=WN110 n='I.10' type='chapter'>
     <div3 id=WN1101 n='I.10.1' type=part>
      ... </div3>
     <div3 id=WN1102 n='I.10.2' type=part>
      ... </div3>
  </div2>
  ...
</div1>
<div1 id=WN2 n='II' type='book'>
   ....
</div1>
...

]]>
</EG> </P>
<P>A different numbering scheme may be used for <IDENT>id</IDENT>
and
<IDENT>n</IDENT> attributes:  this is often useful where a canonical
reference scheme is used which does not tally with the structure of
the work.  For example, in a novel divided into books each containing
chapters, where the chapters are numbered sequentially through the
whole work, rather than within each book, one might use a scheme such
as the following:
<EG><![CDATA [<div1 id=TS01 n='1' type='Volume'>
   <div2 id=TS011 n='1' type='Chapter'>
      ...
   <div2 id=TS012 n='2'>
      ...
</div1>
<div1 id=TS02 n='2' type='Volume'>
   <div2 id=TS021 n='3' type='Chapter'>
      ...
   <div2 id=TS022 n='4'>
      ...
</div1>
]]>
</EG>   Here the work has two volumes, each containing two chapters.
The chapters are numbered conventionally 1 to 4, but the <IDENT>id</IDENT>
values specified allow them to be regarded additionally as if they
were numbered 1.1, 1.2, 2.1, 2.2.
</P></DIV2>
<DIV2 ID="h25"><HEAD>Headings and Closings</HEAD>
<P>Every <GI>div</GI>, <GI>div1</GI>, <GI>div2</GI>, etc., may
have a title or heading at its start, and (less commonly) a closing
such as <Q REND="inline" TYPE="inline">End of Chapter 1</Q>.  The
following elements may be used to transcribe them:
<LIST TYPE="gloss">
<LABEL><GI>head</GI></LABEL>
<ITEM>contains any heading, for example, the title of a section, or
the heading of a list or glossary.</ITEM>
<LABEL><GI>trailer</GI></LABEL>
<ITEM>contains a closing title or footer appearing at the end of  a
division of a text. </ITEM>
</LIST> Some other elements which may be necessary at the beginning or
ending of text divisions are discussed below in section <PTR TARGET="h52">
 .
</P>
<P>Whether or not headings and trailers are included in a
transcription is a matter for the individual transcriber to decide.
Where a heading is completely regular (for example <Q>Chapter 1</Q>)
or has been given as an attribute value (e.g. <GI>div1 type='Chapter'
n=1</GI>), it may be omitted; where it contains otherwise
unrecoverable text it should always be included.  For example, the
start of Hardy's <TITLE>Under the Greenwood Tree</TITLE> might be
encoded as follows:
<EG><![CDATA [<div1 id=UGT1 n='Winter' type='Part'>
<div2 id=UGT11 n='1' type='Chapter'>
<head>Mellstock-Lane</head>
<p>To dwellers in a wood almost every species of tree ...
]]>
</EG>
</P></DIV2>
<DIV2 ID="vedr"><HEAD>Prose, Verse and Drama</HEAD>
<P>As noted above, the paragraphs making up a textual division should
be tagged with the <GI>p</GI> tag. For example:
<EG><![CDATA [<body>
<p>I fully appreciate Gen. Pope's splendid achievements
with their invaluable results; but you must know that
Major Generalships in the Regular Army, are not as
plenty as blackberries.
</p>
</body>
]]>
</EG>
<!-- Is this quote right? shouldn't it be "plentIFUL"? --> 
<!-- A. Lincoln to Richard Yates and William Butler, 10 Apr 1862, -->
<!--  Library of America, Lincoln, v. 2 p. 315.                -->
<!---->
</P>
<P>A number of different tags are provided for the encoding of the
structural components of verse and performance texts (drama, film,
etc.):
<LIST TYPE="gloss">
<LABEL><GI>l</GI></LABEL>
<ITEM>contains a single, possibly incomplete, line of verse.
Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>part</IDENT></LABEL>
<ITEM>specifies whether or not the line is metrically complete. Legal
values are:
<CODE>F</CODE> for the final part of an incomplete line,
<CODE>Y</CODE> if the line is metrically incomplete,
<CODE>N</CODE> if the line is complete, or if no claim is made as to
its completeness,
<CODE>I</CODE> for the initial part of an incomplete line,
<CODE>M</CODE>  for a medial part of an incomplete line.
</ITEM></LIST></ITEM>
<LABEL><GI>lg</GI></LABEL>
<ITEM>contains a group of verse lines functioning as a formal unit
e.g. a stanza, refrain, verse paragraph, etc.</ITEM>
<LABEL><GI>sp</GI></LABEL>
<ITEM>contains an individual speech in a performance text, or a
passage  presented as such in a prose or verse text. Attributes
include:
<LIST TYPE="gloss">
<LABEL><IDENT>who</IDENT></LABEL>
<ITEM>identifies the speaker of the part by supplying an ID.</ITEM>
</LIST></ITEM>
<LABEL><GI>speaker</GI></LABEL>
<ITEM>contains a special form of heading or label, giving the name of
one or more speakers in a performance text or fragment.</ITEM>
<LABEL><GI>stage</GI></LABEL>
<ITEM>contains any kind of stage direction within a performance text
or fragment. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>indicates the kind of stage direction. Suggested values include
 <KW>entrance</KW>, <KW>exit</KW>, <KW>setting</KW>,
<KW>delivery</KW>, etc.</ITEM></LIST></ITEM></LIST>
</P>
<P>Here, for example, is the start of a poetic text in which verse
lines and stanzas are tagged:
<EG><![CDATA [<lg n=I>
<l>I Sing the progresse of a
   deathlesse soule,</l>
<l>Whom Fate, which God made,
  but doth not controule,</l>
<l>Plac'd in most shapes; all times
  before the law</l>
<l>Yoak'd us, and when, and since,
  in this I sing.</l>
<l>And the great world to his aged evening;</l>
<l>From infant morne, through manly noone I draw.</l>
<l>What the gold Chaldee, or silver Persian saw,</l>
<l>Greeke brass, or Roman iron, is in this one;</l>
<l>A worke t'out weare Seths pillars, bricke and stone,</l>
<l>And (holy writs excepted) made to yeeld to none,</l>
</lg>
]]></EG>
</P>
<P>Note that the <GI>l</GI> element marks verse, not typographic
lines: the original lineation of the first few lines above has not
therefore been made explicit by this encoding, and may be lost. The
<GI>lb</GI> element described in section <PTR TARGET="pln"> may be
used to mark typographic lines if so desired.
</P>
<P>Sometimes, particularly in dramatic texts, verse lines are split
between speakers. The easiest way of encoding this is to use the
<IDENT>part</IDENT> attribute to indicate that the lines so
fragmented are incomplete, as in this example:
<EG><![CDATA [<div1 type ='Act' n='I'><head>ACT I</head>
<div2 type ='Scene' n='1'><head>SCENE I</head>
<stage rend=italic>
Enter Barnardo and Francisco, two Sentinels, at several doors</stage>
<sp><speaker>Barn<l part=Y>Who's there?
<sp><speaker>Fran<l>Nay, answer me. Stand and unfold yourself.
<sp><speaker>Barn<l part=i>Long live the King!
<sp><speaker>Fran<l part=m>Barnardo?
<sp><speaker>Barn<l part=f>He.
<sp><speaker>Fran<l>You come most carefully upon your hour.
]]>
</EG> </P>
<P>The same mechanism may be applied to stanzas which are divided
between two speakers:
<EG><![CDATA [<sp><speaker>First voice</speaker>
<lg type=stanza part=I>
<l>But why drives on that ship so fast
<l>Withouten wave or wind?
</lg>
<sp><speaker>Second Voice</speaker>
<lg part=F>
<l>The air is cut away before,
<l>And closes from behind.
</lg>
]]>
</EG> </P>
<P>This example shows how dialogue presented in a prose work as if it
were drama should be encoded. It also demonstrates the use of the
<IDENT>who</IDENT> attribute to bear a code identifying the speaker
of the piece of dialogue concerned:
<EG><![CDATA [<sp who=OPI><speaker>The Reverend Doctor Opimian</speaker>
<p>I do not think I have named a single unpresentable fish.
<sp who=GRM><speaker>Mr Gryll</speaker>
<p>Bream, Doctor: there is not much to be said for bream.
<sp who=OPI><speaker>The Reverend Doctor Opimian</speaker>
<p>On the contrary, sir, I think there is much to be said for him.
In the first place....
<p>Fish, Miss Gryll -- I could discourse to you on fish by
the hour:  but for the present I will forbear...
</sp>
]]>
</EG><!-- Peacock, Gryll Grange, p 778 -->
</P></DIV2></DIV1>
<DIV1 ID="pln"><HEAD>Page and Line Numbers</HEAD>
<P>Page and line breaks may be marked with the following empty
elements.
<LIST TYPE="gloss">
<LABEL><GI>pb</GI></LABEL>
<ITEM>marks the boundary between one page of a text and the next in a
standard reference system.
</ITEM>
<LABEL><GI>lb</GI></LABEL>
<ITEM>marks the start of a new (typographic) line in some  edition or
version of a text.
</ITEM>
</LIST>    These elements mark a single point in the text, not a span
of text. The global <IDENT>n</IDENT> attribute should be used to
supply the number of the page or line beginning at the tag. In
addition, these two elements share the following attribute:
<LIST TYPE="gloss">
<LABEL><IDENT>ed</IDENT></LABEL>
<ITEM>indicates the edition or version in which the page or line break
is located at this point.</ITEM>
</LIST></P>
<P>When working from a paginated original, it is often useful to
record its pagination, if only to simplify later proof-reading.
Recording the line breaks may be useful for the same reason; treatment
of end-of-line hyphenation in printed source texts will require some
consideration.
</P>
<P>If pagination, etc., are marked for more than one edition, specify
the edition in question using the <IDENT>ed</IDENT> attribute, and
supply as many tags as are necessary. For example, in the following
passage we indicate where the page breaks occur in two different
editions (<IDENT>ED1</IDENT> and <IDENT>ED2</IDENT>):
<EG><![CDATA [<p>I wrote to Moor House and to Cambridge immediately, to
say what I had done: fully explaining also why I had thus
acted.  Diana and <pb ed=ED1 n='475'> Mary approved the
step unreservedly.  Diana announced that she would
<pb ed=ED2 n='485'>just give me time to get over the
honeymoon, and then she would come and see me.
]]>
</EG></P>
<P>The <GI>pb</GI> and <GI>lb</GI> elements are special cases of
the general class of <TERM>milestone</TERM> elements which mark
reference points within a text.  TEI Lite also includes a generic
<GI>milestone</GI> element, which is not restricted to special cases
but can  mark any kind of reference point:  for example, a column
break, the start of a new kind of section not otherwise tagged, etc.
This element has the following description and attributes:
</P>
<LIST TYPE="gloss">
<LABEL><GI>milestone</GI></LABEL>
<ITEM>marks the boundary between sections of a text, as indicated by
changes in a standard reference system. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>ed</IDENT></LABEL>
<ITEM>indicates the edition or version to which the milestone applies.</ITEM>
<LABEL><IDENT>unit</IDENT></LABEL>
<ITEM>indicates what kind of section is changing at this milestone.
</ITEM></LIST>
</ITEM></LIST>
<P>The names used for types of unit and for editions referred to by
the
<IDENT>ed</IDENT> and <IDENT>unit</IDENT> attributes may be chosen
freely, but should be documented in the header.</P>
<P>The <GI>milestone</GI> element may be used to replace the others,
or the others may be used as a set; they should not be mixed
arbitrarily.</P></DIV1>
<DIV1><HEAD>Marking Highlighted Phrases</HEAD>
<DIV2 ID="faces"><HEAD>Changes of Typeface, etc.</HEAD>
<P>Highlighted words or phrases are those made visibly different from
the rest of the text, typically by a change of type font, handwriting
style, or ink color, intended to draw the reader's attention to them.
</P>
<P>The global <IDENT>rend</IDENT> attribute can be attached to any
element, and used wherever necessary to specify details of the
highlighting used for it. For example, a heading rendered in bold
might be tagged <CODE LANG="sgml">head rend='Bold'</CODE>, and one in
italic <CODE LANG="sgml">head rend='Italic'</CODE>.
</P>
<P>It is not always possible or desirable to interpret the reasons
for such changes of rendering in a text.  In such cases, the element
<GI>hi</GI> may be used to mark a sequence of  highlighted text
without making any claim as to its status.
<LIST TYPE="gloss">
<LABEL><GI>hi</GI></LABEL>
<ITEM>marks a word or phrase as graphically distinct from the
surrounding text, for reasons concerning which no claim is made.
</ITEM>
</LIST></P>
<P>In the following example, the use of a distinct typeface for the
subheading and for the included name are recorded but not interpreted:
<EG><![CDATA [<hi rend=gothic>And this Indenture further witnesseth</hi>
that the said <hi rend=italic>Walter Shandy</hi>, merchant,
in consideration of the said intended marriage ...
]]>
</EG>
</P>
<P>Alternatively, where the cause for the highlighting can be
identified with confidence, a number of other, more specific, elements
are available.
<LIST TYPE="gloss">
<LABEL><GI>emph</GI></LABEL>
<ITEM>marks words or phrases which are stressed or emphasized for
linguistic or rhetorical effect.</ITEM>
<LABEL><GI>foreign</GI></LABEL>
<ITEM>identifies a word or phrase as belonging to some language other
than that of the surrounding text. </ITEM>
<LABEL><GI>mentioned</GI></LABEL>
<ITEM>marks words or phrases mentioned, not used.</ITEM>
<LABEL><GI>term</GI></LABEL>
<ITEM>contains a single-word, multi-word or symbolic designation
which is regarded as a technical term.</ITEM>
<LABEL><GI>title</GI></LABEL>
<ITEM>contains the title of a work, whether article, book, journal,
or series, including any alternative titles or subtitles. Attributes
include:
<LIST TYPE="gloss">
<LABEL><IDENT>level</IDENT></LABEL>
<ITEM>indicates whether this is the title of an article, book,
journal, series, or unpublished material. Legal values are:
<KW>m</KW> for monographic title (book, collection, or other item
published as a distinct item, including single volumes of multi-volume
works); <KW>s</KW> (series title); <KW>j</KW> (journal title);
<KW>u</KW> for title of unpublished material (including theses and
dissertations unless published by a commercial press); <KW>a</KW> for
analytic title (article, poem, or other item published as part of a
larger item).</ITEM>
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>classifies the title according to some convenient typology.
Sample values include:
<CODE>abbreviated</CODE>, <CODE>main</CODE>, <CODE>subordinate</CODE>
(for subtitles and titles of parts), and <CODE>parallel</CODE> (for
alternate titles, often in another language, by which the work is also
known).</ITEM></LIST></ITEM>
</LIST></P>
<P>Some features (notably quotations and glosses) may be found in a
text either marked by highlighting, or with quotation marks.  In
either case, the elements <GI>q</GI> and <GI>gloss</GI> (as
discussed in the following section) should be used. If the rendition
is to be recorded, use the global <IDENT>rend</IDENT> attribute.
</P>
<P>As an example of the elements defined here, consider the following
sentence:
<!-- Theodore M. Andersson, Preface to the Nibelungenlied, p. 3 -->
<Q REND="display">On the one hand the <TITLE>Nibelungenlied</TITLE>
is associated with the new rise of romance of twelfth-century France,
the
<HI REND="ital">romans d'antiquit&eacute;</HI>, the romances of Chr&eacute;tien
de Troyes, and the German adaptations of these works by Heinrich van
Veldeke, Hartmann von Aue, and Wolfram von Eschenbach.
</Q> Interpreting the role of the highlighting, the sentence might
look like this:
<EG><![CDATA [On the one hand the <title>Nibelungenlied</title> is associated
with the new rise of romance of twelfth-century France, the
<foreign>romans d'antiquit&eacute;</foreign>, the romances of
Chr&eacute;tien de Troyes, ...
]]>
</EG>  Describing only the appearance of the original, it might look
like this:
<EG><![CDATA [On the one hand the <hi rend=italic>Nibelungenlied</hi>
is associated with the new rise of romance of twelfth-century
France, the <hi rend=italic>romans
d'antiquit&eacute;</hi>, the romances of
Chr&eacute;tien de Troyes, ...
]]>
</EG>
<!---->
</P></DIV2>
<DIV2 ID="z635"><HEAD>Quotations and Related Features</HEAD>
<P>Like changes of typeface, quotation marks are conventionally used
to denote several different features within a text, of which the most
frequent is quotation.  When possible, we recommend that the
underlying feature be tagged, rather than the simple fact that
quotation marks appear in the text, using the following elements:
<LIST TYPE="gloss">
<LABEL><GI>q</GI></LABEL>
<ITEM>contains a quotation or apparent quotation --- a representation
of speech or thought marked as being quoted from someone else (whether
in fact quoted or not); in narrative, the words are usually those of
a character or speaker; in dictionaries, <GI>q</GI> may be used to
mark real or contrived examples of usage.  Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>may be used to indicate whether the quoted matter is spoken or
thought, or to characterize it more finely. Sample values include:
<KW>spoken</KW> (for representation of direct speech, usually marked
by quotation marks) and <KW>thought</KW> (for representation of
thought, e.g. internal monologue).</ITEM>
<LABEL><IDENT>who</IDENT></LABEL>
<ITEM>identifies the speaker of a piece of direct speech.</ITEM>
</LIST></ITEM>
<LABEL><GI>mentioned</GI></LABEL>
<ITEM>marks words or phrases mentioned, not used.</ITEM>
<LABEL><GI>soCalled</GI></LABEL>
<ITEM>contains a word or phrase for which the author or narrator
indicates a disclaiming of responsibility, for example by the use of
scare quotes or italics.</ITEM>
<LABEL><GI>gloss</GI></LABEL>
<ITEM>marks a word or phrase which provides a gloss or definition for
some other word or phrase.  Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>target</IDENT></LABEL>
<ITEM>identifies the associated word or phrase.</ITEM>
</LIST>
</ITEM>
</LIST>   Here is a simple example of a quotation:
<EG><![CDATA [Few dictionary makers are likely to forget
Dr. Johnson's description of the
lexicographer as <q>a harmless drudge.</q>
]]>
</EG> </P>
<P>To record how a quotation was printed (for example,
<TERM>in-line</TERM> or set off as a <TERM>display</TERM> or
<TERM>block quotation</TERM>), the <IDENT>rend</IDENT> attribute
should be used. This may also be used to indicate the kind of
quotation marks used.
</P>
<P>Direct speech interrupted by a narrator can be represented simply
by ending the quotation and beginning it again after the interruption,
as in the following example:
<EG><![CDATA [<p><q>Who-e debel you?</q> &mdash; he at last said &mdash; <q>you
no speak-e, damme, I kill-e.</q>  And so saying, the lighted
tomahawk began flourishing about me in the dark.
]]>
</EG>If it is important to convey the idea that the two <GI>q</GI>
elements together reproduce a single speech, the linking attributes
<IDENT>next</IDENT> and
<IDENT>prev</IDENT> may be used, as described in section <PTR TARGET="xatts"
>.
</P>
<P>Quotations may be accompanied by a reference to the source or
speaker, using the <IDENT>who</IDENT> attribute, whether or not the
source is given in the text, as in the following example:
<EG><![CDATA [<q who=Wilson>Spaulding, he came down into the office just this
day eight weeks with this very paper in his hand, and he
says:&mdash;<q who=Spaulding>I wish to the Lord, Mr. Wilson, that
I was a red-headed man.</q></q>
]]>
</EG>
<!----> This example also demonstrates how quotations may be embedded
within other quotations: one speaker (Wilson) quotes another speaker
(Spaulding).
</P>
<P>The creator of the electronic text must decide whether quotation
marks are replaced by the tags or whether the tags are added and the
quotation marks kept. If the quotation marks are removed from the
text, the <IDENT>rend</IDENT> attribute may be used to record the way
in which they were rendered in the copy text.
</P>
<P>As with highlighting, it is not always possible and may not be
considered desirable to interpret the function of quotation marks in a
text in this way.  In such cases, the tag <GI>hi rend=quoted</GI>
might be used to mark quoted text without making any claim as to its
status.
<!---->
</P></DIV2>
<DIV2 ID="z636"><HEAD>Foreign Words or Expressions</HEAD>
<P>Words or phrases which are not in the main language of the text
may be tagged as such in one of two ways. If the word or phrase is
already tagged for some reason, the element indicated should bear a
value for the global <IDENT>lang</IDENT> attribute indicating the
language used. Where there is no applicable element, the element
<GI>foreign</GI> may be used, again using the <IDENT>lang</IDENT>
attribute.  For example:
<EG><![CDATA [John has real <foreign lang=fra>savoir-faire</foreign>.
]]></EG><EG><![CDATA [Have you read <title lang=deu>Die Dreigroschenoper</title>?
]]></EG><EG><![CDATA [<mentioned lang=fra>Savoir-faire</mentioned> is French for know-how.
]]></EG><EG><![CDATA [The court issued a writ of <term lang=lat>mandamus</term>.
]]>
</EG></P>
<P>As these examples show, the <GI>foreign</GI> element should not
be used to tag foreign words if some other more specific element such
as <GI>title</GI>, <GI>mentioned</GI>, or <GI>term</GI> applies.
The global <IDENT>lang</IDENT> attribute may be attached to any
element to show that it uses some other language than that of the
surrounding text.
</P></DIV2></DIV1>
<DIV1 ID="z633"><HEAD>Notes</HEAD>
<P>All notes, whether printed as footnotes, endnotes, marginalia, or
elsewhere, should be marked using the same element:
<LIST TYPE="gloss">
<LABEL><GI>note</GI></LABEL>
<ITEM>contains a note or annotation. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>describes the type of note.</ITEM>
<LABEL><IDENT>resp</IDENT></LABEL>
<ITEM>indicates who is responsible for the annotation: author,
editor, translator, etc. The value might be
<CODE>author</CODE>,
<CODE>editor</CODE>, etc., or the initials of the individual who
added the annotation.
</ITEM>
<LABEL><IDENT>place</IDENT></LABEL>
<ITEM>indicates where the note appears in the source text. Sample
values include <CODE>inline</CODE>, <CODE>interlinear</CODE>, <CODE>left</CODE>,
<CODE>right</CODE>, <CODE>foot</CODE>,  and <CODE>end</CODE>, for
notes which appear as marked paragraphs in the body of the text,
between the lines, in the left or right margin, at the foot of the
page, or at the end of the chapter or volume, respectively.</ITEM>
<LABEL><IDENT>target</IDENT></LABEL>
<ITEM>indicates the point of attachment of a note, or the beginning
of the span to which the note is attached.</ITEM>
<LABEL><IDENT>targetEnd</IDENT></LABEL>
<ITEM>points to the end of the span to which the note is attached, if
the note is not embedded in the text at that point.</ITEM>
<LABEL><IDENT>anchored</IDENT></LABEL>
<ITEM>indicates whether the copy text shows the exact place of
reference for the note.</ITEM></LIST>
</ITEM>
</LIST>   Where possible, the body of a note should be inserted in the
text at the point at which its identifier or mark first appears. This
may not be possible for example with marginalia, which may not be
anchored to an exact location.  For simplicity, it may be adequate to
position marginal notes before the relevant paragraph or other
element.  Notes may also be placed in a separate division of the text
(as end-notes are, in printed books) and linked to the relevant
portion of the text using their <IDENT>target</IDENT> attribute.</P>
<P>The <IDENT>n</IDENT> attribute may be used to supply the number
or identifier of a note if this is required.  The <IDENT>resp</IDENT>
attribute should be used consistently to distinguish between authorial
and editorial notes, if the work has both kinds; otherwise, the TEI
header should state which kind they are.</P>
<P>Examples:
<EG><![CDATA [Collections are ensembles of distinct
entities or objects of any sort.
<note place=foot n=1>
We explain below why we use the uncommon term
<mentioned>collection</mentioned>
instead of the expected
<mentioned>set</mentioned>.
Our usage corresponds to the <mentioned>aggregate</mentioned>
of many mathematical writings and to the sense of
<mentioned>class</mentioned> found
in older logical writings.
</note>
The elements ...
]]></EG><!-- Are we sure this is right?  I could have sworn this book
had end notes, not foot notes  --><EG><![CDATA [<lg id=RAM609>
<note place=margin>The curse is finally expiated</note>
<l>And now this spell was snapt: once more</l>
<l>I viewed the ocean green,</l>
<l>And looked far forth, yet little saw</l>
<l>Of what had else been seen &mdash;</l>
]]>
</EG>
</P></DIV1>
<DIV1 ID="xr"><HEAD>Cross References and Links</HEAD>
<P>Explicit cross references or links from one point in a text to
another in the same SGML document may be encoded using the elements
described in section <PTR TARGET="ptrs">. References or links to
elements of some other SGML document, or to parts of non-SGML
documents, may be encoded using the <TERM>TEI extended pointers</TERM>
described in section <PTR TARGET="xptrs">. Implicit links (such as
the association between two parallel texts, or that between a text and
its interpretation) may be encoded using the linking attributes
discussed in section <PTR TARGET="xatts">.
</P>
<DIV2 ID="ptrs"><HEAD>Simple Cross References</HEAD>
<P>A cross reference from one point within a single document to
another can be encoded using either of the following elements:
<LIST TYPE="gloss">
<LABEL><GI>ref</GI></LABEL>
<ITEM>a reference to another location in the current document, in
terms of one or more identifiable elements, possibly modified by
additional text or comment.
</ITEM>
<LABEL><GI>ptr</GI></LABEL>
<ITEM>a pointer to another location in the current document in terms
of one or more identifiable elements.
</ITEM>
</LIST>
</P>
<P>These elements share the following attributes:
<LIST TYPE="gloss">
<LABEL><IDENT>target</IDENT></LABEL>
<ITEM>specifies the destination of the pointer as one or more SGML
identifiers.</ITEM>
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>categorizes the pointer in some respect, using any convenient
set of categories.</ITEM>
<LABEL><IDENT>targType</IDENT></LABEL>
<ITEM>specifies the type (or types)  of element to which this pointer
may point.</ITEM>
<LABEL><IDENT>crDate</IDENT></LABEL>
<ITEM>specifies when this pointer was made.</ITEM>
<LABEL><IDENT>resp</IDENT></LABEL>
<ITEM>specifies the creator of the pointer.</ITEM>
</LIST>
</P>
<P>The difference between these two elements is that <GI>ptr</GI> is
an empty element, simply marking a point from which a link is to be
made, whereas <GI>ref</GI> may contain some text as well ---
typically the text of the cross-reference itself. The <GI>ptr</GI>
element would be used for a cross reference which is to be indicated by
some non-verbal means such as a symbol or icon, or in an electronic
text by a button. It is also useful in document production systems,
where the formatter can generate the correct verbal form of the cross
reference.
</P>
<P>The following two forms, for example, are logically equivalent
(assuming we have documented somewhere the exact verbal form of cross
references represented by <GI>ptr</GI> elements):
<EG><![CDATA [See especially <ref target=SEC12>section 12 on page 34</ref>.
]]></EG><EG><![CDATA [See especially <ptr target=SEC12>.
]]>
</EG>  The value of the <IDENT>target</IDENT> attribute must be an
SGML identifier in the current SGML document.  This implies that the
passage or phrase being pointed at must bear an identifier, and must
therefore be tagged as an element of some kind. In the following
example, the cross reference is to a
<GI>div1</GI> element:
<EG><![CDATA [    ...
    see especially <ptr target=SEC12>.
    ...
    <div1 id=SEC12><head>Concerning Identifiers...
    ...
]]>
</EG> </P>
<P>Because the <IDENT>id</IDENT> attribute is global, any element in
a document may be pointed to in this way. In the following example, a
paragraph has been given an identifier so that it may be pointed at:
<EG><![CDATA [    ...
    this is discussed in <ref target=pspec>the paragraph on links</ref>
    ...
    <p id=pspec>Links may be made to any kind of element
    ...
]]>
</EG>
</P>
<P>The <IDENT>targType</IDENT> attribute can be used to specify that
the element pointed to must be of a particular type, as in the
following example:
<EG><![CDATA [    ...
    this is discussed in <ref target=dspec targType='div1 div2'>
    the section on links</ref>
]]>
</EG>
</P>
<P>This reference should fail if the element with identifier
<IDENT>dspec</IDENT> is not either a <GI>div1</GI> or a <GI>div2</GI>.
Note however that this check cannot be carried out by an SGML parser
alone, since the SGML parser can only check that some element
<IDENT>dspec</IDENT> exists.
</P>
<P>The <IDENT>type</IDENT> attribute can be used to categorize the
link represented by the pointer in any convenient way. The
<IDENT>resp</IDENT> and <IDENT>crDate</IDENT> attributes may also be
used to represent the person or agency responsible for making the
link, and its date of creation, as in the following example:
<EG><![CDATA [    ...
   this is discussed in
   <ref type=xref resp=auto crdate=950521 target=dspec targtype='div1 div2'>
   the section on links</ref>
]]>
</EG> These attributes are most likely to be of use in hypertext
systems containing very many pointers used for a variety of purposes
and created by a variety of means.</P>
<P>Sometimes the target of a cross reference  does not correspond
with any particular feature of a text, and so may not be tagged as an
element of some kind. If the desired target is simply a point in the
current document, the easiest way to mark it is by introducing an
<GI>anchor</GI> element at the appropriate spot. If the target is
some sequence of words not otherwise tagged, the <GI>seg</GI> element
may be introduced to mark them. These two elements are described as
follows:
<LIST TYPE="gloss">
<LABEL><GI>anchor</GI></LABEL>
<ITEM>specifies a location or point within a document so that it may
be pointed to.</ITEM>
<LABEL><GI>seg</GI></LABEL>
<ITEM>identifies a span or segment of text within a document so that
it may be pointed to. Attributes include
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>categorizes the segment</ITEM></LIST></ITEM>
</LIST></P>
<P>In the following (imaginary) example, <GI>ref</GI> elements have
been used to represent points in this text which are to be linked in
some way to other parts of it; in the first case to a point, and in
the second, to a sequence of words:
<EG><![CDATA [  Returning to <ref target=ABCD>the point where I dozed
  off</ref>, I noticed that <ref target=EFGH>three
  words</ref> had been circled in red by a previous reader
]]>
</EG>
</P>
<P>This encoding requires that elements with the specified
identifiers (<IDENT>ABCD</IDENT> and <IDENT>EFGH</IDENT> in this
example) are to be found somewhere else in the current document.
Assuming that no element already exists to carry these identifiers,
the <GI>anchor</GI> and
<GI>seg</GI> elements may be used:
<EG><![CDATA [  .... <anchor type=bookmark id='ABCD'> ....
   ....<seg type=target id='EFGH'> ... </seg> ...
]]>
</EG>
</P>
<P>The <IDENT>type</IDENT> attribute should be used (as above) to
distinguish amongst different purposes for which these general purpose
elements might be used in a text. Some other uses are  discussed in
section <PTR TARGET="xatts"> below.
</P>
</DIV2>
<DIV2 ID="xptrs"><HEAD>Extended Pointers</HEAD>
<P>The elements <GI>ptr</GI> and <GI>ref</GI> can only be used for
cross-references or links whose targets occur within the same SGML
document as their source. They can also refer only to SGML elements.
The elements discussed in this section are not restricted in this way.
<LIST TYPE="gloss">
<LABEL><GI>xptr</GI></LABEL>
<ITEM>defines a pointer to another location in the current document
or an external document.</ITEM>
<LABEL><GI>xref</GI></LABEL>
<ITEM>defines a pointer to another location in the current document
or an external document, possibly modified by additional text or
comment.</ITEM>
</LIST>
</P>
<P>In addition to the pointer attributes already discussed in section
<PTR TARGET="ptrs"> above, these elements share the following
additional attributes, which are used to specify the target of the
cross reference or link in place of the
<IDENT>target</IDENT> attribute:
<LIST TYPE="gloss">
<LABEL><IDENT>doc</IDENT></LABEL>
<ITEM>specifies the document within which the required location is to
be found, by default the current document.</ITEM>
<LABEL><IDENT>from</IDENT></LABEL>
<ITEM>specifies the start of the destination of the pointer as an
expression in the TEI extended pointer syntax, by default the whole of
the document indicated by the <IDENT>doc</IDENT> attribute. </ITEM>
<LABEL><IDENT>to</IDENT></LABEL>
<ITEM>specifies the endpoint of the destination of the pointer as an 
  expression in the TEI extended pointer syntax; may only be specified
if the <IDENT>from</IDENT> attribute has been. </ITEM>
</LIST>
</P>
<P>A full specification of the language used to express the target of
TEI extended pointers is beyond the scope of this document; here we
list only a few of its more generally useful features. The full
Guidelines should be consulted for more detail.
</P>
<P>An <GI>xptr</GI> (or <GI>xref</GI>) may point to the whole of
some other document simply by supplying an entity name as the value of
the
<IDENT>doc</IDENT> attribute, as in this example:
<EG><![CDATA [  see <xref doc=P3>The TEI Guidelines, passim</xref>
]]></EG>
</P>
<P>This example assumes that some system or public entity with the
name
<IDENT>P3</IDENT> has been declared. This declaration may be placed
within the <IDENT>litemods.ent</IDENT> extension file or in some
other manner specific to the particular SGML authoring software in use
(as discussed in section <PTR TARGET="xfigs">).
</P>
<P>The <IDENT>from</IDENT> attribute is used to specify some
location within whatever document is specified by the <IDENT>doc</IDENT>
attribute. The specification uses a special language, called the
<TERM>TEI extended pointer syntax</TERM>, only some details of which
are given here. In this language, locations are defined as a series of
<TERM>steps</TERM>, each one identifying some part of the document,
often in terms of the locations identified by the previous step.  For
example, you would point to the third sentence of the second paragraph
of chapter two by selecting chapter two in the first step, the second
paragraph in the second step, and the third sentence in the last step.
A step can be defined in terms of SGML concepts (such as <KW>parent</KW>,
<KW>descendant</KW>, <KW>preceding</KW>, etc.) or, more loosely, in
terms of text patterns, word or character positions. You  can also use
a foreign (non-SGML) notation, or specify a location within a graphic
in terms of its co-ordinate system.</P>
<P>The <IDENT>from</IDENT> and <IDENT>to</IDENT> attributes use the
same notation.  Each points to some portion of the target document;
the extended pointer as a whole points to the section beginning at the
start of the <IDENT>from</IDENT> and running to the end of the
<IDENT>to</IDENT>.</P>
<P>The first step in a location path will often be to specify the
identifier of some element within the target document, as in this
example:
<EG><![CDATA [<xptr doc=P3 from='id (SA)'>
]]></EG>  This selects the whole of whatever element bears the
identifier
<IDENT>SA</IDENT> within the entity <IDENT>P3</IDENT>. If a
finer-grained target is required, other steps might follow. The
following keywords are available for you to select other elements in
terms of their relationship to this one:
<LIST TYPE="gloss">
<LABEL><IDENT>child</IDENT></LABEL>
<ITEM>elements contained by this one.</ITEM>
<LABEL><IDENT>ancestor</IDENT></LABEL>
<ITEM>elements which contain this one, directly or indirectly.</ITEM>
<LABEL><IDENT>previous</IDENT></LABEL>
<ITEM>elements with the same parent as this one but preceding it in
the document.</ITEM>
<LABEL><IDENT>next</IDENT></LABEL>
<ITEM>elements with the same parent as this one and following it in
the document.</ITEM>
<LABEL><IDENT>preceding</IDENT></LABEL>
<ITEM>elements in the document which start before this one does,
irrespective of their parents.</ITEM>
<LABEL><IDENT>following</IDENT></LABEL>
<ITEM>elements in the document which start after this one does,
irrespective of their parents.</ITEM>
</LIST>
</P>
<P>Each of these keywords implies a particular set of elements (the
set of children, the set of ancestors, the set of previous siblings,
etc.); to specify which element in the set we are pointing at, the
keyword may optionally be followed by a parenthesized list containing:
<LIST>
<ITEM>a positive or negative number, indicating which of the possibly
many elements found is intended (+1 indicating the first element
encountered, starting from the current location, and -1 indicating the
last), or the keyword <KW>all</KW>, indicating that all the elements
in the set are to be pointed at;</ITEM>
<ITEM>a generic identifier, indicating the type of element required,
or a star indicating that any element type will do;</ITEM>
<ITEM>a set of attribute names and values, indicating that the
element selected should have attributes with the names and values
specified, if any.</ITEM></LIST>
</P>
<P>Continuing the above example, the following reference will select
the third <GI>p</GI> element directly contained by whatever element
has the identifier <IDENT>SA</IDENT>:
<EG><![CDATA [<xptr doc=P3 from='id (SA) child (3 p)'>
]]>
</EG></P>
<P>Similarly, assuming that the entity <IDENT>P3</IDENT> is in fact
a reference to the SGML form of the TEI Guidelines, then the following
reference will select section 14.2.2 of that publication in which (as
it happens) the extended pointer syntax is formally defined:
<EG><![CDATA [For full details, see
<xref doc=P3 from='id (SA) child (2 div2) child (2 div3)'>
  TEI Extended pointer syntax definition
</xref>
]]></EG>
</P>
<P>Normally, the scope  of a cross reference will be adequately
defined by the <IDENT>from</IDENT> attribute. For some  documents,
however, it may be more convenient to define both a starting and an
ending scope. As noted above, the <IDENT>to</IDENT> attribute is
provided for this purpose. For example,
<EG><![CDATA [  <xptr doc=P1 from='id (xyz)' to='id (abc)'>
]]></EG> is an extended pointer whose target is the sequence starting
at the beginning of whatever element in document <IDENT>P1</IDENT>
has identifier <IDENT>XYZ</IDENT> and ending at the end of whatever
element in the same document has identifier <IDENT>ABC</IDENT>. Any
elements in between are also included, irrespective of structure; the
pointer is erroneous if the end of <IDENT>ABC</IDENT> precedes the
start of <IDENT>XYZ</IDENT>. </P>
<P>Very complex specifications are easily built using this syntax.
For example, the following reference will select the most recent <GI>head</GI>
element which carries an attribute <IDENT>lang</IDENT> with the value
<KW>LAT</KW>, and which occurs before the start of the element with
identifier
<IDENT>SA</IDENT>:
<EG><![CDATA [<xptr doc=P3 from='id (SA) preceding (1 head lang lat)'>
]]></EG>
</P>
<P>If no value is supplied for the <IDENT>doc</IDENT> attribute, the
current document is assumed. Thus, the following references are
semantically equivalent. They both indicate the element with
identifier
<IDENT>X1</IDENT> within the current document:
<EG><![CDATA [<ptr target=X1>
<xptr from='id (X1)'>
]]></EG>
</P></DIV2>
<DIV2 ID="xatts"><HEAD>Linking Attributes</HEAD>
<P>The following special purpose <TERM>linking</TERM> attributes are
defined for every element in the TEI Lite DTD:
<LIST TYPE="gloss">
<LABEL><IDENT>ana</IDENT></LABEL>
<ITEM>links an element with its interpretation.
</ITEM>
<LABEL><IDENT>corresp</IDENT></LABEL>
<ITEM>links an element with one or more other corresponding elements.
</ITEM>
<LABEL><IDENT>next</IDENT></LABEL>
<ITEM>links an element to the next element in an aggregate.
</ITEM>
<LABEL><IDENT>prev</IDENT></LABEL>
<ITEM>links an element to the previous element in an aggregate.
</ITEM></LIST></P>
<P>The <IDENT>ana</IDENT> (analysis) attribute is intended for use
where a set of abstract analyses or interpretations have been defined
somewhere within a document, as further discussed in section
<PTR TARGET="X4">. For example, a linguistic analysis of the sentence
<Q>John loves Nancy</Q> might be encoded as follows:
<EG><![CDATA [<seg type=sentence ana=SVO>
  <seg type=lex ana=NP1>John</seg>
  <seg type=lex ana=VVI>loves</seg>
  <seg type=lex ana=NP1>Nancy</seg>
</seg>
]]></EG>  This encoding implies the existence elsewhere in the
document of elements with identifiers <IDENT>SVO</IDENT>, <IDENT>NP1</IDENT>,
and
<IDENT>VVI</IDENT> where the significance of these particular codes
is explained. Note the use of the <GI>seg</GI> element to mark
particular components of the analysis, distinguished by the <IDENT>type</IDENT>
attribute.
</P>

<P>The <IDENT>corresp</IDENT> (corresponding) attribute provides a
simple way of representing some form of correspondence between two
elements in a text. For example, in a multilingual text, it may be
used to link translation equivalents, as in the following example:
<EG><![CDATA [<seg lang=FRA id=FR1 corresp=EN1>Jean aime Nancy</seg>
<seg lang=ENG id=EN1 corresp=FR1>John loves Nancy</seg>
]]></EG>
</P>
<P>The same mechanism may be used for a variety of purposes. In the
following example, it has been used to represent anaphoric
correspondences between <Q REND="inline" TYPE="inline">the show</Q>
and <Q REND="inline" TYPE="inline">Shirley</Q>, and between
<Q REND="inline" TYPE="inline">NBC</Q> and <Q REND="inline" TYPE="inline"
>the network</Q>:
<EG><![CDATA [<p><title id=shirley>Shirley</title>, which made
its Friday night debut only a month ago, was
not listed on <name id=nbc>NBC</name>'s new schedule,
although <seg id=network corresp=nbc>the network</seg>
says <seg id=show corresp=shirley>the show</seg>
still is being considered.
]]>
</EG>
</P>
<P>The <IDENT>next</IDENT> and <IDENT>prev</IDENT> attributes
provide a simple way of linking together the components of a
discontinuous  element, as in the following example:
<EG><![CDATA [<q id=Q1a next=Q1b>Who-e debel you?</q>
&mdash; he at last said &mdash;
<q id=Q1b prev=Q1a>you no speak-e,
damme, I kill-e.</q>  And so saying,
the lighted tomahawk began flourishing
about me in the dark.
]]>
</EG><!-- Melville, Moby Dick (but where?) --><!-- (The Spouter Inn,
close to beginning) -->
<!-- ophelias song eg might be better --><!-- ha. -->
</P>
</DIV2></DIV1>
<DIV1><HEAD>Editorial Interventions</HEAD>
<P>The process of encoding an electronic text has much in common with
the process of editing a manuscript or other text for printed
publication. In both cases a conscientious editor may  wish to record
both the original state of the source and any editorial correction or
other change made in it. The elements discussed in this and the next
section provide some facilities for meeting these needs.</P>
<P>The following pair of elements may be used to mark
<TERM>correction</TERM>, that is editorial changes introduced where
the editor believes the original to be erroneous:
<LIST TYPE="gloss">
<LABEL><GI>corr</GI></LABEL>
<ITEM>contains the correct form of a passage apparently erroneous in
the copy text. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>sic</IDENT></LABEL>
<ITEM>gives the original form of the apparent error in the copy text.</ITEM>
<LABEL><IDENT>resp</IDENT></LABEL>
<ITEM>signifies the editor or transcriber responsible for suggesting
the correction held as the content of the
<GI>corr</GI> element.</ITEM>
<LABEL><IDENT>cert</IDENT></LABEL>
<ITEM>signifies the degree of certainty ascribed to the correction
held as the content of the <GI>corr</GI> element.</ITEM>
</LIST>
</ITEM>
<LABEL><GI>sic</GI></LABEL>
<ITEM>contains text reproduced although apparently incorrect or
inaccurate. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>corr</IDENT></LABEL>
<ITEM>gives a correction for the apparent error in the copy text.</ITEM>
<LABEL><IDENT>resp</IDENT></LABEL>
<ITEM>signifies the editor or transcriber responsible for suggesting
the correction.</ITEM>
<LABEL><IDENT>cert</IDENT></LABEL>
<ITEM>signifies the degree of certainty ascribed to the correction.
</ITEM></LIST></ITEM>
</LIST>
</P>
<P>The following pair of elements may be used to mark
<TERM>normalization</TERM>, that is editorial changes introduced for
the sake of consistency or modernization of a text:
<LIST TYPE="gloss">
<LABEL><GI>orig</GI></LABEL>
<ITEM>contains the original form of a reading, for which a
regularized form is given in an attribute value. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>reg</IDENT></LABEL>
<ITEM>gives a regularized (normalized) form of the text.</ITEM>
<LABEL><IDENT>resp</IDENT></LABEL>
<ITEM>identifies the individual responsible for the regularization of
the word or phrase.</ITEM>
</LIST>
</ITEM>
<LABEL><GI>reg</GI></LABEL>
<ITEM>contains a reading which has been regularized or normalized in
some sense. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>orig</IDENT></LABEL>
<ITEM>gives the unregularized form of the text as found in the source
copy.</ITEM>
<LABEL><IDENT>resp</IDENT></LABEL>
<ITEM>identifies the individual responsible for the regularization of
the word or phrase.</ITEM>
</LIST>
</ITEM>
<!--
<tagdesc tagdoc=add atts='place resp hand cert'> <tagdesc tagdoc=del
atts='type status resp hand cert'>
-->
</LIST></P>
<!-- <P>The <GI>corr</GI> element is used for corrections of misspellings
or other errors; the <GI>reg</GI> element is used for regularizations
(not corrections) of archaic or otherwise non-standard (but not
<SOCALLED>incorrect</SOCALLED>) spellings.-->
<P>For example, the reading
<EG> ... for his nose was as sharp as a pen and a' table of green feelds
</EG> is taken by Gifford as involving (1) the erroneous substitution
of <MENTIONED>table</MENTIONED> for <MENTIONED>babbled</MENTIONED>,
and (2) the non-standard spellings <MENTIONED>a'</MENTIONED> and
<MENTIONED>feelds</MENTIONED> for <MENTIONED>he</MENTIONED> and
<MENTIONED>fields</MENTIONED>.  Gifford's conjecture might be encoded
thus:<EG><![CDATA [... for his nose was as sharp as a pen and <reg orig="a'">he</reg>
 <corr sic='table' resp=Gifford>babbl'd</corr> of green
<reg orig='feelds'>fields</reg>
]]>
</EG>
</P></DIV1>
<DIV1><HEAD>Omissions, Deletions, and Additions</HEAD>
<P>In addition to correcting or normalizing words and phrases,
editors and transcribers may also supply missing material, omit
material, or transcribe material deleted or crossed out in the source.
In addition, some material may be particularly hard to transcribe
because it is hard to make out on the page.  The following elements
may be used to record such phenomena:<LIST TYPE="gloss">
<LABEL><GI>add</GI></LABEL>
<ITEM>contains letters, words, or phrases inserted in the text by an
author, scribe, annotator, or corrector.  Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>place</IDENT></LABEL>
<ITEM>if the addition is written into the copy text, indicates where
the additional text is written.  Sample values include
<CODE>inline</CODE>,
<CODE>supralinear</CODE>, <CODE>infralinear</CODE>,
<CODE>left</CODE> (in left margin),
<CODE>right</CODE> (in right margin),
<CODE>top</CODE>,
<CODE>bottom</CODE>, etc.
</ITEM>
</LIST></ITEM>
<LABEL><GI>gap</GI></LABEL>
<ITEM>indicates a point where material has been omitted in a
transcription, whether for editorial reasons described in the TEI
header, as part of sampling practice, or because the material is
illegible or inaudible. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>desc</IDENT></LABEL>
<ITEM>gives a description of the omitted text.</ITEM>
<LABEL><IDENT>resp</IDENT></LABEL>
<ITEM>indicates the editor, transcriber or encoder responsible for
the decision not to provide any transcription of the text and hence
the application of the <GI>gap</GI> tag.</ITEM>
</LIST>
</ITEM>
<LABEL><GI>del</GI></LABEL>
<ITEM>contains a letter, word or passage deleted, marked as deleted,
or otherwise indicated as superfluous or spurious in the copy text by
an author, scribe, annotator or corrector. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>classifies the type of deletion using any convenient typology.</ITEM>
<LABEL><IDENT>status</IDENT></LABEL>
<ITEM>may be used to indicate faulty deletions, e.g. strikeouts which
include too much or too little text.</ITEM>
<LABEL><IDENT>hand</IDENT></LABEL>
<ITEM>signifies the hand of the agent which made the deletion.</ITEM>
</LIST>
</ITEM>
<LABEL><GI>unclear</GI></LABEL>
<ITEM>contains a word, phrase, or passage which cannot be transcribed
with certainty because it is illegible or inaudible in the source.
Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>reason</IDENT></LABEL>
<ITEM>indicates why the material is hard to transcribe.</ITEM>
<LABEL><IDENT>resp</IDENT></LABEL>
<ITEM>indicates the individual responsible for the transcription of
the letter, word or passage contained within the
<GI>unclear</GI> element.</ITEM></LIST></ITEM></LIST>
</P>
<P>These elements may be used to record changes made by an editor, by
the transcriber, or (in manuscript material) by the author or scribe.
For example, if the source for an electronic text read
<EG>The following elements are provided for
for simple editorial interventions.
</EG> then it might be felt desirable to correct the obvious error,
but at the same time to record the deletion of the superfluous second
<MENTIONED>for</MENTIONED>, thus:
<EG><![CDATA [The following elements are provided for
<del hand=LB>for</del> simple editorial interventions.
]]>
</EG> The attribute value <CODE>LB</CODE> on the <IDENT>hand</IDENT>
attribute indicates that <Q REND="inline" TYPE="inline">LB</Q>
corrected the duplication of <MENTIONED>for</MENTIONED>.</P>
<P>If the source read<EG>The following elements provided for
for simple editorial interventions.</EG> (i.e. if the verb had been
inadvertently dropped) then the corrected text might read:
<EG><![CDATA [The following elements <add hand=LB>are</add> provided for
<del hand=LB>for</del> simple editorial interventions.
]]>
</EG> The attribute value <CODE>LB</CODE> on the <IDENT>hand</IDENT>
attribute of both elements indicates that <Q REND="inline" TYPE="inline">LB</Q>
both supplied the missing verb and corrected the duplication of
<MENTIONED>for</MENTIONED>.
</P>
<P>These elements are not limited to changes made by an editor; they
can also be used to record authorial changes in manuscripts.  A
manuscript in which the author  has first written <Q REND="inline" TYPE="inline"
>How it galls me, what a galling shadow</Q>, then crossed out the
word
<MENTIONED>galls</MENTIONED> and inserted <MENTIONED>dogs</MENTIONED>
might be encoded thus:
<EG><![CDATA [How it <del hand=DHL type=overstrike>galls</del>
<add hand=DHL place=supralinear>dogs</add> me,
what a galling shadow
]]></EG>
<!--Here, D. H. Lawrence, in the poem "Eloi, Eloi, lama sabachthani",
in Pierpont Morgan MA1892, number 129 in British Literary Manuscripts
/ Series II:  from 1800 to 1914, by V. Klinkenborg, H. Cahoon, and C.
Ryskamp (New York:  Pierpont Morgan Library, 1981).--></P>
<P>Similarly, the <GI>unclear</GI> and <GI>gap</GI> elements may be
used together to indicate the omission of illegible material; the
following example also shows the use of <GI>add</GI> for a
conjectural emendation:<!-- NOTE PLACE="foot">Letter from Colonel
Henry Lee, Jr., to General Nathanael Greene, 25 January 1781, from
<BIBL><TITLE >The Papers of General Nathanael Greene</TITLE>, vol.
VII, ed. <EDITOR>Richard K. Showman</EDITOR>, <EDITOR>Dennis M.
Conrad</EDITOR>, <EDITOR>Roger N. Parks</EDITOR>, and
<EDITOR>Elizabeth C. Stevens</EDITOR> (<PUBPLACE>Chapel
Hill</PUBPLACE>:  <PUBLISHER>Univ. of North Carolina Press, published
for the Rhode Island Historical Society</PUBLISHER>,
<DATE>1994</DATE>), p. 197.</BIBL></NOTE -->
<EG><![CDATA [One hundred & twenty good regulars joined to me
<unclear><gap reason='indecipherable'></unclear>
& instantly, would aid me signally <add hand=ed>in?</add>
an enterprise against Wilmington.
]]>
</EG></P>
<P>The <GI>del</GI> element marks material which is transcribed as
part of the electronic text despite being marked as deleted, while
<GI>gap</GI> marks the location of material which is omitted from the
electronic text, whether it is legible or not.  A language corpus, for
example, might omit long quotations in foreign languages:<!--Hans
Jo/rgen Marker, Encoding Standards for the Generalist and the
Specialist:  Complex, compound documents as a test case, in Modelling
Historical Data, ed. Daniel I. Greenstein (St. Katharinen:
Max-Planck-Institut f&ue;r Geschichte, in Kommission bei Scripta
Mercaturae Verlag, 1991), pp. 159-160.-->
<EG><![CDATA [<p> ... An example of a list appearing in a fief ledger of
<name type=place>Koldinghus</name> <date>1611/12</date>
is given below. It shows cash income from a sale of
honey.</p>
<q><gap desc='quotation from ledger'
    reason='in Danish'></q>
<p>A description of the overall structure of the account is
once again ... </p>
]]></EG></P>
<P>Other corpora (particularly those constructed before the widespread
use of scanners) systematically omit figures and mathematics:<GAP><EG>
<![CDATA [<p>At the bottom of your screen below the mode line is the
<term>minibuffer</term>.  This is the area where Emacs
echoes the commands you enter and where you specify
filenames for Emacs to find, values for search and replace,
and so on.
<gap desc='diagram of Emacs screen' reason='graphic'>
</p>
]]></EG></P>

<!-- Debra Cameron and Bill Rosenblatt, Learning GNU Emacs.
Sebastopol: O'Reilly & Associates, 1991, p. 8. --></DIV1>
<DIV1><HEAD>Names, Dates, Numbers and Abbreviations</HEAD>
<P>The TEI scheme defines elements for a large number of
<SOCALLED>data-like</SOCALLED> features which may appear almost
anywhere within almost any kind of text. These features may be of
particular interest in a range of disciplines; they all relate to
objects external to the text itself, such as the names of persons and
places, numbers and dates. They also pose particular problems for many
natural language processing (NLP) applications because of the variety
of ways in which they may be presented within a text. The elements
described here, by making such features explicit, reduce the
complexity of processing texts containing them.
</P>
<DIV2 ID="nomen"><HEAD>Names and Referring Strings</HEAD>
<P>A <TERM>referring string</TERM> is a phrase which refers to some
person, place, object, etc. Two elements are provided to mark such
strings:
<LIST TYPE="gloss">
<LABEL><GI>rs</GI></LABEL>
<ITEM>contains a general purpose name or referring string. Attributes
include:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>indicates more specifically the object referred to by the
referring string. Values might include <CODE>person</CODE>,
<CODE>place</CODE>, <CODE>ship</CODE>, <CODE>element</CODE>, etc.</ITEM>
</LIST>
</ITEM>
<LABEL><GI>name</GI></LABEL>
<ITEM>contains a proper noun or noun phrase. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>indicates the type of the object which is being named by the
phrase.</ITEM>
</LIST>
</ITEM>
</LIST>    The <IDENT>type</IDENT> attribute is used to distinguish
amongst (for example) names of persons, places and organizations,
where this is possible:
<EG><![CDATA [<q>My dear <rs type=person>Mr. Bennet</rs>, </q>
said his lady to him one day, <q>have you heard
that <rs type=place>Netherfield Park</rs> is let
at last?</q>
]]>
</EG><!-- Austen Pride and Prejudice, chap 1 -->
<EG><![CDATA [It being one of the principles of the
<rs type=organization>Circumlocution Office</rs> never,
on any account whatsoever, to give a straightforward answer,
<rs type=person>Mr Barnacle</rs> said, <q>Possibly.</q>
]]>
</EG>
<!-- Little Dorrit, peng ed, p 153 -->
</P>
<P>As the following example shows, the <GI>rs</GI> element may be
used for any reference to a person, place, etc, not necessarily one in
the form of a proper noun or noun phrase.
<EG><![CDATA [<q>My dear <rs type=person>Mr. Bennet</rs>,</q>
said <rs type=person>his lady</rs> to him
one day...
]]>
</EG>
</P>
<P>The <GI>name</GI> element by contrast is provided for the special
case of referring strings which consist only of proper nouns; it may
be used synonymously with the <GI>rs</GI> element, or nested within
it if a referring string contains a mixture of common and proper
nouns.
</P>
<P>Simply tagging something as a name is generally not enough to
enable automatic processing of personal names into the canonical forms
usually required for reference purposes. The name as it appears in the
text may be inconsistently spelled, partial, or vague.  Moreover, name
prefixes such as <MENTIONED>van</MENTIONED> or <MENTIONED>de la</MENTIONED>
may or may not be included as part of the reference form of a name,
depending on the language and country of origin of the bearer.
</P>
<P>The following attributes are also available for these and similar
elements to help overcome these difficulties:
<LIST TYPE="gloss">
<LABEL><IDENT>key</IDENT></LABEL>
<ITEM>provides an alternative identifier for the object being named,
such as a database record key.</ITEM>
<LABEL><IDENT>reg</IDENT></LABEL>
<ITEM>gives a normalized or regularized form of the name used.</ITEM>
</LIST>   The <IDENT>key</IDENT> attribute may be useful as a means
of gathering together all references to the same individual or
location scattered throughout a document:
<EG><![CDATA [  <q>My dear <rs type=person key=BENM1>Mr. Bennet</rs>,
  </q> said <rs type=person key=BENM2>his lady</rs>
  to him one day, <q>have you heard that
  <rs type=place key=NETP1>Netherfield Park</rs>
  is let at last?</q>
]]>
</EG>
</P>
<P>This use should be distinguished from the case of the
<IDENT>reg</IDENT> (regularization) attribute, which provides a means
of marking the standard form of a referring string as demonstrated
below:
<EG><![CDATA [  <name type=person key=WADLM1 reg='de la Mare, Walter'>
     Walter de la Mare
  </name>
  was born at
  <name key=Ch1 type=place>Charlton</name>, in
  <name key=KT1 type=county>Kent</name>, in 1873.
]]>
</EG>
<!-- Frank Swinnerton, The Georgian Literary Scene, 1938, p. 195 -->
</P>
<P>More detailed tagging of the components of proper names is also
possible, using the additional tag set for names and dates.
</P>
</DIV2>
<DIV2><HEAD>Dates and Times</HEAD>
<P>Tags for the more detailed encoding of times and dates include the
following:
<LIST TYPE="gloss">
<LABEL><GI>date</GI></LABEL>
<ITEM>contains a date in any format. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>calendar</IDENT></LABEL>
<ITEM>indicates the system or calendar to which the date belongs.</ITEM>
<LABEL><IDENT>value</IDENT></LABEL>
<ITEM>gives the value of the date in some standard form, usually
yyyy-mm-dd.</ITEM>
</LIST>
</ITEM>
<LABEL><GI>time</GI></LABEL>
<ITEM>contains a phrase defining a time of day in any format.
Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>value</IDENT></LABEL>
<ITEM>gives the value of the time in a standard form.</ITEM>
</LIST>
</ITEM>
</LIST>
</P>
<P>The <IDENT>value</IDENT> attribute specifies a normalized form
for the date or time, using a recognized format such as ISO 8601.
Partial dates or times (e.g. <Q REND="inline" TYPE="inline">1990</Q>,
<Q REND="inline" TYPE="inline">September 1990</Q>,
<Q REND="inline" TYPE="inline">twelvish</Q>) can usually be expressed
by simply omitting a part of the value supplied; alternatively
imprecise dates or times (for example <Q REND="inline" TYPE="inline">early
August</Q>,
<Q REND="inline" TYPE="inline">some time after ten and before twelve</Q>)
may be expressed as date or time ranges.  If either end of the date or
time range is known to be accurate, (for example, <Q REND="inline" TYPE="inline"
>at some time before 1230</Q>, <Q REND="inline" TYPE="inline">a few
days after Hallowe'en</Q>) the <IDENT>exact</IDENT> attribute may be
used to specify this.
</P>
<P>Examples:
<!---->
<EG><![CDATA [<date value='1980-02-21'>21 Feb 1980</date>
<date value='1990'>1990</date>
<date value='1990-09'>September 1990</date>
]]>
</EG>
<EG><![CDATA [Given on the <date value='1977-06-12'>Twelfth Day of June
in the Year of Our Lord One Thousand Nine Hundred and
Seventy-seven of the Republic the Two Hundredth and first
and of the University the Eighty-Sixth.</date>
]]>
</EG>
<EG><![CDATA [<l>specially when it's nine below zero
<l>and <time value='15:00'>three o'clock in the afternoon</time>
]]>
</EG></P></DIV2>
<DIV2><HEAD>Numbers </HEAD>
<P>Numbers can be written with either letters or digits (<CODE>twenty-one</CODE>,
<CODE>xxi</CODE>, and <CODE>21</CODE>) and their presentation is
language-dependent (e.g. English <MENTIONED>5th</MENTIONED> becomes
Greek <MENTIONED>5.</MENTIONED>; English <MENTIONED>123,456.78</MENTIONED>
equals French
<MENTIONED>123.456,78</MENTIONED>). In natural-language processing or
machine-translation applications, it is often helpful to distinguish
them from other, more <SOCALLED>lexical</SOCALLED> parts of the text.
In other applications, the ability to record a number's value in
standard notation is important. The <GI>num</GI> element provides
this possibility:
<LIST TYPE="gloss">
<LABEL><GI>num</GI></LABEL>
<ITEM>contains a number, written in any form. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>indicates the type of numeric value. Suggested values include:
<KW>fraction</KW>,
<KW>ordinal</KW>  (for ordinal numbers,  e.g. <Q REND="inline" TYPE="inline"
>21st</Q>),
<KW>percentage</KW>, and <KW>cardinal</KW> (an absolute number, e.g.
<Q REND="inline" TYPE="inline">21</Q>, <Q REND="inline" TYPE="inline">
21.5</Q>, etc.)
</ITEM>
<LABEL><IDENT>value</IDENT></LABEL>
<ITEM>supplies the value of the number in an application-dependent
standard form.</ITEM>
</LIST>
</ITEM>
</LIST>
</P>
<P>For example:
<!---->
<EG><![CDATA [<num value='33'>xxxiii</num>
<num type=cardinal value='21'>twenty-one</num>
<num type=percentage value='10'>ten percent</num>
<num type=percentage value='10'>10%</num>
<num type=ordinal value='5'>5th</num>
]]>
</EG>
</P>
</DIV2>
<DIV2><HEAD>Abbreviations and their Expansion</HEAD>
<P>Like names, dates, and numbers, abbreviations may be transcribed
as they stand or expanded; they may be left unmarked, or encoded using
the following element:
<LIST TYPE="gloss">
<LABEL><GI>abbr</GI></LABEL>
<ITEM>contains an abbreviation of any sort. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>expan</IDENT></LABEL>
<ITEM>gives an expansion of the abbreviation.</ITEM>
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>allows the encoder to classify the abbreviation according to
some convenient typology. Sample values include
<MENTIONED>contraction</MENTIONED>,
<MENTIONED>suspension</MENTIONED>, <MENTIONED>brevigraph</MENTIONED>,
<MENTIONED>superscription</MENTIONED>, or <MENTIONED>acronym</MENTIONED>.
 The <IDENT>type</IDENT> attribute may also be given values like
<MENTIONED>title</MENTIONED> (for titles of address),
<MENTIONED>geographic</MENTIONED>,
<MENTIONED>organization</MENTIONED>, etc., describing the nature of
the object referred to.</ITEM></LIST></ITEM></LIST>
</P>
<P>The <GI>abbr</GI> element is useful as a means of distinguishing
semi-lexical items such as acronyms or jargon:
<EG><![CDATA [We can sum up the above discussion as follows:  the identity of a
<abbr>CC</abbr> is defined by that calibration of values which
motivates the elements of its <abbr>GSP</abbr>;
]]>
</EG>
<!-- Halliday and Hassan, Language, context, and text:        -->
<!-- aspects of language in a social-semiotic perspective     -->
<!-- (OUP, 1990), p 104                                       -->
<EG><![CDATA [Every manufacturer of <abbr>3GL</abbr> or <abbr>4GL</abbr>
languages is currently nailing on <abbr>OOP</abbr> extensions
]]>
</EG> <!-- .EXE magazine Editorial, 6.11, (May 1992) p2 -->
</P>
<P>The <IDENT>type</IDENT> attribute may be used to distinguish
types of abbreviation by their function, and the <IDENT>expan</IDENT>
attribute may be used to supply an expansion:
<EG><![CDATA [ <name><abbr type=title expan='Doctor'>Dr.</abbr>
 <abbr type=initial expan='Marilyn'>M.</abbr>
 Deegan</name>
 is the Director of the
 <abbr expan='Computers in Teaching Initiative' type=acronym>
 CTI</abbr> Centre for Textual Studies.
]]>
</EG>
</P>
<P>This element is also particularly useful where manuscript
materials in which abbreviation is very frequent are being
transcribed.
</P>
</DIV2>
<DIV2><HEAD>Addresses</HEAD>
<P>The <GI>address</GI> element is used to mark a postal address of
any kind. It contains one or more <GI>addrLine</GI> elements, one for
each line of the address.
<LIST TYPE="gloss">
<LABEL><GI>address</GI>
</LABEL>
<ITEM>contains a postal or other address, for example of a publisher,
an organization, or an individual.</ITEM>
<LABEL><GI>addrLine</GI></LABEL>
<ITEM>contains one line of a postal or other address.</ITEM>
</LIST>
</P>
<P>Here is a simple example:
<EG><![CDATA [<address>
<addrLine>Computer Center (M/C 135)</addrLine>
<addrLine>1940 W. Taylor, Room 124</addrLine>
<addrLine>Chicago, IL 60612-7352</addrLine>
<addrLine>U.S.A.</addrLine>
</address>
]]>
</EG></P>
<P>The individual parts of an address may be further distinguished by
 using the <GI>name</GI> element discussed above (section <PTR TARGET="nomen"
>).
<EG><![CDATA [<address>
<addrLine>Computer Center (M/C 135)</addrLine>
<addrLine>1940 W. Taylor, Room 124</addrLine>
<addrLine><name type=city>Chicago</name>, IL 60612-7352</addrLine>
<addrLine><name type=country>USA</name></addrLine>
</address>
]]>
</EG>
</P></DIV2></DIV1>
<DIV1 ID="lists"><HEAD>Lists</HEAD>
<P>The element <GI>list</GI> is used to mark any kind of
<TERM>list</TERM>.  A list is a sequence of text items, which may be
ordered, unordered, or a glossary list.  Each item may be preceded by
an item label (in a glossary list, this label is the term being
defined):
<LIST TYPE="gloss">
<LABEL><GI>list</GI></LABEL>
<ITEM>contains any sequence of items organized as a list. Attributes
include:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>describes the form of the list. Suggested values include:
<KW>ordered</KW>,
<KW>bulleted</KW> (for lists with numbered or lettered items, and
lists with bullet-marked items, respectively), <KW>gloss</KW> (for
lists consisting of a set of technical terms, each marked with a <GI>label</GI>
element and accompanied by a gloss or definition marked as an <GI>item</GI>),
and
<KW>simple</KW> (for lists with items not marked with numbers or
bullets).</ITEM></LIST></ITEM>
<LABEL><GI>item</GI></LABEL>
<ITEM>contains one component of a list.</ITEM>
<LABEL><GI>label</GI></LABEL>
<ITEM>contains the label associated with an item in a list; in
glossaries, marks the term being defined.</ITEM></LIST></P>
<P>Individual list items are tagged with <GI>item</GI>.  The first
<GI>item</GI> may optionally be preceded by a <GI>head</GI>, which
gives a heading for the list.  The numbering of a list may be omitted
(if reconstructible), indicated using the <IDENT>n</IDENT> attribute
on each item, or (rarely) tagged as content using the <GI>label</GI>
element.  The following are all thus equivalent:
<EG><![CDATA [<list>
<head>A short list</head>
<item>First item in list.</item>
<item>Second item in list.</item>
<item>Third item in list.</item>
</list>

<list>
<head>A short list</head>
<item n=1>First item in list.</item>
<item n=2>Second item in list.</item>
<item n=3>Third item in list.</item>
</list>

<list>
<head>A short list</head>
<label>1</label><item>First item in list.</item>
<label>2</label><item>Second item in list.</item>
<label>3</label><item>Third item in list.</item>
</list>
]]>
</EG> The styles should not be mixed in the same list.
</P>
<P>A simple two-column table may be treated as a <TERM>glossary list</TERM>,
tagged <GI>list type=gloss</GI>.  Here, each item comprises a <TERM>term</TERM>
and a <TERM>gloss</TERM>, marked with <GI>label</GI> and <GI>item</GI>
respectively.  These correspond to the elements
<GI>term</GI> and <GI>gloss</GI>, which can occur anywhere in prose
text.
<EG><![CDATA [<list type=gloss>
<head>Vocabulary</head>
<label lang=enm>nu</label>        <item>now</item>
<label lang=enm>lhude</label>     <item>loudly</item>
<label lang=enm>bloweth</label>   <item>blooms</item>
<label lang=enm>med</label>       <item>meadow</item>
<label lang=enm>wude</label>      <item>wood</item>
<label lang=enm>awe</label>       <item>ewe</item>
<label lang=enm>lhouth</label>    <item>lows</item>
<label lang=enm>sterteth</label>  <item>bounds, frisks</item>
<label lang=enm>verteth</label>   <item lang=lat>pedit</item>
<label lang=enm>murie</label>     <item>merrily</item>
<label lang=enm>swik</label>      <item>cease</item>
<label lang=enm>naver</label>     <item>never</item>
</list>
]]>
</EG></P>
<P>Where the internal structure of a list item is more complex, it
may be preferable to regard the list as a <TERM>table</TERM>, for
which special-purpose tagging is defined in an additional TEI tag set.

</P>
<P>Lists of whatever kind can, of course, nest within list items to
any depth required. Here, for example, a glossary list contains two
items, each of which is itself a simple list:
<EG><![CDATA [<list type=gloss><label>EVIL</label>
<item><list type=simple>
   <item>I am cast upon a horrible desolate island, void
          of all hope of recovery.</item>
   <item>I am singled out and separated as it were from
         all the world to be miserable.</item>
   <item>I am divided from mankind &mdash a solitaire; one
           banished from human society.</item>
     </list> <!-- end of first nested list --></item>
<label>GOOD</label>
<item><list type=simple>
     <item>But I am alive; and not drowned, as all my
              ship's company were.</item>
     <item>But I am singled out, too, from all the ship's
             crew, to be spared from death...</item>
     <item>But I am not starved, and perishing on a barren place,
            affording no sustenances....</item>
     </list><!-- end of second nested list --></item>
</list><!-- end of glossary list -->
]]>
</EG>
<!-- D Defoe,  Robinson Crusoe  -->
</P>
<P>A list need not necessarily be displayed in list format.  For
example,
<EG><![CDATA [On those remote pages it is written that animals are
divided into <list rend="run-on"><item n='a'>those that belong to the
Emperor,<item n='b'> embalmed ones, <item n='c'> those
that are trained, <item n='d'> suckling pigs, <item n='e'>
mermaids, <item n='f'> fabulous ones, <item n='g'> stray
dogs, <item n='h'> those that are included in this
classification, <item n='i'> those that tremble as if they
were mad, <item n='j'> innumerable ones, <item n='k'> those
drawn with a very fine camel's-hair brush, <item n='l'>
others, <item n='m'> those that have just broken a flower
vase, <item n='n'> those that resemble flies from a
distance.</list>
]]>
</EG>
<!-- Borges, tr. R. Simms 'The analytical language of John Wilkins'
-->
<!-- in Monegal & Reid, Borges: a reader, (Dutton, 1981), p 141 -->
</P>
<P>Lists of bibliographic items should be tagged using the <GI>listBibl</GI>
element, described in the next section.</P></DIV1>
<DIV1 ID="bibls"><HEAD>Bibliographic Citations</HEAD>
<P>It is often useful to distinguish bibliographic citations where
they occur within texts being transcribed for research, if only so
that they will be properly formatted when the text is printed out. The
element <GI>bibl</GI> is provided for this purpose:
<LIST TYPE="gloss">
<LABEL><GI>bibl</GI></LABEL>
<ITEM>contains a loosely-structured bibliographic citation of which
the sub-components may or may not be explicitly tagged. </ITEM>
</LIST>
</P>
<P>Where the components of a bibliographic reference are to be
distinguished, the following elements may be used as appropriate. It
is generally useful to mark at least those parts (such as the titles
of articles, books, and journals) which will need special formatting. 
The other elements are provided for cases where particular interest
attaches to such details.
<!---->
<LIST TYPE="gloss">
<LABEL><GI>author</GI></LABEL>
<ITEM>in a bibliographic reference, contains the name of the
author(s), personal or corporate, of a work; the primary
<TERM>statement of responsibility</TERM> for any bibliographic item.
</ITEM>
<LABEL><GI>biblScope</GI></LABEL>
<ITEM>defines the scope of a bibliographic reference, for example as
a list of page numbers, or a named subdivision of a larger work.</ITEM>
<LABEL><GI>date</GI></LABEL>
<ITEM>contains a date in any format.</ITEM>
<LABEL><GI>editor</GI></LABEL>
<ITEM>secondary <TERM>statement of responsibility</TERM> for a
bibliographic item, for example the name of an individual, institution
or organization, (or of several such) acting as editor, compiler,
translator, etc.  Attributes include:<LIST TYPE="gloss">
<LABEL><IDENT>role</IDENT></LABEL>
<ITEM>specifies the nature of the intellectual responsibility. Sample
values include <MENTIONED>translator</MENTIONED>,
<MENTIONED>compiler</MENTIONED>,
<MENTIONED>illustrator</MENTIONED>, etc.; the default value is
<MENTIONED>editor</MENTIONED>. </ITEM></LIST></ITEM>
<LABEL><GI>imprint</GI></LABEL>
<ITEM>groups information relating to the publication or distribution
of a bibliographic item.  </ITEM>
<LABEL><GI>publisher</GI></LABEL>
<ITEM>provides the name of the organization responsible for the
publication or distribution of a bibliographic item.</ITEM>
<LABEL><GI>pubPlace</GI></LABEL>
<ITEM>contains the name of the place where a bibliographic item was
published.</ITEM>
<LABEL><GI>series</GI></LABEL>
<ITEM>contains information about the series in which a book or other
bibliographic item has appeared.</ITEM>
<LABEL><GI>title</GI></LABEL>
<ITEM>contains the title of a work, whether article, book, journal,
or series, including any alternative titles or subtitles. Attributes
include
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>categorizes the title in some way, for example as
<MENTIONED>main</MENTIONED>,
<MENTIONED>subordinate</MENTIONED>, etc. </ITEM>
<LABEL><IDENT>level</IDENT></LABEL>
<ITEM>indicates the bibliographic <TERM>level</TERM> or class of
title. Legal values are described in section <PTR TARGET="faces">.</ITEM>
</LIST></ITEM></LIST>
</P>
<P>For example, the following editorial note might be transcribed as
shown:
<!-- Cook on Malory, p. 1 -->
<Q REND="display">He was a member of Parliament for Warwickshire in
1445, and died March 14, 1470 (according to Kittredge, <TITLE>Harvard
Studies</TITLE> 5. 88ff).
</Q>
<EG><![CDATA [He was a member of Parliament for Warwickshire in 1445, and died
March 14, 1470 (according to <bibl><author>Kittredge</author>,
<title>Harvard Studies</title> <biblScope>5. 88ff</biblScope></bibl>).
]]>
</EG></P>
<P>For lists of bibliographic citations, the <GI>listBibl</GI>
element should be used; it may contain a series of <GI>bibl</GI>
elements.  For an example, see the list in <PTR TARGET="bibapp">.</P></DIV1>
<DIV1 ID="tables"><HEAD>Tables</HEAD>
<P>Tables represent a sizable challenge for any text processing
system, but simple tables, at least, appear in so many texts that even
in the simplified TEI tag set presented here, markup for tables is
necessary.  The following  elements are provided for this purpose:
<LIST TYPE="gloss">
<LABEL><GI>table</GI></LABEL>
<ITEM>contains text displayed in tabular form, in rows and columns.
Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>rows</IDENT></LABEL>
<ITEM>indicates the number of rows in the table.</ITEM>
<LABEL><IDENT>cols</IDENT></LABEL>
<ITEM>indicates the number of columns in each row of the table.</ITEM>
</LIST></ITEM>
<LABEL><GI>row</GI></LABEL>
<ITEM>contains one row of a table. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>role</IDENT></LABEL>
<ITEM>indicates the kind of information held in the cells of this
row. Suggested values include
<KW>label</KW> for labels or descriptive information, and <KW>data</KW>
for actual data values.
</ITEM>
</LIST>
</ITEM>
<LABEL><GI>cell</GI></LABEL>
<ITEM>contains one cell of a table. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>role</IDENT></LABEL>
<ITEM>indicates the kind of information held in the cell. Suggested
values include
<KW>label</KW> for labels or descriptive information, and <KW>data</KW>
for actual data values.</ITEM>
<LABEL><IDENT>cols</IDENT></LABEL>
<ITEM>indicates the number of columns occupied by this cell.</ITEM>
<LABEL><IDENT>rows</IDENT></LABEL>
<ITEM>indicates the number of rows occupied by this cell.</ITEM>
</LIST></ITEM></LIST>
</P>
<P>For example, Defoe uses mortality tables like the following in the
<TITLE LEVEL="M">Journal of the Plague Year</TITLE> to show the rise
and ebb of the epidemic:<EG><![CDATA [<p>It was indeed coming on amain, for the burials that
same week were in the next adjoining parishes thus:&mdash;
<table rows=3 cols=4>
<row role='data'>
<cell role='label'>St. Leonard's, Shoreditch</cell>
      <cell>64</cell> <cell>84</cell> <cell>119</cell></row>
<row role='data'>
<cell role='label'>St. Botolph's, Bishopsgate</cell>
      <cell>65</cell> <cell>105</cell> <cell>116</cell></row>
<row role='data'>
<cell role='label'>St. Giles's, Cripplegate</cell>
     <cell>213</cell> <cell>421</cell> <cell>554</cell></row>
</table>
<p>This shutting up of houses was at first counted a very cruel
and unchristian method, and the poor people so confined made
bitter lamentations. ... </p>
]]></EG>
<!-- following row was cut for simplicity of exposition: <row
role='label'><cell></cell><cell></cell>     <cell>The next week
prodigiously increased, as:</cell>    <cell>To the 1st of Aug.
thus:</cell> </row>
-->
<!-- Daniel Defoe, A Journal of the Plague Year, ... ed. Anthony
Burgess and Christopher Bristow (1722; Harmondsworth:  Penguin, 1966),
pp. 66-67.--><!-- Not clear what to do about making these tables
either easily displayable or easily editable using existing software.
A version of TEI Lite with the extra elements and attributes needed by
Author/Editor?  A different one for Adept?  Perhaps one could define
all the extra attributes as FIXED, so they wouldn't get exported, but
that still doesn't provide for the extra elements.--><!--SQ table
model has tblBody (our table), tblCDefs, tblCDef, tblRows, tblRow (our
row), tblCell (our cell).  SQ requires attributes for row and column
separators (on cdefs, cdef, rows, row, and cell, as needed),
horizontal and vertical alignment, measurement units, and row and
column starting position (as well as spans) of cells.
--></P>
</DIV1>
<DIV1 ID="xfigs"><HEAD>Figures and Graphics</HEAD>
<P>Not all the components of a document are necessarily textual. The
most straightforward text will often contain diagrams or
illustrations, to say nothing of documents in which image and text are
inextricably intertwined, or electronic resources in which the two are
complementary. </P>
<P>The encoder may simply record the presence of a graphic within the
text, possibly with a brief description of its content, by using the
elements described in this section. The same elements may also be used
to embed digitized versions of the graphic within an electronic
document.
<LIST TYPE="gloss">
<LABEL><GI>figure</GI></LABEL>
<ITEM>marks the spot at which a graphic is to be inserted in a
document. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>entity</IDENT></LABEL>
<ITEM>the name of a pre-defined system entity containing a digitized
version of the graphic to be inserted.</ITEM></LIST></ITEM>
<LABEL><GI>figDesc</GI></LABEL>
<ITEM>contains a textual description of the appearance or content of
a graphic, for use when documenting an image without displaying it.</ITEM>
</LIST></P>
<P>Any textual information accompanying the graphic, such as a
heading and/or caption, may be included within the <GI>figure</GI>
element itself, in a <GI>head</GI> and one or more <GI>p</GI>
elements, as may also any text appearing within the graphic itself. It
is strongly recommended that a prose description of the image be
supplied, as the content of a <GI>figDesc</GI> element, for the use
of applications which are not able to render the graphic, and to
render the document accessible to vision-impaired readers. (Such text
is not normally considered part of the document proper.)
</P>
<P>The simplest use for these elements is to mark the position of a
graphic, as in this example:
<EG><![CDATA [<pb n=412>
<figure></figure>
<pb n=413>
]]>
</EG> (Note that the end-tag may not be omitted, even though the
element has no content). More usually, a graphic will have at the
least an identifying title, which should be encoded using the <GI>head</GI>
element. It is also often convenient to include a brief description of
the image, as in the following example:
<EG><![CDATA [  <figure>
    <head>Mr Fezziwig's Ball</head>
    <figdesc>A Cruikshank engraving showing Mr Fezziwig leading
       a group of revellers.</figdesc>
  </figure>
]]>
</EG></P>
<P>When a digitized version of the graphic concerned is available, it
is clearly preferable to embed it at the appropriate point within the
document. Graphic elements such as pictures are typically stored in
separate entities (files) from those containing the text of a
document, and using a different notation (storage format). The TEI
Lite DTD supports graphics encoded using the CGM, TIFF, or JPEG
standards under the SGML notation names <MENTIONED>cgm</MENTIONED>,
<MENTIONED>tiff</MENTIONED>, and <MENTIONED>jpeg</MENTIONED>.<NOTE>Other
notations may however be used, provided that an appropriate NOTATION
declaration is added to the DTD; see the chapter on tables, formulae,
and graphics in TEI P3, or any reference work on SGML, for more
details of the SGML <KW>NOTATION</KW> declaration.</NOTE><!--
assuming MSM agrees with me that we should include Notation declns -->
<!-- for these within TEI Lite.... -->
<!-- yep. i do... --></P>
<P>Whatever format is used to encode the image, it may be embedded
within the document in the same way. The first step is to declare an
SGML entity of a particular type, which specifies a name for the
entity, an external identifier (such as a file name) for it, and the
notation used. For example, assuming that the digitized image of Mr
Fezziwig's ball were held in TIFF format in the file
<IDENT>fezzi.tff</IDENT>, an entity declaration like the following
would be necessary:
<EG><![CDATA [<!ENTITY fezziPic SYSTEM "fezzi.tff" NDATA tiff>
]]>
</EG> All such declarations must be processed before the SGML document
itself;
<!-- either -->
<!-- ways of doing this are beyond the scope of the present document.
--><!-- or --> with the TEI Lite DTD this may be accomplished by
including them in a file called <IDENT>litedecls.ent</IDENT> or
whatever file has the public identifier <IDENT>-//TEI U5-1995//DTD
TEI Lite 1.0 Extensions//EN</IDENT>.<!-- need a section/appendix on
putting it all together -->
</P>
<P>With the above declaration in force, all that is necessary to
embed the digitized image at the appropriate point in the document is
to supply a value for the <IDENT>entity</IDENT> attribute of the <GI>figure</GI>
element:
<EG><![CDATA [  <figure entity=fezziPic>
   <head>Mr Fezziwig's Ball</head>
   <figdesc>A Cruikshank engraving showing Mr Fezziwig leading
      a group of revellers.</figdesc>
   </figure>
]]>
</EG></P></DIV1>
<DIV1 ID="X4"><HEAD>Interpretation and Analysis</HEAD>

<P>It is often said that <EMPH>all</EMPH> markup is a form of
interpretation or analysis.  While it is certainly difficult, and may
be impossible, to distinguish firmly between
<SOCALLED>objective</SOCALLED> and <SOCALLED>subjective</SOCALLED>
information in any universal way, it remains true that judgments
concerning the latter are typically regarded as more likely to provoke
controversy than those concerning the former.  Many scholars
therefore prefer to record such interpretations only if it is possible
to alert the reader that they are considered more open to dispute
than the rest of the markup. This section describes some of the
elements provided by the TEI scheme to meet this need. </P>

<DIV2><HEAD>Orthographic Sentences</HEAD>
<P>Interpretation typically ranges across the whole of a text, with
no  particular respect to other structural units. A useful preliminary
to intensive interpretation is therefore to segment the text into
discrete and identifiable units, each of which can then bear a label
for use as a sort of <SOCALLED>canonical reference</SOCALLED>.  To
facilitate such uses, these units may not cross each other, nor nest
within each other. They may conveniently be represented using the
following element:
<LIST TYPE="gloss">
<LABEL><GI>s</GI></LABEL>
<ITEM>identifies an <TERM>s-unit</TERM> within a document, for
purposes of establishing a simple canonical referencing scheme
covering the entire text. Attributes include
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>categorizes the unit (e.g. as <MENTIONED>declarative</MENTIONED>,
<MENTIONED>interrogative</MENTIONED>, etc.)</ITEM></LIST></ITEM>
</LIST></P>
<P>As the name suggests, the <GI>s</GI> element is most commonly
used (in linguistic applications at least) for marking <TERM>orthographic
sentences</TERM>, that is, units defined by orthographic features such
as punctuation.  For example, the passage from
<TITLE>Jane Eyre</TITLE> discussed earlier might be divided into
s-units as follows:<EG><![CDATA [<pb n='474'>
<div1 type=chapter n='38'>
<p><s n=001>Reader, I married him.</s>
<s n=002>A quiet wedding we had:</s>
<s n=003>he and I, the parson and clerk, were alone present.</s>
<s n=004>When we got back from church, I went
into the kitchen of the manor-house, where Mary was cooking the dinner,
and John cleaning the knives, and I said &mdash;</s>
<p><q><s n=005>Mary, I have been married to Mr Rochester
this morning.</s></q> ...
]]>
</EG> The end-tags shown above are not strictly necessary, since <GI>s</GI>
elements cannot nest: the beginning of one <GI>s</GI> element implies
that the previous one has finished. When s-units are tagged as shown
above, it is advisable to tag the entire text end-to-end, so that
every word in the text being analysed will be contained by exactly one
<GI>s</GI> element, whose identifier can then be used to specify a
unique reference for it. If the identifiers used are unique within the
document, then the <IDENT>id</IDENT> attribute might be used in
preference to the <IDENT>n</IDENT> used in the above example.</P></DIV2>

<DIV2><HEAD>General-Purpose Interpretation Elements</HEAD>

<P>A more general-purpose segmentation element, the <GI>seg</GI> element, has
already been introduced for use in identifying otherwise unmarked
targets of cross references and hypertext links (see section <PTR TARGET="xr"
>); it identifies some phrase-level portion of text to which the
encoder may assign a user-specified <IDENT>type</IDENT>, as well as a
unique identifier; it may thus be used to tag textual features for
which there is no provision in the published TEI Guidelines.
</P>
<P>For example, the Guidelines provide no <GI TEI="NO">apostrophe</GI>
element to mark parts of a literary text in which the narrator
addresses the reader (or hearer) directly. One approach might be to
regard these as instances of the <GI>q</GI> element, distinguished
from others by an appropriate value for the
<IDENT>who</IDENT> attribute. A possibly simpler, and certainly more
general, solution would however be to use the <GI>seg</GI> element as
follows:<EG><![CDATA [<div1 type=chapter n='38'>
<p><seg type='apostrophe'>Reader, I married him.</seg>
A quiet wedding we had: ...]]>
</EG> The <IDENT>type</IDENT> attribute on the <GI>seg</GI> element
can take any value, and so can be used to record phrase-level
phenomena of any kind; it is good practice to record the values used
and their significance in the header.
</P>
<P>A <GI>seg</GI> element of one type (unlike the <GI>s</GI>
element which it superficially resembles) can be nested within a <GI>seg</GI>
element of the same or another type. This enables quite complex
structures to be represented; some examples were given in section
<PTR TARGET="xatts"> above. However, because it must respect the
requirement of SGML that elements be properly nested, and may not cut
across each other, it cannot cope with the common requirement to
associate an interpretation with arbitrary segments of a text which
may completely ignore the document hierarchy. It also requires that
the interpretation itself be represented by a single coded value in
the
<IDENT>type</IDENT> attribute.</P>
<P>Neither restriction applies to the <GI>interp</GI> element, which
provides powerful features for the encoding of quite complex
interpretive information in a relatively straightforward manner.
<LIST TYPE="gloss">
<LABEL><GI>interp</GI></LABEL>
<ITEM>provides for an interpretive annotation which can be  linked to
a span of text. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>value</IDENT></LABEL>
<ITEM>identifies the specific phenomenon being annotated.</ITEM>
<LABEL><IDENT>resp</IDENT></LABEL>
<ITEM>indicates who is responsible for the interpretation.</ITEM>
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>indicates what kind of phenomenon is being noted in the
passage. Sample values include
<CODE>image</CODE>,
<CODE>character</CODE>,
<CODE>theme</CODE>, <CODE>allusion</CODE>, or the name of a
particular discourse type whose instances are being identified.
</ITEM>
<LABEL><IDENT>inst</IDENT></LABEL>
<ITEM>points to instances of the analysis or interpretation
represented by the current element.</ITEM>
</LIST>
</ITEM>
<LABEL><GI>interpGrp</GI></LABEL>
<ITEM>collects together <GI>interp</GI> tags.</ITEM>
</LIST> The <GI>interp</GI> element allows the encoder to specify both the class of
an interpretation, and the particular instance of that class which the
interpretation involves. Thus, whereas with <GI>seg</GI> one can say
simply that something is an apostrophe, with
<GI>interp</GI> one can say that it is an instance (apostrophe) of a
larger class (rhetorical figures).</P>
<P>Moreover, <GI>interp</GI>  is an empty element, which must be
linked to the passage to which it applies either by means of the
<IDENT>ana</IDENT>  attribute discussed in section <PTR TARGET="xatts">
 above, or by means of its own <IDENT>inst</IDENT> attribute. This
means that any kind of analysis can be represented, with no need to
respect the SGML document hierarchy, and also facilitates the grouping
of analyses of a particular type together. A special purpose <GI>interpGrp</GI>
element is provided for the latter purpose.
</P>
<P>For example, suppose that you wish to mark such diverse aspects of
a text as  themes or subject matter, rhetorical figures, and the
locations of individual scenes of the narrative. Different portions of
our sample passage from <TITLE>Jane Eyre</TITLE>, for example, might
be associated with the rhetorical figures of apostrophe, hyperbole,
and metaphor; with subject-matter references to churches, servants,
cooking, postal service, and honeymoons; and with scenes located in
the church, in the kitchen, and in an unspecified location (drawing
room?).
</P>
<P>These interpretations could be placed anywhere within the <GI>text</GI>
element; it is however good practice to put them all in the same place
(e.g. a separate section of the front or back matter), as in the
following example:
<EG><![CDATA [<back>
<div1 type='Interpretations'>
<p>
<interp id='fig-apos'  resp='LB, MSM'
     type='figure of speech' value='apostrophe'>
<interp id='fig-hyp'   resp='LB, MSM'
     type='figure of speech' value='hyperbole'>
<!-- ... -->
<interp id='set-church'  resp='LB, MSM'
     type='scene-setting' value='church'>
<!-- ... -->
<interp id='ref-church'  resp='LB, MSM'
     type='reference' value='church'>
<interp id='ref-serv'    resp='LB, MSM'
     type='reference' value='servants'>
<!-- ... -->
</p></div1>
]]></EG>
<!-- the best place for this list would be in the ANALYSIS
--><!-- element of the header, but meta class elements are only --><!--
allowed within TEXT.  bug, perhaps.  or nwi. -->
<!-- aye capn definitely looks like a bug to me (lb) -->
</P>
<P>The evident redundancy of this encoding can be considerably
reduced by using the <GI>interpGrp</GI> element to group together all
those <GI>interp</GI> elements which share common attribute values,
as follows: 
<EG><![CDATA [<back>
<div1 type='Interpretations'>
<p>
<interpGrp type='figure of speech' resp='LB, MSM'>
<interp id='fig-apos' value='apostrophe'>
<interp id='fig-hyp'  value='hyperbole'>
<interp id='fig-meta' value='metaphor'>
<!-- ... -->
</interpGrp>
<interpGrp type='scene-setting' resp='LB, MSM'>
<interp id='set-church'  value='church'>
<interp id='set-kitch'   value='kitchen'>
<interp id='set-unspec'  value='unspecified'>
<!-- ... -->
</interpGrp>
<interpGrp type='reference' resp='LB, MSM'>
<interp id='ref-church' value='church'>
<interp id='ref-serv'   value='servants'>
<interp id='ref-cook'   value='cooking'>
<!-- ... -->
</interpGrp>
</p></div1>
]]></EG></P>
<P>Once these interpretation elements have been defined, they can be
linked with the parts of the text to which they apply in either or
both of two ways. The <IDENT>ana</IDENT> attribute can be used on
whichever element is appropriate:
<EG><![CDATA [<div1 type=chapter n='38'>
<p id='P38.1' ana='set-church set-kitch'>
<s id=P38.1.1 ana='fig-apos'>Reader, I married him.</s>
...
]]>
</EG> Note in this example that since the paragraph has two settings
(in the church  and in the kitchen), the identifiers of both have been
supplied.
</P>
<P>Alternatively, the <GI>interp</GI> elements can point to all the
parts of the text to which they apply, using their <IDENT>inst</IDENT>
attribute:
<EG><![CDATA [<interp id='fig-apos' type='figure of speech' resp='LB, MSM'
   value='apostrophe' inst='P38.1.1'>
<!-- ... -->
<interp id='set-church'  type='scene-setting' value='church'
   inst='P38.1' resp='LB, MSM'>
<interp id='set-kitch' type='scene-setting' value='kitchen'
   inst='P38.1' resp='LB, MSM'>
<!-- ... -->
]]></EG></P>
<P>The <GI>interp</GI> element is not limited to any particular type of
analysis. The literary analysis shown above is but one possibility;
one could equally well use <GI>interp</GI> to capture a linguistic
part-of-speech analysis. For example, the example sentence given in
section <PTR TARGET="xatts"> assumes a linguistic analysis which
might be represented as follows:
<EG><![CDATA [<interp id=NP1 type=pos value='noun phrase, singular'>
<interp id=VV1 type=pos value='inflected verb, present-tense singular'>
...

]]></EG></P>
</DIV2></DIV1>
<DIV1 ID="techdoc"><HEAD>Technical Documentation</HEAD>
<P>Although the focus of this document is on the use of the TEI
scheme for the encoding of existing <SOCALLED>pre-electronic</SOCALLED>
documents,  the same scheme may also be used for the encoding of new
documents. In the preparation of new documents (such as this one),
SGML has much to recommend it: the document's structure can be clearly
represented, and the same electronic text can be re-used for many
purposes --- to provide both online hypertext or browsable versions
and well-formatted typeset versions from a common SGML source for
example. </P>
<P>To facilitate this, a small number of additional elements are
included in TEI Lite as extensions of the main TEI DTD, for use in
marking particular features of technical documents in general, and of
SGML-related documents in particular.</P>
<DIV2><HEAD>Additional Elements for Technical Documents</HEAD>
<P>The following elements may be used to mark particular features of
technical documents:
<LIST TYPE="gloss">
<LABEL><GI>eg</GI></LABEL>
<ITEM>contains a single short example of some technical topic being
discussed, e.g. a code fragment or a sample of SGML encoding.</ITEM>
<LABEL><GI>code</GI></LABEL>
<ITEM>contains a short fragment of code in some formal language
(often a programming language).</ITEM>
<LABEL><GI>ident</GI></LABEL>
<ITEM>contains an identifier of some kind, e.g. a variable name or
the name of an SGML element or attribute.</ITEM>
<LABEL><GI>gi</GI></LABEL>
<ITEM>contains a special type of identifier: an SGML generic
identifier, or element name.</ITEM>
<LABEL><GI>kw</GI></LABEL>
<ITEM>contains a keyword in some formal language.</ITEM>
<LABEL><GI>formula</GI></LABEL>
<ITEM>contains a mathematical or chemical formula, optionally
presented in some non-SGML notation. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>notation</IDENT></LABEL>
<ITEM>specifies the notation used to represent the body of the
formula. Default value is
<KW>tex</KW>, meaning the formula is represented using the TeX
typesetting system.</ITEM></LIST></ITEM></LIST>
</P>
<P>The following example shows how these elements might be used to
encode a passage from a tutorial introducing the Fortran programming
language:
<EG><![CDATA [<p>It is traditional to introduce a language with a program like the
following:
<eg>
   CHAR*12 GRTG
   GRTG = 'HELLO WORLD'
   PRINT *, GRTG
   END
</eg></p>
<p>This simple example first declares a variable <ident>GRTG</ident>, in
the line <code>CHAR*12 GRTG</code>, which identifies <ident>GRTG</ident>
as consisting of 12 bytes of type <kw>CHAR</kw>.  To this variable,
the value <mentioned>HELLO WORLD</mentioned>
is then assigned. This is followed by a <kw>PRINT</kw> statement and an
<kw>END</kw> statement.
]]>
</EG>
</P>
<P>A formatting application, given a text like that above, can be
instructed to format examples appropriately (e.g. to preserve line
breaks, or to use a distinctive font). Similarly, the use of tags such
as <GI>ident</GI> and <GI>kw</GI> greatly facilitates the
construction of a useful index.</P>
<P>The <GI>formula</GI> element should be used to enclose a
mathematical or chemical formula presented within the text as a
distinct item. Since formulae generally include a large variety of
special typographic features not otherwise present in ordinary text,
it will usually be necessary to present the body of the formula in a
specialized notation. The notation used should be specified by the
<IDENT>notation</IDENT> attribute, as in the following example:
<EG><![CDATA [<formula notation=tex>
  \(E = mc^{2}\)
</formula>
]]>
</EG></P>
<P>The <IDENT>tex</IDENT> notation is pre-defined for the TEI Lite
DTD; other notations may be used if desired, but they must first be
defined by a <KW>notation</KW> declaration within the DTD.
</P>
<P>Almost any sequence of characters is permitted within the body of
a <GI>formula</GI> element, as far as an SGML-aware processor is
concerned. The data is passed unchanged by the parser to whatever
application has been associated with the notation specified. The only
exception to this rule is that the parser <EMPH>will</EMPH> recognize
anything that resembles the start of an SGML end-tag, i.e. the
character less-than (&lt;) followed immediately by a solidus (/) and
an alphabetic character. The following imaginary example would thus
cause a confusing sequence of SGML parser errors:
<EG><![CDATA [<formula notation=tex>
  \(E = mc^{2}</a\)
</formula>
]]>
</EG> Fortunately, the sequence <KW><![RCDATA [&lt;/]]></KW> is
quite unlikely to occur in most mathematical notations in practical
use: if it does occur, special steps must be taken which are beyond
the scope of this document (see the full Guidelines for more
information).
</P>
<P>This problem exists in a more acute form when SGML encoding is the
subject of discussion within a technical document, itself encoded in
SGML. In such a document, it is essential to distinguish
clearly the SGML markup occurring within examples from that marking up
the document itself, and end-tags are highly likely to occur. The most
general solution is to mark off the body of each SGML example as
containing data which is not to be scanned for SGML mark-up by the
parser. This is achieved by enclosing it within  a special SGML
construct called a <TERM><KW>CDATA</KW> marked section</TERM>,  as
in the following example:
<EG><![RCDATA [<p>A list should be encoded as follows:
<eg><![ CDATA [
   <list>
   <item>First item in the list</item>
   <item>Second item</item>
   </list>
&rsqb;]>
</eg>
The <gi>list</gi> element consists of a series of <gi>item</gi>
elements.
]]>
</EG></P>
<P>The <GI>list</GI> element used within the example above will not
be regarded as forming part of the document proper, because it is
embedded within a marked section (beginning with the special markup
declaration <KW><![RCDATA [&lt;![ CDATA [ ]]></KW>, and ending with
<KW><![RCDATA [&rsqb;&rsqb;>]]></KW>).</P>
<P>Note also the use of the <GI>gi</GI> element to tag references to
SGML element names (or <TERM>generic identifiers</TERM>) within the
body of the text.</P>
</DIV2>
<DIV2><HEAD>Generated Divisions</HEAD>
<P>Most modern document production systems have the ability to
generate automatically whole sections such as a table of contents or
an index. The TEI Lite scheme provides an element to mark the location
at which such a generated section should be placed.
<LIST TYPE="gloss">
<LABEL><GI>divGen</GI></LABEL>
<ITEM>indicates the location at which a textual division generated
automatically by a text-processing application is to appear.
Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>specifies what type of generated text division (e.g. index,
table of contents, etc.) is to appear. Sample values include:
<CODE>index</CODE> (an index is to be generated and inserted at this
point),
<CODE>toc</CODE> (a table of contents),
<CODE>figlist</CODE> (a list of figures), and
<CODE>tablist</CODE> (a list of tables).</ITEM></LIST>
</ITEM></LIST>
</P>
<P>The <GI>divGen</GI> element can be placed anywhere that a
division element would be legal, as in the following example:
<EG><![CDATA [<front>
<titlePage> ... </titlePage>
<divGen type=toc>
<div type='Preface'><head>Preface</head> ... </div>
</front>
<body> ... </body>
<back>
<div1><head>Appendix</head> ... </div1>
<divGen type=index n='Index'>
</back>
]]></EG></P>
<P>This example also demonstrates the use of the <IDENT>type</IDENT>
attribute to distinguish the different kinds of division to be
generated: in the first case a table of contents (a
<MENTIONED>toc</MENTIONED>) and in the second an index.</P>
<P>When an existing index or table of contents is to be encoded
(rather than one being generated) for some reason, the
<GI>list</GI> element discussed in section <PTR TARGET="lists">
should be used. </P></DIV2>

<DIV2 ID="index"><HEAD>Index Generation</HEAD>

<P>While production of a table of contents from a properly tagged
document is generally unproblematic for an automatic processor, the
production of a good quality index will often require more careful
tagging. It may not be enough simply to produce a list of all parts
tagged in some particular way, although extracting (for example) all
occurrences of elements such as <GI>term</GI> or <GI>name</GI> will
often be a good departure point for an index. </P>
<P>The TEI DTD provides a special purpose <GI>index</GI> tag which
may be used to mark both the parts of the document which should be
indexed, and how the indexing should be done.
<LIST TYPE="gloss">
<LABEL><GI>index</GI></LABEL>
<ITEM>marks a location to be indexed for some purpose. Attributes
include:<LIST TYPE="gloss">
<LABEL><IDENT>level1</IDENT></LABEL>
<ITEM>gives the main form of the index entry.</ITEM>
<LABEL><IDENT>level2</IDENT></LABEL>
<ITEM>gives the second-level form, if any.</ITEM>
<LABEL><IDENT>level3</IDENT></LABEL>
<ITEM>gives the third-level form, if any.</ITEM>
<LABEL><IDENT>level4</IDENT></LABEL>
<ITEM>gives the fourth-level form, if any.</ITEM>
<LABEL><IDENT>index</IDENT></LABEL>
<ITEM>indicates which index (of several) the index entry belongs to.</ITEM>
</LIST></ITEM></LIST></P>
<P>For example, the second paragraph of this section might include
the following:<EG><![CDATA [...
TEI lite also provides a special purpose <gi>index</gi> tag
<index level1='indexing'>
<index level1='index (tag)' level2='use in index generation'>
which may be used ...
]]></EG></P>
<P>The <GI>index</GI> element can also be used to provide a form of
interpretive or analytic information.  For example, in a study of
Ovid, it might be desired to record all the poet's references to
different figures, for comparative stylistic study.  In the following
lines of the <TITLE>Metamorphoses</TITLE>, such a study would record
the poet's references to Jupiter (as
<MENTIONED>deus</MENTIONED>, <MENTIONED>se</MENTIONED>, and as the
subject of <MENTIONED>confiteor</MENTIONED> [in inflectional form
number 227]), to Jupiter-in-the-guise-of-a-bull (as
<MENTIONED>imago tauri fallacis</MENTIONED> and the subject of
<MENTIONED>teneo</MENTIONED>), and so on.<NOTE PLACE="foot">The
analysis is taken, with permission, from Willard McCarty and Burton
Wright, <TITLE>An Analytical Onomasticon to the Metamorphoses of Ovid</TITLE>
(Princeton: Princeton University Press, forthcoming).  Some
simplifications have been undertaken.</NOTE>
<EG><![CDATA [<l n=3.001>iamque deus posita fallacis imagine tauri
<l n=3.002>se confessus erat Dictaeaque rura tenebat
]]>
</EG> This need might be met using the <GI>note</GI> element
discussed in section <PTR TARGET="z633">, or with the <GI>interp</GI>
element discussed in section <PTR TARGET="X4">. Here we demonstrate
how it might also be satisfied by using the <GI>index</GI> element.
</P>
<P>We assume that the object is to generate more than one index: one
for names of deities (called <IDENT>dn</IDENT>), another for
onomastic references (called <IDENT>on</IDENT>), <!-- ??? or maybe
not - I haven't the faintest idea myself --> a third for pronominal
references (called <IDENT>pr</IDENT>) and so forth. One way of
achieving this might be as follows:
<EG><![CDATA [<l n=3.001>iamque deus posita fallacis imagine tauri
     <index index="dn" level1="Iuppiter" level2="deus">
     <index index="on" level1="Iuppiter (taurus)"
                       level2="imago tauri fallacis"></l>
<l n=3.002>se confessus erat Dictaeaque rura tenebat
     <index index="pr"    level1="Iuppiter" level2="se">
     <index index="v"     level1="Iuppiter" level2="confiteor (v227)">
     <index index="mons"  level1="Dicte" level2="rura Dictaea">
     <index index="regio" level1="Creta" level2="rura Dictaea">
     <index index="v"     level1="Iuppiter (taurus)"
                          level2="teneo (v9)"></l>

]]>
</EG>  For each <GI>index</GI> element above, an entry will be
generated in the appropriate index, using  as headword the value of
the
<IDENT>level1</IDENT> attribute, and as secondary keyword that of the
<IDENT>level2</IDENT> attribute, which contains the word cited in
nominative form. The actual reference will be taken from the context
in which the <GI>index</GI> element appears, i.e. in this case the
identifier of the <GI>l</GI> element containing it. 
</P></DIV2>
</DIV1>
<DIV1 ID="chars"><HEAD>Character Sets, Diacritics, etc.</HEAD>
<P>For those working with standard forms of the European languages,
the TEI recommendations for character set use are simple.  For local
use, use whatever character set is supported by your machine and your
software.  If your software makes direct keyboard entry of special
characters difficult, you may elect to define your own keyboarding
conventions (for example to represent accented letters by typing the
appropriate accent immediately after the letter, or by using special
sequences unlikely to appear in normal text such as <MENTIONED>aE</MENTIONED>
for
<MENTIONED>&auml;</MENTIONED>). Global search and replace functions
can then be used to turn these keyboard shorthands into the proper
characters. If you work with non-Latin scripts and there is a standard
transliteration scheme in your field (e.g. for ancient Greek the beta
code of the Thesaurus Lingu&aelig; Gr&aelig;c&aelig;), use it.  Any
transliteration used should be reversible (this rules out a surprising
number of schemes commonly used in normal writing) and will be most
usable if it requires no special ligatures, ties, or diacritics (this
rules out a surprising number of the remainder).
</P>
<P>For interchange of files among systems, use SGML entity references
to replace all characters not in the following list of characters
which almost always survive electronic interchange intact:
<EG><![CDATA [a b c d e f g h i j k l m n o p q r s t u v w x y z
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
0 1 2 3 4 5 6 7 8 9
" % & ' ( ) * + , - . / : ; < = > ? _   (space)
]]>
</EG>This list excludes the following characters which, to the
frequent annoyance of unsuspecting users, often do not survive
transfer across national boundaries or over standard wide-area
networks.  If you're just going from your Mac to your PC, though,
these characters will probably be safe:
<EG><![CDATA [! # $ [ \ ] ^ ` { } | ~]]>
</EG></P>
<P>To ensure proper transmission across multi-vendor networks, entity
references must be used for all accented and extended-Latin
characters, all non-Latin characters, and all symbols not on
conventional computer keyboards.
</P>
<P>You may use your own SGML entity names in TEI-conformant files, if
you wish and if you provide standard SGML entity declarations for
them, but the standard names (though long-winded) have the advantage
of clarity; the characters intended are reasonably clear to any
speaker of English who recognizes that a character is being named,
often even without recourse to any list.  This is not true of many
other schemes for representing accented characters.
</P>
<P>The entity names needed for the characters listed above as
<SOCALLED>unsafe</SOCALLED> and for the accented characters of some
major Western European languages are given below. Lists of public
entity sets and their contents are available in any reference work on
SGML:  the names given below are from ISO public entity sets, are
widely used, and are therefore recommended.</P>
<P>When the character you need does not appear in the public entity
sets, you may wish to generate a name using the same naming
conventions used in ISO public entity sets, as described here:<LIST TYPE="gloss"
>
<LABEL>digraphs
</LABEL>
<ITEM>Form entity names for digraphs by appending the string
<MENTIONED>lig</MENTIONED> to the letters forming the digraph. If a
capitalized form is required, both letters are given in upper case
(remember that case is usually significant in entity names).  E.g.:
<MENTIONED>aelig</MENTIONED> (&aelig;), <MENTIONED>AElig</MENTIONED>
(&AElig;), <MENTIONED>szlig</MENTIONED> (&szlig;).</ITEM>
<LABEL>diacritics and accents
</LABEL>
<ITEM>Form entity names for accented letters in most Western European
languages by appending one of the following strings to the letter
bearing the accent, which may be in upper or lower case.</ITEM>
<LABEL>umlaut</LABEL>
<ITEM> use <MENTIONED>uml</MENTIONED> for umlaut or trema: e.g.
<MENTIONED>auml</MENTIONED> (&auml;), <MENTIONED>Auml</MENTIONED> (&Auml;),
<MENTIONED>euml</MENTIONED> (&euml;), <MENTIONED>iuml</MENTIONED>
(sic:  &iuml;), <MENTIONED>ouml</MENTIONED> (&ouml;),
<MENTIONED>Ouml</MENTIONED> (&Ouml;), <MENTIONED>uuml</MENTIONED> (&uuml;),
<MENTIONED>Uuml</MENTIONED> (&Uuml;).</ITEM>
<LABEL>acute</LABEL>
<ITEM> use <MENTIONED>acute</MENTIONED> for acute or stressed
accent: e.g. <MENTIONED>aacute</MENTIONED> (&aacute;),
<MENTIONED>eacute</MENTIONED> (&eacute;), <MENTIONED>Eacute</MENTIONED>
(&Eacute;),
<MENTIONED>iacute</MENTIONED> (&iacute;), <MENTIONED>oacute</MENTIONED>
(&oacute;),
<MENTIONED>uacute</MENTIONED> (&uacute;).</ITEM>
<LABEL>grave</LABEL>
<ITEM> use <MENTIONED>grave</MENTIONED> for grave accent: e.g.
<MENTIONED>agrave</MENTIONED> (&agrave;), <MENTIONED>egrave</MENTIONED>
(&egrave;), <MENTIONED>igrave</MENTIONED> (&igrave;),
<MENTIONED>ograve</MENTIONED> (&ograve;), <MENTIONED>ugrave</MENTIONED>
(&ugrave;).
</ITEM>
<LABEL>circumflex</LABEL>
<ITEM> use <MENTIONED>circ</MENTIONED> for circumflex: e.g.
<MENTIONED>acirc</MENTIONED> (&acirc;), <MENTIONED>ecirc</MENTIONED>
(&ecirc;), <MENTIONED>Ecirc</MENTIONED> (&Ecirc;), <MENTIONED>icirc</MENTIONED>
(&icirc;), <MENTIONED>ocirc</MENTIONED> (&ocirc;), <MENTIONED>ucirc</MENTIONED>
(&ucirc;).</ITEM><!-- Need Acirc Icirc Ocirc Ucirc -->
<LABEL>tilde</LABEL>
<ITEM> use <MENTIONED>tilde</MENTIONED> for tilde: e.g.
<MENTIONED>atilde</MENTIONED> (&atilde;), <MENTIONED>Atilde</MENTIONED>
(&Atilde;),
<MENTIONED>ntilde</MENTIONED> (&ntilde;), <MENTIONED>Ntilde</MENTIONED>
(&Ntilde;),
<MENTIONED>otilde</MENTIONED> (&otilde;), <MENTIONED>Otilde</MENTIONED>
(&Otilde;).
</ITEM>
<LABEL>consonants</LABEL>
<ITEM>The following are recommended entity names for some special
consonants found in Western European languages:   <MENTIONED>ccedil</MENTIONED>
(&ccedil;), <MENTIONED>Ccedil</MENTIONED> (&Ccedil;), <MENTIONED>eth</MENTIONED>
(lowercase eth or Anglo-Saxon/Icelandic crossed d),
<MENTIONED>ETH</MENTIONED> (uppercase eth),
<MENTIONED>thorn</MENTIONED> (lowercase thorn),
<MENTIONED>THORN</MENTIONED> (uppercase thorn), <MENTIONED>szlig</MENTIONED>
(German s-z ligature or <TERM>eszett</TERM>, &szlig;).</ITEM>
<LABEL>punctuation marks</LABEL>
<ITEM>The following are recommended entity names for some commonly
found punctuation marks:
<MENTIONED>ldquo</MENTIONED> (left double quotation mark, in shape of
superscript 66), <MENTIONED>rdquo</MENTIONED> (right double quotation
mark, superscript 99), <MENTIONED>mdash</MENTIONED> (one-em dash),
<MENTIONED>hellip</MENTIONED> (horizontal ellipsis, three closely
spaced dots), <MENTIONED>rsquo</MENTIONED> (right single quote, in
shape of superscript 9).  See also the list of <SOCALLED>unsafe</SOCALLED>
characters given below.</ITEM>
<LABEL><SOCALLED>unsafe</SOCALLED> characters</LABEL>
<ITEM>The characters listed above as unsafe for transmission over
current international academic and public-access networks may be
represented with the following entities:  <MENTIONED>excl</MENTIONED>
(!), <MENTIONED>num</MENTIONED> (#), <MENTIONED>dollar</MENTIONED>
($), <MENTIONED>lsqb</MENTIONED> (left square bracket),
<MENTIONED>bsol</MENTIONED> (back-slanted solidus, <MENTIONED>\</MENTIONED>),
<MENTIONED>rsqb</MENTIONED> (right square bracket), <MENTIONED>circ</MENTIONED>
(circumflex,
<MENTIONED>^</MENTIONED>), <MENTIONED>lsquo</MENTIONED> (left single
quotation mark), <MENTIONED>grave</MENTIONED> (grave accent),
<MENTIONED>lcub</MENTIONED> (left curly bracket, <MENTIONED>{</MENTIONED>),
<MENTIONED>rcub</MENTIONED> (right curly bracket, <MENTIONED>}</MENTIONED>),
<MENTIONED>verbar</MENTIONED> (vertical bar, <MENTIONED>|</MENTIONED>),
<MENTIONED>tilde</MENTIONED> (~).</ITEM></LIST>
</P></DIV1>

<DIV1 ID="fronbac"><HEAD>Front and Back Matter</HEAD>
<!---->
<DIV2><HEAD>Front Matter</HEAD>
<P>For many purposes, particularly in older texts, the preliminary
material such as title pages, prefatory epistles, etc., may provide
very useful additional linguistic or social information. P3 provides a
set of recommendations for distinguishing the textual elements most
commonly encountered in front matter, which are summarized here.
<!---->
</P>
<DIV3 ID="h51" TYPE="Section"><HEAD>Title Page</HEAD>
<P>The start of a title page should be marked with the element
<GI>titlePage</GI>.  All text contained on the page should be
transcribed and tagged with the appropriate element from the following
list:
<LIST TYPE="gloss">
<LABEL><GI>titlePage</GI></LABEL>
<ITEM>contains the title page of a text, appearing within the front
or back matter. </ITEM>
<LABEL><GI>docTitle</GI></LABEL>
<ITEM>contains the title of a document, including all its
constituents, as given on a title page.  Must be divided into <GI>titlePart</GI>
elements.</ITEM>
<LABEL><GI>titlePart</GI></LABEL>
<ITEM>contains a subsection or division of the title of a work, as
indicated on a title page; also used for free-floating fragments of
the title page not part of the document title, authorship attribution,
etc.  Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>specifies the role of this subdivision of the title. Suggested
values include:
<KW>main</KW> (main title),
<KW>sub</KW> (subtitle), <KW>desc</KW> (a descriptive paraphrase of
the work included in the title), and <KW>alt</KW> (alternative
title).</ITEM></LIST></ITEM>
<LABEL><GI>byline</GI></LABEL>
<ITEM>contains the primary statement of responsibility given for a
work on its title page or at the head or end of the work.</ITEM>
<LABEL><GI>docAuthor</GI></LABEL>
<ITEM>contains the name of the author of the document, as given on
the title page (often but not always contained in a
<GI>byline</GI>).</ITEM>
<LABEL><GI>docDate</GI></LABEL>
<ITEM>contains the date of the document, as given (usually) on the
title page.</ITEM>
<LABEL><GI>docEdition</GI></LABEL>
<ITEM>contains an edition statement as presented on a title page of a
document.</ITEM>
<LABEL><GI>docImprint</GI></LABEL>
<ITEM>contains the imprint statement (place and date of publication,
publisher name), as given (usually) at the foot of a title page.</ITEM>
<LABEL><GI>epigraph</GI></LABEL>
<ITEM>contains a quotation, anonymous or attributed, appearing at the
start of a section or chapter, or on a title page. </ITEM>
</LIST></P>
<P>Typeface distinctions should be marked with the <IDENT>rend</IDENT>
attribute when necessary, as described above. Very detailed
description of the letter spacing and sizing used in ornamental titles
is not as yet provided for by the Guidelines. Changes of language
should be marked by appropriate use of the <IDENT>lang</IDENT>
attribute or the
<GI>foreign</GI> element, as necessary. Names, wherever they appear,
should be tagged using the <GI>name</GI> element, as elsewhere.
</P>
<P>Two example title pages follow:
<EG><![CDATA [<titlePage rend=Roman>
  <docTitle><titlePart type=main>
    PARADISE REGAIN'D. A POEM In IV <hi>BOOKS</hi>.
    </titlePart>
    <titlePart>
    To which is added <title>SAMSON AGONISTES</title>.
    </titlePart>
  </docTitle>
  <byline>The Author <docAuthor>JOHN MILTON</docAuthor></byline>
  <docImprint><name>LONDON</name>,
    Printed by <name>J.M.</name>
    for <name>John Starkey</name>
    at the <name>Mitre</name>
    in <name>Fleetstreet</name>,
    near <name>Temple-Bar.</name>
  </docImprint>
  <docDate>MDCLXXI</docDate>
</titlePage>
]]></EG><EG><![CDATA [<titlePage>
  <docTitle><titlePart type=main>
  Lives of the Queens of England, from the Norman
    Conquest;</titlePart>
  <titlePart type='sub'>with anecdotes of their courts.
  </titlePart></docTitle>
  <titlePart>Now first published from Official Records
    and other authentic documents private as well as
    public.</titlePart>
  <docEdition>New edition, with corrections and
    additions</docEdition>
  <byline>By <docAuthor>Agnes Strickland</docAuthor></byline>
  <epigraph>
    <q>The treasures of antiquity laid up in old
       historic rolls, I opened.</q>
    <bibl>BEAUMONT</bibl>
  </epigraph>
  <docImprint>Philadelphia: Blanchard and Lea</docImprint>
  <docDate>1860.</docDate>
</titlePage>
]]>
</EG>
<!---->
</P></DIV3>
<DIV3 ID="h52"><HEAD>Prefatory Matter</HEAD>
<P>Major blocks of text within the front matter should be marked as
<GI>div</GI> or <GI>div1</GI> elements; the following suggested
values for the <IDENT>type</IDENT> attribute may be used to
distinguish various common types of prefatory matter:
<LIST TYPE="gloss">
<LABEL><KW>foreword</KW> </LABEL>
<ITEM>a text addressed to the reader, by the author, editor or
publisher, possibly in the form of a letter.</ITEM>
<LABEL><KW>preface</KW> </LABEL>
<ITEM>a text addressed to the reader, by the author, editor or
publisher, possibly in the form of a letter.</ITEM>
<LABEL><KW>dedication</KW> </LABEL>
<ITEM>a text (often a letter) addressed to someone other than the
reader in which the author typically commends the work in hand to the
attention of the person concerned.</ITEM>
<LABEL><KW>abstract</KW> </LABEL>
<ITEM>a prose argument summarizing the content of the work.</ITEM>
<LABEL><KW>ack</KW> </LABEL>
<ITEM>Acknowledgements.</ITEM>
<LABEL><KW>contents</KW> </LABEL>
<ITEM>a table of contents (typically this should be tagged as a
<GI>list</GI>).</ITEM>
<LABEL><KW>frontispiece</KW> </LABEL>
<ITEM>a pictorial frontispiece, possibly including some text.</ITEM>
</LIST>
</P>
<P>Like any text division, those in front matter may contain low
level structural or non-structural elements as described elsewhere.
They will generally begin with a heading or title of some kind which
should be tagged using the <GI>head</GI> element. Epistles will
contain the following additional elements:
<LIST TYPE="gloss">
<LABEL><GI>salute</GI></LABEL>
<ITEM>contains a salutation or greeting prefixed to a foreword,
dedicatory epistle or other division of a text, or the salutation in
the closing of a letter, preface, etc.</ITEM>
<LABEL><GI>signed</GI></LABEL>
<ITEM>contains the closing salutation, etc., appended to a foreword,
dedicatory epistle, or other division of a text.
</ITEM>
<LABEL><GI>byline</GI></LABEL>
<ITEM>contains the primary statement of responsibility given for a
work on its title page or at the head or end of the work.</ITEM>
<LABEL><GI>dateline</GI></LABEL>
<ITEM>contains a brief description of the place, date, time, etc., of
production of a letter, newspaper story, or other work, prefixed or
suffixed to it as a kind of heading or trailer. </ITEM>
<LABEL><GI>argument</GI></LABEL>
<ITEM>A formal list or prose description of the topics addressed by a
subdivision of a text.</ITEM>
<LABEL><GI>cit</GI></LABEL>
<ITEM>A quotation from some other document, together with a
bibliographic reference to its source.</ITEM>
<LABEL><GI>opener</GI></LABEL>
<ITEM>groups together dateline, byline, salutation, and similar
phrases appearing as a preliminary group at the start of a division,
especially of a letter.</ITEM>
<LABEL><GI>closer</GI></LABEL>
<ITEM>groups together dateline, byline, salutation, and similar
phrases appearing as a final group at the end of a division,
especially of a letter.</ITEM>
</LIST>   Epistles which appear elsewhere in a text will, of course,
contain these same elements.
</P>
<P>As an example, the dedication at the start of Milton's
<TITLE>Comus</TITLE> should be marked up as follows:
<EG><![CDATA [<div type='dedication'>
<head>To the Right Honourable <name>JOHN Lord Viscount
BRACLY</name>, Son and Heir apparent to the Earl of
Bridgewater, &amp;c.</head>
<salute>MY LORD,</salute>

<p>THis <hi>Poem</hi>, which receiv'd its first occasion of
Birth from your Self, and others of your Noble Family ....
and as in this representation your attendant
<name>Thyrsis</name>, so now in all reall expression
<closer>
<salute>Your faithfull, and most humble servant</salute>
<signed><name>H. LAWES.</name></signed>
</closer>
</div>
]]>
</EG>
<!---->
</P></DIV3></DIV2>
<DIV2><HEAD>Back Matter</HEAD>
<DIV3><HEAD>Structural Divisions of Back Matter</HEAD>
<P>Because of variations in publishing practice, back matter can
contain virtually any of the elements listed above for front matter,
and the same elements should be used where this is so.  Additionally,
back matter may contain the following types of matter within the
<GI>back</GI> element.  Like the structural divisions of the body,
these should be marked as <GI>div</GI> or <GI>div1</GI> elements,
and distinguished by the following suggested values of the
<IDENT>type</IDENT> attribute:
<LIST TYPE="gloss">
<LABEL><KW>appendix</KW> </LABEL>
<ITEM>an appendix.</ITEM>
<LABEL><KW>glossary</KW> </LABEL>
<ITEM>a list of words and definitions, typically in the form of a
<CODE LANG="sgml">list type=gloss</CODE>.</ITEM>
<LABEL><KW>notes</KW> </LABEL>
<ITEM>a series of <GI>note</GI>s.</ITEM>
<LABEL><KW>bibliography</KW> </LABEL>
<ITEM>a series of bibliographic references, typically in the form of
a special bibliographic-list element <GI>listBibl</GI>, whose items
are individual <GI>bibl</GI> elements.</ITEM>
<LABEL><KW>index</KW> </LABEL>
<ITEM>a set of index entries, possibly represented as a structured
list or glossary list, with optional leading
<GI>head</GI> and perhaps some paragraphs of introductory or closing
text (TEI P3 defines other specialized elements for generating indices
in document production, described above in section 
<PTR TARGET="index">).</ITEM>
<LABEL><KW>colophon</KW> </LABEL>
<ITEM>a description at the back of the book describing where, when,
and by whom it was printed; in modern books it also often gives
production details and identifies the type faces used.</ITEM>
</LIST>
</P>
</DIV3>
</DIV2></DIV1>
<DIV1 ID="teihead"><HEAD>The Electronic Title Page</HEAD>
<P>Every TEI text has a header which provides information analogous
to that provided by the title page of a printed text. The header is
introduced by the element <GI>teiHeader</GI> and has four major
parts:
<LIST TYPE="gloss">
<LABEL><GI>fileDesc</GI></LABEL>
<ITEM>contains a full bibliographic description of an electronic
file.</ITEM>
<LABEL><GI>encodingDesc</GI></LABEL>
<ITEM>documents the relationship between an electronic text and the
source or sources from which it was derived.</ITEM>
<LABEL><GI>profileDesc</GI></LABEL>
<ITEM>provides a detailed description of non-bibliographic aspects of
a text, specifically the languages and sublanguages used, the
situation in which it was produced, the participants and their
setting.</ITEM>
<LABEL><GI>revisionDesc</GI></LABEL>
<ITEM>summarizes the revision history for a file.</ITEM>
</LIST>    A corpus or collection of texts, which share many
characteristics, may have one header for the corpus and individual
headers for each component of the corpus.  In this case the <IDENT>type</IDENT>
attribute indicates the type of header.
<EG><![CDATA [     <teiHeader type=corpus>
]]>
</EG> introduces the header for corpus-level information.
</P>
<P>Some of the header elements contain running prose which consists
of one or more <GI>p</GI>s.  Others are grouped:
<LIST TYPE="bullets">
<ITEM>Elements whose names end in <MENTIONED>Stmt</MENTIONED> (for
statement) usually enclose a group of elements recording some
structured information.
</ITEM>
<ITEM>Elements whose names end in <MENTIONED>Decl</MENTIONED> (for
declaration) enclose information about specific encoding practices.
</ITEM>
<ITEM>Elements whose names end in <MENTIONED>Desc</MENTIONED> (for
description) contain a prose description.
</ITEM>
</LIST></P>
<DIV2><HEAD>The File Description</HEAD>
<P>The <GI>fileDesc</GI> element is mandatory. It contains a full
bibliographic description of the file with the following elements:
<LIST TYPE="gloss">
<LABEL><GI>titleStmt</GI></LABEL>
<ITEM>groups information about the title of a work and those
responsible for its intellectual content.</ITEM>
<LABEL><GI>editionStmt</GI></LABEL>
<ITEM>groups information relating to one edition of a text.</ITEM>
<LABEL><GI>extent</GI></LABEL>
<ITEM>describes the approximate size of the electronic text as stored
on some carrier medium, specified in any convenient units.</ITEM>
<LABEL><GI>publicationStmt</GI></LABEL>
<ITEM>groups information concerning the publication or distribution
of an electronic or other text. </ITEM>
<LABEL><GI>seriesStmt</GI></LABEL>
<ITEM>groups information about the <TERM>series</TERM>, if any, to
which a publication belongs.</ITEM>
<LABEL><GI>notesStmt</GI></LABEL>
<ITEM>collects together any notes providing information about a text
additional to that recorded in other parts of the bibliographic
description.</ITEM>
<LABEL><GI>sourceDesc</GI></LABEL>
<ITEM>supplies a bibliographic description of the copy text(s) from
which an electronic text was derived or generated.</ITEM>
</LIST>   A minimal header has the following structure:
<EG><![CDATA [<teiHeader>
     <fileDesc>
          <titleStmt> ... </titleStmt>
          <publicationStmt> ... </publicationStmt>
          <sourceDesc> ... </sourceDesc>
     </fileDesc>
</teiHeader>
]]>
</EG>
</P>
<DIV3><HEAD>The Title Statement</HEAD>
<P>The following elements can be used in the <GI>titleStmt</GI>:
<LIST TYPE="gloss">
<LABEL><GI>title</GI></LABEL>
<ITEM>contains the title of a work, whether article, book, journal,
or series, including any alternative titles or subtitles.</ITEM>
<LABEL><GI>author</GI></LABEL>
<ITEM>in a bibliographic reference, contains the name of the
author(s), personal or corporate, of a work; the primary
<TERM>statement of responsibility</TERM> for any bibliographic item.
</ITEM>
<LABEL><GI>sponsor</GI></LABEL>
<ITEM>specifies the name of a sponsoring organization or institution.</ITEM>
<LABEL><GI>funder</GI></LABEL>
<ITEM>specifies the name of an individual, institution, or
organization responsible for the funding of a project or text.</ITEM>
<LABEL><GI>principal</GI></LABEL>
<ITEM>supplies the name of the principal researcher responsible for
the creation of an electronic text.</ITEM>
<LABEL><GI>respStmt</GI></LABEL>
<ITEM>supplies a statement of responsibility for someone responsible
for the intellectual content of a text, edition, recording, or series,
where the specialized elements for authors, editors, etc., do not
suffice or do not apply.</ITEM>
</LIST>   It is recommended that the title should distinguish the
computer file from the source text, for example:
<EG>[title of source]: a machine readable transcription
[title of source]: electronic edition
A machine readable version of: [title of source]
</EG> The <GI>respStmt</GI> element contains the following
subcomponents:
<LIST TYPE="gloss">
<LABEL><GI>resp</GI></LABEL>
<ITEM>contains a phrase describing the nature of a person's
intellectual responsibility.</ITEM>
<LABEL><GI>name</GI></LABEL>
<ITEM>contains a proper noun or noun phrase.</ITEM>
</LIST>   Example:
<EG><![CDATA [<titleStmt>
     <title>Two stories by Edgar Allan Poe: a machine readable
               transcription</title>
     <author>Poe, Edgar Allan (1809-1849)</author>
     <respStmt><resp>compiled by</resp>
     <name>James D. Benson</name></respStmt>
</titleStmt>
]]>
</EG>
</P></DIV3>
<DIV3><HEAD>The Edition Statement</HEAD>
<P>The <GI>editionStmt</GI> groups information relating to one
edition of a text (where <MENTIONED>edition</MENTIONED> is used as
elsewhere in bibliography), and may include the following elements:
<LIST TYPE="gloss">
<LABEL><GI>edition</GI></LABEL>
<ITEM>describes the particularities of one edition of a text.</ITEM>
<LABEL><GI>respStmt</GI></LABEL>
<ITEM>supplies a statement of responsibility for someone responsible
for the intellectual content of a text, edition, recording, or series,
where the specialized elements for authors, editors, etc., do not
suffice or do not apply.</ITEM>
</LIST>
</P>
<P>Example:
<EG><![CDATA [<editionStmt>
     <edition n=U2>Third draft, substantially revised
     <date>1987</date>
     </edition>
</editionStmt>
]]>
</EG>
</P>
<P>Determining exactly what constitutes a new edition of an
electronic text is left to the encoder.
</P></DIV3>
<DIV3><HEAD>The Extent Statement
</HEAD>
<P>The <GI>extent</GI> statement describes the approximate size of a
file.
</P>
<P>Example:
<EG><![CDATA [<extent>4532 bytes</extent>
]]>
</EG>
</P></DIV3>
<DIV3><HEAD>The Publication Statement</HEAD>
<P>The <GI>publicationStmt</GI> is mandatory. It may contain a
simple prose description or groups of the elements described below:
<LIST TYPE="gloss">
<LABEL><GI>publisher</GI></LABEL>
<ITEM>provides the name of the organization responsible for the
publication or distribution of a bibliographic item.</ITEM>
<LABEL><GI>distributor</GI></LABEL>
<ITEM>supplies the name of a person or other agency responsible for
the distribution of a text.</ITEM>
<LABEL><GI>authority</GI></LABEL>
<ITEM>supplies the name of a person or other agency responsible for
making an electronic file available, other than a publisher or
distributor.</ITEM>
</LIST>
</P>
<P>At least one of these three elements must be present, unless the
entire publication statement is in prose. The following elements may
occur within them:
<LIST TYPE="gloss">
<LABEL><GI>pubPlace</GI></LABEL>
<ITEM>contains the name of the place where a bibliographic item was
published.</ITEM>
<LABEL><GI>address</GI></LABEL>
<ITEM>contains a postal or other address, for example of a publisher,
an organization, or an individual.</ITEM>
<LABEL><GI>idno</GI></LABEL>
<ITEM>supplies any standard or non-standard number used to identify a
bibliographic item. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>type</IDENT></LABEL>
<ITEM>categorizes the number, for example as an ISBN or other
standard series.</ITEM></LIST></ITEM>
<LABEL><GI>availability</GI></LABEL>
<ITEM>supplies information about the availability of a text, for
example any restrictions on its use or distribution, its copyright
status, etc. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>status</IDENT></LABEL>
<ITEM>supplies a code identifying the current availability of the
text. Sample values include <CODE>restricted</CODE>, <CODE>unknown</CODE>,
and <CODE>free</CODE>.</ITEM></LIST></ITEM>
<LABEL><GI>date</GI></LABEL>
<ITEM>contains a date in any format.</ITEM></LIST>
</P>
<P>Example:
<EG><![CDATA [<publicationStmt>
     <publisher>Oxford University Press</publisher>
     <pubPlace>Oxford</pubPlace> <date>1989</date>
     <idno type=ISBN> 0-19-254705-5</idno>
     <availability>Copyright 1989, Oxford University
          Press</availability>
</publicationStmt>
]]>
</EG>
</P></DIV3>
<DIV3><HEAD>Series and Notes Statements
</HEAD>
<P>The <GI>seriesStmt</GI> groups information about the series, if
any, to which a publication belongs. It may contain <GI>title</GI>,
<GI>idno</GI>, or <GI>respStmt</GI> elements.
</P>
<P>The <GI>notesStmt</GI>, if used, contains one or more <GI>note</GI>
elements, each of which contains a note or annotation. Some information found in
the notes area in conventional bibliography has been assigned specific
elements in the TEI scheme.
</P></DIV3>
<DIV3><HEAD>The Source Description
</HEAD>
<P>The <GI>sourceDesc</GI> is a mandatory element which records
details of the source or sources from which the computer file is
derived. It may contain simple prose or a bibliographic citation,
using one or more of the following elements:
<LIST TYPE="gloss">
<LABEL><GI>bibl</GI></LABEL>
<ITEM>contains a loosely-structured bibliographic citation of which
the sub-components may or may not be explicitly tagged. </ITEM>
<LABEL><GI>biblFull</GI></LABEL>
<ITEM>contains a fully-structured bibliographic citation, in which
all components of the TEI file description are present.
<!-- except maybe the source.desc? --></ITEM>
<LABEL><GI>listBibl</GI></LABEL>
<ITEM>contains a list of bibliographic citations of any kind.
</ITEM>
</LIST></P>
<P>Examples:
<EG><![CDATA [<sourceDesc>
     <bibl>The first folio of Shakespeare, prepared by Charlton
          Hinman (The Norton Facsimile, 1968)</bibl>
</sourceDesc>
]]>
</EG>
<EG><![CDATA [<sourceDesc>
     <scriptStmt id=CNN12>
     <bibl><author>CNN Network News
          <title>News headlines
          <date>12 Jun 1989
     </bibl>
     </scriptStmt>
</sourceDesc>
]]>
</EG>
</P></DIV3></DIV2>
<DIV2><HEAD>The Encoding Description</HEAD>
<P>The <GI>encodingDesc</GI> element specifies the methods and
editorial principles which governed the transcription of the text. Its
use is highly recommended.  It may be a prose description or may contain
elements from the following list:
<LIST TYPE="gloss">
<LABEL><GI>projectDesc</GI></LABEL>
<ITEM>describes in detail the aim or purpose for which an electronic
file was encoded, together with any other relevant information
concerning the process by which it was assembled or collected.</ITEM>
<LABEL><GI>samplingDecl</GI></LABEL>
<ITEM>contains a prose description of the rationale and methods used
in sampling texts in the creation of a corpus or collection.</ITEM>
<LABEL><GI>editorialDecl</GI></LABEL>
<ITEM>provides details of editorial principles and practices applied
during the encoding of a text.</ITEM>
<LABEL><GI>tagsDecl</GI></LABEL>
<ITEM>provides detailed information about the tagging applied to an
SGML document. </ITEM>
<LABEL><GI>refsDecl</GI></LABEL>
<ITEM>specifies how canonical references are constructed for this
text.</ITEM>
<LABEL><GI>classDecl</GI></LABEL>
<ITEM>contains one or more taxonomies defining any classificatory
codes used elsewhere in the text.</ITEM>
</LIST>
</P>
<DIV3><HEAD>Project and Sampling Descriptions</HEAD>
<P>Examples of <GI>projectDesc</GI> and <GI>samplingDecl</GI>:
<EG><![CDATA [<encodingDesc>
     <projectDesc>Texts collected for use in the Claremont
          Shakespeare Clinic, June 1990.
     </projectDesc>
</encodingDesc>
]]>
</EG>
<EG><![CDATA [<encodingDesc>
     <samplingDecl>Samples of 2000 words taken from the beginning
          of the text
     </samplingDecl>
</encodingDesc>
]]>
</EG>
</P></DIV3>
<DIV3><HEAD>Editorial Declarations</HEAD>
<P>The <GI>editorialDecl</GI> contains a prose description of the
practices used when encoding the text. Typically this description
should cover such topics as the following, each of which may
conveniently be given as a separate paragraph. <LIST TYPE="gloss">
<LABEL>correction </LABEL>
<ITEM>how and under what circumstances corrections have been made in
the text.</ITEM>
<LABEL>normalization</LABEL>
<ITEM>the extent to which the original source has been regularized or
normalized.</ITEM>
<LABEL>quotation</LABEL>
<ITEM>what has been done with quotation marks in the original -- have
they been retained or replaced by entity references, are opening and
closing quotes distinguished, etc. </ITEM>
<LABEL>hyphenation</LABEL>
<ITEM>what has been done with hyphens (especially end-of-line
hyphens)  in the original -- have they been retained, replaced by
entity references, etc.</ITEM>
<LABEL>segmentation</LABEL>
<ITEM>how the text has been segmented, for example into
sentences, tone-units, graphemic strata, etc.</ITEM>
<LABEL>interpretation</LABEL>
<ITEM>what analytic or interpretive information has been added to the
text. </ITEM></LIST>
</P>
<P>Example:
<EG><![CDATA [<editorialDecl>
          <p>The part of speech analysis applied throughout
               section 4 was added by hand and has not been
               checked.
          <p>Errors in transcription controlled by using the
               WordPerfect spelling checker.
          <p>All words converted to Modern American spelling
               using Webster's 9th Collegiate dictionary.
          <p>All quotation marks converted to entity
               references &odq; and &cdq;.
</editorialDecl>
]]>
</EG>
</P></DIV3>
<DIV3><HEAD>Tagging, Reference, and Classification Declarations</HEAD>
<P>The <GI>tagsDecl</GI> element is used to provide detailed
information about the SGML tags actually appearing within a text. It
may contain a simple list of elements used, with a count for each,
using the following special purpose elements:
<LIST TYPE="gloss">
<LABEL><GI>tagUsage</GI></LABEL>
<ITEM>supplies information about the usage of a specific element
within the outermost <GI>text</GI> of a TEI conformant document.
Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>gi</IDENT></LABEL>
<ITEM>the name (generic identifier) of the element indicated by the
tag.</ITEM>
<LABEL><IDENT>occurs</IDENT></LABEL>
<ITEM>specifies the number of occurrences of this element within the
text.</ITEM>
</LIST>
</ITEM>
</LIST>
</P>
<P>The <GI>rendition</GI> element is used to document different ways
in which elements are rendered in the source text.
<LIST TYPE="gloss">
<LABEL><GI>rendition</GI></LABEL>
<ITEM>supplies information about the intended rendition of one or
more elements.</ITEM>
<LABEL><GI>tagUsage</GI></LABEL>
<ITEM>supplies information about the usage of a specific element
within a <GI>text</GI>. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>occurs</IDENT></LABEL>
<ITEM>specifies the number of occurrences of this element within the
text.</ITEM>
<LABEL><IDENT>ident</IDENT></LABEL>
<ITEM>specifies the number of occurrences of this element within the
text which bear a distinct value for the global
<IDENT>id</IDENT> attribute.</ITEM>
<LABEL><IDENT>render</IDENT></LABEL>
<ITEM>specifies the identifier of a <GI>rendition</GI> element which
defines how this element is to be rendered.</ITEM>
</LIST>
</ITEM>
</LIST>
</P>
<P>For example:
<EG><![CDATA [<tagsDecl>
 <tagUsage gi=text occurs=1>
 <tagUsage gi=body occurs=1>
 <tagUsage gi=p occurs=12>
 <tagUsage gi=hi occurs=6>
</tagsDecl>
]]>
</EG>  This (imaginary) tags declaration would be appropriate for a
text containing twelve paragraphs in its body, within which six <GI>hi</GI>
elements have been marked. Note that if the <GI>tagsDecl</GI> element
is used, it must contain a <GI>tagUsage</GI> element for <EMPH>every</EMPH>
element tagged in the associated text element.
</P>
<P>The <GI>refsDecl</GI> element is used to document the way in
which any standard referencing scheme built into the encoding works.
In its simplest form, it consists of prose description.</P>
<P>Example:
<EG><![CDATA [<refsDecl>
     <p>The N attribute on each DIV1 and DIV2 contains the
     canonical reference for each such division in the form
     XX.yyy where XX is the book number in roman numeral and
     yyy is the section number in arabic.
</refsDecl>
]]>
</EG></P>
<P>The <GI>classDecl</GI> element groups together definitions or
sources for any descriptive classification schemes used by other parts
of the header. At least one such scheme must be provided, encoded
using the following elements:
<LIST TYPE="gloss">
<LABEL><GI>taxonomy</GI></LABEL>
<ITEM>defines a typology used to classify texts either implicitly, by
means of a bibliographic citation, or explicitly by a structured
taxonomy.</ITEM>
<LABEL><GI>bibl</GI></LABEL>
<ITEM>contains a loosely-structured bibliographic citation of which
the sub-components may or may not be explicitly tagged. </ITEM>
<LABEL><GI>category</GI></LABEL>
<ITEM>contains an individual descriptive category, possibly nested
within a superordinate category, within a user-defined taxonomy.</ITEM>
<LABEL><GI>catDesc</GI></LABEL>
<ITEM>describes some category within a taxonomy or text typology, in
the form of a brief prose description.</ITEM>
</LIST>   In the simplest case, the taxonomy may be defined by a
bibliographic reference, as in the following example:
<EG><![CDATA [<classDecl>
     <taxonomy id='LCSH'>
          <bibl>Library of Congress Subject Headings
          </bibl>
     </taxonomy>
</classDecl>
]]>
</EG>
</P>
<P>Alternatively, or in addition, the encoder may define a special
purpose classification scheme, as in the following example:
<EG><![CDATA [<taxonomy id=B>
   <bibl>Brown Corpus</bibl>
   <category id=B.A><catDesc>Press Reportage
      <category id=B.A1><catDesc>Daily</category>
      <category id=B.A2><catDesc>Sunday</category>
      <category id=B.A3><catDesc>National</category>
      <category id=B.A4><catDesc>Provincial</category>
      <category id=B.A5><catDesc>Political</category>
      <category id=B.A6><catDesc>Sports</category>
     ...
   </category>
   <category id=B.D><catDesc>Religion
      <category id=B.D1><catDesc>Books</category>
      <category id=B.D2><catDesc>Periodicals and tracts</category>
   </category>
  ...
</taxonomy>
]]>
</EG>
</P>
<P>Linkage between a particular text and a category within such a
taxonomy is made by means of the <GI>catRef</GI> element within the
<GI>textClass</GI> element, as further described below.
</P></DIV3></DIV2>
<DIV2><HEAD>The Profile Description</HEAD>
<P>The <GI>profileDesc</GI> element enables information
characterizing various descriptive aspects of a text to be recorded
within a single framework. It has three optional components:
<LIST TYPE="gloss">
<LABEL><GI>creation</GI></LABEL>
<ITEM>contains information about the creation of a text.</ITEM>
<LABEL><GI>langUsage</GI></LABEL>
<ITEM>describes the languages, sublanguages, registers, dialects,
etc., represented within a text.</ITEM>
<LABEL><GI>textClass</GI></LABEL>
<ITEM>groups information which describes the nature or topic of a
text in terms of a standard classification scheme, thesaurus, etc.</ITEM>
</LIST>
</P>
<P>Examples:
<EG><![CDATA [<creation>
     <date value='1992-08'>August 1992</date>
     <name type=place>Taos, New Mexico</name>
</creation>
]]>
</EG>
</P>
<P>The <GI>textClass</GI> element classifies a text by reference to
the system or systems defined by the <GI>classDecl</GI> element, and
contains one or more of the following elements:
<LIST TYPE="gloss">
<LABEL><GI>keywords</GI></LABEL>
<ITEM>contains a list of keywords or phrases identifying the topic or
nature of a text. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>scheme</IDENT></LABEL>
<ITEM>identifies the controlled vocabulary within which the set of
keywords concerned is defined.</ITEM>
</LIST>
</ITEM>
<LABEL><GI>classCode</GI></LABEL>
<ITEM>contains the classification code used for this text in some
standard classification system. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>scheme</IDENT></LABEL>
<ITEM>identifies the classification system or taxonomy in use.</ITEM>
</LIST>
</ITEM>
<LABEL><GI>catRef</GI></LABEL>
<ITEM>specifies one or more defined categories within some taxonomy
or text typology. Attributes include:
<LIST TYPE="gloss">
<LABEL><IDENT>target</IDENT></LABEL>
<ITEM>identifies the categories concerned.</ITEM></LIST></ITEM></LIST>

</P>
<P>The element <GI>keywords</GI> contains a list of keywords or
phrases identifying the topic or nature of a text. The attribute
<IDENT>scheme</IDENT> links these to the classification system
defined in
<GI>taxonomy</GI>.
<EG><![CDATA [<textClass>
     <keywords scheme=LCSH>
          <list>
          <item>English literature -- History and criticism --
               Data processing.</item>
          <item>English literature -- History and criticism --
               Theory etc.</item>
          <item>English language -- Style -- Data
               processing.</item>
          </list>
     </keywords>
</textClass>
]]>
</EG>
</P></DIV2>
<DIV2><HEAD>The Revision Description</HEAD>
<P>The <GI>revisionDesc</GI> element provides a change log in which
each change made to a text may be recorded. The log may be recorded as
a sequence of <GI>change</GI> elements each of which contains
<LIST TYPE="gloss">
<LABEL><GI>date</GI></LABEL>
<ITEM>contains a date in any format.</ITEM>
<LABEL><GI>respStmt</GI></LABEL>
<ITEM>supplies a statement of responsibility for someone responsible
for the intellectual content of a text, edition, recording, or series,
where the specialized elements for authors, editors, etc., do not
suffice or do not apply.</ITEM>
<LABEL><GI>item</GI></LABEL>
<ITEM>contains one component of a list.</ITEM>
</LIST>
</P>
<P>Example:
<EG><![CDATA [<revisionDesc>
     <change><date>6/3/91:</date>
          <respStmt><name>EMB</name><resp>ed.</resp></respStmt>
          <item>File format updated</item>
     <change><date>5/25/90:</date>
          <respStmt><name>EMB</name><resp>ed.</resp></respStmt>
          <item>Stuart's corrections entered</item>
</revisionDesc>
]]>
</EG>
<!---->
<!---->
</P>
</DIV2></DIV1>
<!--
&CIMIext1;
-->
</BODY>
<BACK>
<DIV1><HEAD>List of Elements Described</HEAD>

<div2><head>Global Attributes</head>
<P>All elements in the TEI Lite document type definition have the
following global attributes:
<LIST TYPE="gloss">
<LABEL><IDENT>ana</IDENT></LABEL>
<ITEM>links an element with its interpretation.</ITEM>
<LABEL><IDENT>corresp</IDENT></LABEL>
<ITEM>links an element with one or more other corresponding elements.</ITEM>
<LABEL><IDENT>id</IDENT> </LABEL>
<ITEM>Unique identifier for the element; must begin with a letter,
can contain letters, digits, hyphens, and periods.</ITEM>
<LABEL><IDENT>lang</IDENT> </LABEL>
<ITEM>language of the text in this element; if not specified,
language is assumed to be the same as in the surrounding context.</ITEM>
<LABEL><IDENT>n</IDENT> </LABEL>
<ITEM>Name or number of this element; may be any string of
characters. Often used for recording traditional reference systems.</ITEM>
<LABEL><IDENT>next</IDENT></LABEL>
<ITEM>links an element to the next element in an aggregate.</ITEM>
<LABEL><IDENT>prev</IDENT></LABEL>
<ITEM>links an element to the previous element in an aggregate.</ITEM>
<LABEL><IDENT>rend</IDENT> </LABEL>
<ITEM>physical realization of the element in the copy text:
<CODE>italic</CODE>,
<CODE>roman</CODE>, <CODE>display block</CODE>, etc. Value may be any
string of characters.</ITEM>
</LIST></P>
</div2>
<div2><head>Elements in TEI Lite</head>
<P>The following list shows all the elements defined for the TEI Lite
DTD, with a brief description of each:
<!-- Revisions:                                              -->
<!-- 95-06-03 : MSM : sort                                   -->
<!-- 95-06-02 : MSM : partial sort                           -->
<!-- <!DOCTYPE temp system 'teilite.dtd' [                   -->
<!--  <!ENTITY % TEI.prose 'INCLUDE'    >                    -->
<!--  <!ELEMENT temp - - (list+) >                           -->
<!--       created by GetExx.dic 2 Jun 1995 7:33pm           -->
<!-- <temp>                                                  --> 
<LIST TYPE="gloss">
 
<LABEL><GI>abbr</GI></LABEL>
<ITEM>contains an abbreviation of any sort; expansion may be given in
the <IDENT>expan</IDENT> attribute.
</ITEM>  
<LABEL><GI>add</GI></LABEL>
<ITEM>contains letters, words, or phrases inserted in the text by an
author, scribe, annotator, or corrector.</ITEM>  
<LABEL><GI>address</GI></LABEL>
<ITEM>contains a postal or other address, for example of a publisher,
an organization, or an individual.</ITEM>  
<LABEL><GI>addrLine</GI></LABEL>
<ITEM>contains one line of a postal or other address.</ITEM>  
<LABEL><GI>anchor</GI></LABEL>
<ITEM>specifies a location or point within a document so that it may
be pointed to.</ITEM>
<LABEL><GI>argument</GI></LABEL>
<ITEM>A formal list or prose description of the topics addressed by a
subdivision of a text.</ITEM>  
<LABEL><GI>author</GI></LABEL>
<ITEM>in a bibliographic reference, contains the name of the
author(s), personal or corporate, of a work; the primary
<TERM>statement of responsibility</TERM> for any bibliographic item.</ITEM>
 
<LABEL><GI>authority</GI></LABEL>
<ITEM>supplies the name of a person or other agency responsible for
making an electronic file available, other than a publisher or
distributor.</ITEM>  
<LABEL><GI>availability</GI></LABEL>
<ITEM>supplies information about the availability of a text, for
example any restrictions on its use or distribution, its copyright
status, etc.
</ITEM>    
<LABEL><GI>back</GI></LABEL>
<ITEM>contains any appendixes, etc., following the main part of a
text.</ITEM>  
<LABEL><GI>bibl</GI></LABEL>
<ITEM>contains a loosely-structured bibliographic citation of which
the sub-components may or may not be explicitly tagged.
</ITEM>  
<LABEL><GI>biblFull</GI></LABEL>
<ITEM>contains a fully-structured bibliographic citation, in which
all components of the TEI file description are present.</ITEM>  
<LABEL><GI>biblScope</GI></LABEL>
<ITEM>defines the scope of a bibliographic reference, for example as
a list of page numbers, or a named subdivision of a larger work.</ITEM>
 
<LABEL><GI>body</GI></LABEL>
<ITEM>contains the whole body of a single unitary text, excluding any
front or back matter.</ITEM>  
<LABEL><GI>byline</GI></LABEL>
<ITEM>contains the primary statement of responsibility given for a
work on its title page or at the head or end of the work.</ITEM>    
<LABEL><GI>catDesc</GI></LABEL>
<ITEM>describes some category within a taxonomy or text typology, in
the form of a brief prose description.</ITEM>  
<LABEL><GI>category</GI></LABEL>
<ITEM>contains an individual descriptive category, possibly nested
within a superordinate category, within a user-defined taxonomy.</ITEM>
 
<LABEL><GI>catRef</GI></LABEL>
<ITEM>specifies one or more defined categories within some taxonomy
or text typology.</ITEM>  
<LABEL><GI>cell</GI></LABEL>
<ITEM>contains one cell of a table.</ITEM>  
<LABEL><GI>cit</GI></LABEL>
<ITEM>A quotation from some other document, together with a
bibliographic reference to its source.</ITEM>  
<LABEL><GI>classCode</GI></LABEL>
<ITEM>contains the classification code used for this text in some
standard classification system, which is identified by the
<IDENT>scheme</IDENT> attribute.</ITEM>  
<LABEL><GI>classDecl</GI></LABEL>
<ITEM>contains one or more taxonomies defining any classificatory
codes used elsewhere in the text.</ITEM>  
<LABEL><GI>closer</GI></LABEL>
<ITEM>groups together dateline, byline, salutation, and similar
phrases appearing as a final group at the end of a division,
especially of a letter.</ITEM>  
<LABEL><GI>code</GI></LABEL>
<ITEM>contains a short fragment of code in some formal language
(often a programming language).</ITEM>  
<LABEL><GI>corr</GI></LABEL>
<ITEM>contains the correct form of a passage apparently erroneous in
the copy text.</ITEM>  
<LABEL><GI>creation</GI></LABEL>
<ITEM>contains information about the creation of a text.</ITEM>    
<LABEL><GI>date</GI></LABEL>
<ITEM>contains a date in any format, with normalized value in the
<IDENT>value</IDENT> attribute.</ITEM>  
<LABEL><GI>dateline</GI></LABEL>
<ITEM>contains a brief description of the place, date, time, etc., of
production of a letter, newspaper story, or other work, prefixed or
suffixed to it as a kind of heading or trailer.
</ITEM>  
<LABEL><GI>del</GI></LABEL>
<ITEM>contains a letter, word or passage deleted, marked as deleted,
or otherwise indicated as superfluous or spurious in the copy text by
an author, scribe, annotator or corrector.</ITEM>  
<LABEL><GI>distributor</GI></LABEL>
<ITEM>supplies the name of a person or other agency responsible for
the distribution of a text.</ITEM>  
<LABEL><GI>div</GI></LABEL>
<ITEM>contains a subdivision of the front, body, or back of a text.</ITEM>
 
<LABEL><GI>div1</GI> ... <GI>div7</GI></LABEL>
<ITEM>contains a first-, second-, ..., seventh-level subdivision of
the front, body, or back of a text.</ITEM>  
<LABEL><GI>divGen</GI></LABEL>
<ITEM>indicates the location at which a textual division generated
automatically by a text-processing application is to appear; the
<IDENT>type</IDENT> attribute specifies whether it is an index, table
of contents, or something else.</ITEM>  
<LABEL><GI>docAuthor</GI></LABEL>
<ITEM>contains the name of the author of the document, as given on
the title page (often but not always contained in a
<GI>byline</GI>).</ITEM>  
<LABEL><GI>docDate</GI></LABEL>
<ITEM>contains the date of the document, as given (usually) on the
title page.</ITEM>  
<LABEL><GI>docEdition</GI></LABEL>
<ITEM>contains an edition statement as presented on a title page of a
document.</ITEM>  
<LABEL><GI>docImprint</GI></LABEL>
<ITEM>contains the imprint statement (place and date of publication,
publisher name), as given (usually) at the foot of a title page.</ITEM>
 
<LABEL><GI>docTitle</GI></LABEL>
<ITEM>contains the title of a document, including all its
constituents, as given on a title page.  Must be divided into <GI>titlePart</GI>
elements.</ITEM>  
<LABEL><GI>edition</GI></LABEL>
<ITEM>describes the particularities of one edition of a text.</ITEM> 
<LABEL><GI>editionStmt</GI></LABEL>
<ITEM>groups information relating to one edition of a text.</ITEM>  
<LABEL><GI>editor</GI></LABEL>
<ITEM>secondary
<TERM>statement of responsibility</TERM> for a bibliographic item,
for example the name of an individual, institution or organization,
(or of several such) acting as editor, compiler, translator, etc.</ITEM>
 
<LABEL><GI>editorialDecl</GI></LABEL>
<ITEM>provides details of editorial principles and practices applied
during the encoding of a text.</ITEM>  
<LABEL><GI>eg</GI></LABEL>
<ITEM>contains a single short example of some technical topic being
discussed, e.g. a code fragment or a sample of SGML encoding.</ITEM>  
<LABEL><GI>emph</GI></LABEL>
<ITEM>marks words or phrases which are stressed or emphasized for
linguistic or rhetorical effect.</ITEM>  
<LABEL><GI>encodingDesc</GI></LABEL>
<ITEM>documents the relationship between an electronic text and the
source or sources from which it was derived.</ITEM>  
<LABEL><GI>epigraph</GI></LABEL>
<ITEM>contains a quotation, anonymous or attributed, appearing at the
start of a section or chapter, or on a title page. </ITEM>  
<LABEL><GI>extent</GI></LABEL>
<ITEM>describes the approximate size of the electronic text as stored
on some carrier medium, specified in any convenient units.</ITEM>  
<LABEL><GI>figure</GI></LABEL>
<ITEM>marks the spot at which a graphic is to be inserted in a
document. Attributes may be used to indicate an SGML entity containing
the image itself (in some non-SGML notation); paragraphs within the
<GI>figure</GI> element may be used to transcribe captions.</ITEM>  
<LABEL><GI>fileDesc</GI></LABEL>
<ITEM>contains a full bibliographic description of an electronic
file.</ITEM>  
<LABEL><GI>foreign</GI></LABEL>
<ITEM>identifies a word or phrase as belonging to some language other
than that of the surrounding text.
</ITEM>  
<LABEL><GI>formula</GI></LABEL>
<ITEM>contains a mathematical or chemical formula, optionally
presented in some non-SGML notation. The <IDENT>notation</IDENT>
attribute is used to name the non-SGML notation used to transcribe the
formula.</ITEM>
 
<LABEL><GI>front</GI></LABEL>
<ITEM>contains any prefatory matter (headers, title page, prefaces,
dedications, etc.) found before the start of a text proper.</ITEM>  
<LABEL><GI>funder</GI></LABEL>
<ITEM>specifies the name of an individual, institution, or
organization responsible for the funding of a project or text.</ITEM> 
<LABEL><GI>gap</GI></LABEL>
<ITEM>indicates a point where material has been omitted in a
transcription, whether for editorial reasons described in the TEI
header, as part of sampling practice, or because the material is
illegible or inaudible.</ITEM>  
<LABEL><GI>gi</GI></LABEL>
<ITEM>contains a special type of identifier: an SGML generic
identifier, or element name.</ITEM>  
<LABEL><GI>gloss</GI></LABEL>
<ITEM>marks a word or phrase which provides a gloss or definition for
some other word or phrase.
</ITEM>  
<LABEL><GI>group</GI></LABEL>
<ITEM>contains a number of unitary texts or groups of texts.</ITEM>  
<LABEL><GI>head</GI></LABEL>
<ITEM>contains any heading, for example, the title of a section, or
the heading of a list or glossary.</ITEM>  
<LABEL><GI>hi</GI></LABEL>
<ITEM>marks a word or phrase as graphically distinct from the
surrounding text, for reasons concerning which no claim is made.</ITEM>
 
<LABEL><GI>ident</GI></LABEL>
<ITEM>contains an identifier of some kind, e.g. a variable name or
the name of an SGML element or attribute.</ITEM>  
<LABEL><GI>idno</GI></LABEL>
<ITEM>supplies any standard or non-standard number used to identify a
bibliographic item; the <IDENT>type</IDENT> attribute identifies the
scheme or standard.</ITEM>  
<LABEL><GI>imprint</GI></LABEL>
<ITEM>groups information relating to the publication or distribution
of a bibliographic item.  </ITEM>  
<LABEL><GI>index</GI></LABEL>
<ITEM>marks a location to be indexed for some purpose. Attributes are
used to give the main form, and second- through fourth-level forms to
be entered in the index indicated.</ITEM>  
<LABEL><GI>interp</GI></LABEL>
<ITEM>provides for an interpretive annotation which can be  linked to
a span of text. Attributes include <IDENT>resp</IDENT>,
<IDENT>type</IDENT>, and <IDENT>value</IDENT>.</ITEM>  
<LABEL><GI>interpGrp</GI></LABEL>
<ITEM>collects together <GI>interp</GI> tags.</ITEM>  
<LABEL><GI>item</GI></LABEL>
<ITEM>contains one component of a list.</ITEM>    
<LABEL><GI>keywords</GI></LABEL>
<ITEM>contains a list of keywords or phrases identifying the topic or
nature of a text; if the keywords come from a controlled vocabulary,
it can be identified by the <IDENT>scheme</IDENT> attribute.</ITEM>  
<LABEL><GI>kw</GI></LABEL>
<ITEM>contains a keyword in some formal language.</ITEM>  
<LABEL><GI>l</GI></LABEL>
<ITEM>contains a single, possibly incomplete, line of verse.</ITEM>  
<LABEL><GI>label</GI></LABEL>
<ITEM>contains the label associated with an item in a list; in
glossaries, marks the term being defined.</ITEM>  
<LABEL><GI>langUsage</GI></LABEL>
<ITEM>describes the languages, sublanguages, registers, dialects,
etc., represented within a text.</ITEM>  
<LABEL><GI>lb</GI></LABEL>
<ITEM>marks the start of a new (typographic) line in some  edition or
version of a text.</ITEM>  
<LABEL><GI>lg</GI></LABEL>
<ITEM>contains a group of verse lines functioning as a formal unit,
e.g. a stanza, refrain, verse paragraph, etc.</ITEM>  
<LABEL><GI>list</GI></LABEL>
<ITEM>contains any sequence of items organized as a list, whether of
numbered, bulleted, or other type.</ITEM>  
<LABEL><GI>listBibl</GI></LABEL>
<ITEM>contains a list of bibliographic citations of any kind.</ITEM> 
<LABEL><GI>mentioned</GI></LABEL>
<ITEM>marks words or phrases mentioned, not used.</ITEM>  
<LABEL><GI>milestone</GI></LABEL>
<ITEM>marks the boundary between sections of a text, as indicated by
changes in a standard reference system. Attributes include
<IDENT>ed</IDENT> (edition),
<IDENT>unit</IDENT> (page, etc.), and
<IDENT>n</IDENT> (new value).
</ITEM>  
<LABEL><GI>name</GI></LABEL>
<ITEM>contains a proper noun or noun phrase. Attributes can indicate
its type, give a normalized form, or associate it with a specific
individual or thing by means of a unique identifier.
</ITEM>  
<LABEL><GI>note</GI></LABEL>
<ITEM>contains a note or annotation, with attributes to indicate the
type, location, and source of the note.</ITEM>  
<LABEL><GI>notesStmt</GI></LABEL>
<ITEM>collects together any notes providing information about a text
additional to that recorded in other parts of the bibliographic
description.</ITEM>  
<LABEL><GI>num</GI></LABEL>
<ITEM>contains a number, written in any form, with normalized value
in the <IDENT>value</IDENT> attribute.</ITEM>  
<LABEL><GI>opener</GI></LABEL>
<ITEM>groups together dateline, byline, salutation, and similar
phrases appearing as a preliminary group at the start of a division,
especially of a letter.</ITEM>  
<LABEL><GI>orig</GI></LABEL>
<ITEM>contains the original form of a reading, for which a
regularized form may be given in the attribute <IDENT>reg</IDENT>.</ITEM>
 
<LABEL><GI>p</GI></LABEL>
<ITEM>marks paragraphs in prose.</ITEM>  
<LABEL><GI>pb</GI></LABEL>
<ITEM>marks the boundary between one page of a text and the next in a
standard reference system.</ITEM>  
<LABEL><GI>principal</GI></LABEL>
<ITEM>supplies the name of the principal researcher responsible for
the creation of an electronic text.</ITEM>  
<LABEL><GI>profileDesc</GI></LABEL>
<ITEM>provides a detailed description of non-bibliographic aspects of
a text, specifically the languages and sublanguages used, the
situation in which it was produced, the participants and their
setting.</ITEM>  
<LABEL><GI>projectDesc</GI></LABEL>
<ITEM>describes in detail the aim or purpose for which an electronic
file was encoded, together with any other relevant information
concerning the process by which it was assembled or collected.</ITEM> 
<LABEL><GI>ptr</GI></LABEL>
<ITEM>a pointer to another location in the current document in terms
of one or more identifiable elements.</ITEM>  
<LABEL><GI>publicationStmt</GI></LABEL>
<ITEM>groups information concerning the publication or distribution
of an electronic or other text. </ITEM>  
<LABEL><GI>publisher</GI></LABEL>
<ITEM>provides the name of the organization responsible for the
publication or distribution of a bibliographic item.</ITEM>  
<LABEL><GI>pubPlace</GI></LABEL>
<ITEM>contains the name of the place where a bibliographic item was
published.</ITEM>    
<LABEL><GI>q</GI></LABEL>
<ITEM>contains a quotation or apparent quotation.</ITEM>  
<LABEL><GI>ref</GI></LABEL>
<ITEM>a reference to another location in the current document, in
terms of one or more identifiable elements, possibly modified by
additional text or comment.</ITEM>  
<LABEL><GI>refsDecl</GI></LABEL>
<ITEM>specifies how canonical references are constructed for this
text.</ITEM>  
<LABEL><GI>reg</GI></LABEL>
<ITEM>contains a reading which has been regularized or normalized in
some sense; original reading may be given in the attribute
<IDENT>orig</IDENT>.</ITEM>  
<LABEL><GI>rendition</GI></LABEL>
<ITEM>supplies information about the intended rendition of one or
more elements.</ITEM>  
<LABEL><GI>resp</GI></LABEL>
<ITEM>contains a phrase describing the nature of a person's
intellectual responsibility.</ITEM>  
<LABEL><GI>respStmt</GI></LABEL>
<ITEM>supplies a statement of responsibility for someone responsible
for the intellectual content of a text, edition, recording, or series,
where the specialized elements for authors, editors, etc., do not
suffice or do not apply.</ITEM>  
<LABEL><GI>revisionDesc</GI></LABEL>
<ITEM>summarizes the revision history for a file.</ITEM>  
<LABEL><GI>row</GI></LABEL>
<ITEM>contains one row of a table.</ITEM>  
<LABEL><GI>rs</GI></LABEL>
<ITEM>contains a general purpose name or referring string. Attributes
can indicate its type, give a normalized form, or associate it with a
specific individual or thing by means of a unique identifier.
</ITEM>  
<LABEL><GI>s</GI></LABEL>
<ITEM>identifies an <TERM>s-unit</TERM> within a document, for
purposes of establishing a simple canonical referencing scheme
covering the entire text.</ITEM>  
<LABEL><GI>salute</GI></LABEL>
<ITEM>contains a salutation or greeting prefixed to a foreword,
dedicatory epistle or other division of a text, or the salutation in
the closing of a letter, preface, etc.</ITEM>  
<LABEL><GI>samplingDecl</GI></LABEL>
<ITEM>contains a prose description of the rationale and methods used
in sampling texts in the creation of a corpus or collection.</ITEM>  
<LABEL><GI>seg</GI></LABEL>
<ITEM>identifies a span or segment of text within a document so that
it may be pointed to; the <IDENT>type</IDENT> attribute categorizes
the segment.</ITEM>  
<LABEL><GI>series</GI></LABEL>
<ITEM>contains information about the series in which a book or other
bibliographic item has appeared.</ITEM>  
<LABEL><GI>seriesStmt</GI></LABEL>
<ITEM>groups information about the <TERM>series</TERM>, if any, to
which a publication belongs.</ITEM>  
<LABEL><GI>sic</GI></LABEL>
<ITEM>contains text reproduced although apparently incorrect or
inaccurate.</ITEM>  
<LABEL><GI>signed</GI></LABEL>
<ITEM>contains the closing salutation, etc., appended to a foreword,
dedicatory epistle, or other division of a text.</ITEM>  
<LABEL><GI>soCalled</GI></LABEL>
<ITEM>contains a word or phrase for which the author or narrator
indicates a disclaiming of responsibility, for example by the use of
scare quotes or italics.</ITEM>  
<LABEL><GI>sourceDesc</GI></LABEL>
<ITEM>supplies a bibliographic description of the copy text(s) from
which an electronic text was derived or generated.</ITEM>  
<LABEL><GI>sp</GI></LABEL>
<ITEM>contains an individual speech in a performance text, or a
passage presented as such in a prose or verse text, with <IDENT>who</IDENT>
attribute to identify speaker.
</ITEM>  
<LABEL><GI>speaker</GI></LABEL>
<ITEM>contains a special form of heading or label, giving the name of
one or more speakers in a performance text or fragment.</ITEM>  
<LABEL><GI>sponsor</GI></LABEL>
<ITEM>specifies the name of a sponsoring organization or institution.</ITEM>
 
<LABEL><GI>stage</GI></LABEL>
<ITEM>contains any kind of stage direction within a performance text
or fragment.</ITEM>  
<LABEL><GI>table</GI></LABEL>
<ITEM>contains text displayed in tabular form, in rows and columns.
</ITEM>  
<LABEL><GI>tagsDecl</GI></LABEL>
<ITEM>provides detailed information about the tagging applied to an
SGML document. </ITEM>  
<LABEL><GI>tagUsage</GI></LABEL>
<ITEM>supplies information about the usage of a specific element
within the outermost <GI>text</GI> of a TEI conformant document.</ITEM>
 
<LABEL><GI>taxonomy</GI></LABEL>
<ITEM>defines a typology used to classify texts either implicitly, by
means of a bibliographic citation, or explicitly by a structured
taxonomy.</ITEM>  
<LABEL><GI>term</GI></LABEL>
<ITEM>contains a single-word, multi-word or symbolic designation
which is regarded as a technical term.</ITEM>  
<LABEL><GI>textClass</GI></LABEL>
<ITEM>groups information which describes the nature or topic of a
text in terms of a standard classification scheme, thesaurus, etc.</ITEM>
 
<LABEL><GI>time</GI></LABEL>
<ITEM>contains a phrase defining a time of day in any format, with
normalized value in the <IDENT>value</IDENT> attribute.</ITEM>  
<LABEL><GI>title</GI></LABEL>
<ITEM>contains the title of a work, whether article, book, journal,
or series, including any alternative titles or subtitles.</ITEM>  
<LABEL><GI>titlePage</GI></LABEL>
<ITEM>contains the title page of a text, appearing within the front
or back matter. </ITEM>  
<LABEL><GI>titlePart</GI></LABEL>
<ITEM>contains a subsection or division of the title of a work, as
indicated on a title page; also used for free-floating fragments of
the title page not part of the document title, authorship attribution,
etc.
</ITEM>  
<LABEL><GI>titleStmt</GI></LABEL>
<ITEM>groups information about the title of a work and those
responsible for its intellectual content.</ITEM>  
<LABEL><GI>trailer</GI></LABEL>
<ITEM>contains a closing title or footer appearing at the end of  a
division of a text. </ITEM>  
<LABEL><GI>unclear</GI></LABEL>
<ITEM>contains a word, phrase, or passage which cannot be transcribed
with certainty because it is illegible or inaudible in the source.
</ITEM>  
<LABEL><GI>xptr</GI></LABEL>
<ITEM>defines a pointer to another location in the current document
or an external document.</ITEM>  
<LABEL><GI>xref</GI></LABEL>
<ITEM>defines a pointer to another location in the current document
or an external document, possibly modified by additional text or
comment.</ITEM>  
</LIST>
<!-- </temp>                                                  -->
<!--, together with a reference to the section in which it is
discussed in the present document. <list type=gloss> </list> -->
</P>
</div2>
</DIV1>
<DIV1 ID="bibapp"><HEAD>References</HEAD>

<P>This appendix contains a list of bibliographic references for
works on SGML and related topics, presented also to demonstrate the
use of the <GI>bibl</GI> element discussed in section <PTR TARGET="bibls"
> above. 
<EG><![CDATA [<listBibl>

<bibl>ALA (American Library Association).  <title>ALA-LC
Romanization Tables:  Transliteration Schemes for Non-Roman
Scripts</title>, approved by the Library of Congress and the American
Library Association, tables compiled and edited by Randall K. Barry.
Washington:  Library of Congress, 1991.
</bibl>

<bibl>ANSI (American National Standards Institute).  <title>ANSI
X3.4-1986.  American National Standard for Information Systems --- Coded
Character Sets --- 7-bit American National Standard Code for Information
Interchange (7-bit ASCII).</title>  [New York]:  ANSI, 1986.
</bibl>

<bibl>
<author>Barnard, David, et al.</author>
<title level=a>SGML-Based Markup for Literary Texts.</title>
<title>Computers and the Humanities</title>
<biblScope>22 (1988):  265-76.</biblScope>
</bibl>

<bibl>
   <author>Barron, David</author>
   <title level=a>Why use SGML?</title>
   <title>Electronic Publishing
          Origination, Dissemination and Design</title>
   <biblScope>2.1 (April 1989):  3-24.</biblScope>
</bibl>

<bibl>
<author>Coombs, James H., Allen H. Renear, and Steven J.
DeRose.</author> <title level=a>Markup Systems and the Future of
Scholarly Text Processing.</title> <title>Communications of the
ACM</title>
<biblScope>30.11 (November 1987):  933-947.</biblScope>
</bibl>

<bibl>
   <editor>Cover, Robin C., et al.</editor>
   <title>A Bibliography on Structured Text:
          Technical Report 90-281</title>
   <publisher>Queen's University,</publisher>
   <pubPlace>Kingston, Ont.</pubPlace>
   <date>June 1990</date>
<note place=inline>A current version of this bibliography
is maintained at <code>http://www.sil.org/sgml/sgml.html</code>.</note>
</bibl>

<bibl>Goldfarb, Charles F., <title>The SGML Handbook.</title>
Oxford:  Clarendon Press, 1990.</bibl>

<bibl>
   <author>van Herwijnen, Eric.</author>
   <title>Practical SGML.</title>
   <publisher>Kluwer Academic Publishers</publisher>
   <date>1990; 2d ed. 1994.</date>
</bibl>

<bibl>ISO (International Organization for Standardization).
<title>ISO 8859-1:  1987 (E).  Information processing --- 8-bit
Single-Byte Coded Graphic Character Sets --- Part 1:  Latin Alphabet No.
1.</title>  (<title>Traitement de l'information --- Jeux de caract&egrave;res
graphiques cod&eacute;s sur un seul octet --- Partie 1:  Alphabet latin no
1.</title>) First edition --- 1987-02-15.  [Geneva]:  International
Organization for Standardization, 1987.
</bibl>

<bibl>ISO (International Organization for Standardization).
<title>ISO 8879-1986 (E).  Information processing --- Text and Office
Systems --- Standard Generalized Markup Language (SGML).</title>  First
edition --- 1986-10-15.  [Geneva]:  International Organization for
Standardization, 1986.
</bibl>

<bibl>ISO (International Organization for Standardization).
<title>ISO 8879:1986 / A1:1988 (E).  Information processing --- Text and
Office Systems --- Standard Generalized Markup Language (SGML),
Amendment 1.</title>  Published 1988-07-01.
[Geneva]:  International Organization for Standardization, 1988.
</bibl>

<bibl>ISO (International Organization for Standardization).
<title>ISO/TR 9573-1988(E).  Information processing---SGML support
facilities---Techniques for using SGML.</title>  Final text of
1988-09-12.
</bibl>

<bibl>ISO (International Organization for Standardization), and IEC
(International Electrotechnical Commission). <title>ISO/IEC 10646-1:
1993.  Information technology --- Universal Multiple-Octet Coded
Character Set (UCS) --- Part 1:  Architecture and Basic Multilingual
Plane.</title>
[Geneva]:  International Organization for
Standardization, 1993.
</bibl>

<bibl>ISO (International Organization for Standardization), and IEC
(International Electrotechnical Commission).
<title>ISO/IEC 10744: 1992.  Information
Technology --- Hypermedia/Time-based Structuring Language
(HyTime).</title>
[Geneva]:  International Organization for Standardization, 1992.
</bibl>

<bibl>
Langendoen, D. Terence, and Gary F. Simons.
<title level=a>A Rationale for the TEI
Recommendations for Feature-Structure Markup.</title>
<title>Computers and the Humanities</title>
(1995; in press).
</bibl>

<bibl>
   <author>Warmer, J., and S. van Egmond</author>
   <title level=a>The implementation of the Amsterdam
        SGML parser.</title>
   <title>Electronic Publishing
        Origination, Dissemination and Design</title>
   <biblScope>2.2 (July 1989):  65-90.</biblScope>
</bibl>

</listBibl>

]]></EG></P></DIV1></BACK></TEXT></TEI.2>

