Explain the behavior of the following program:
#include <iostream>
#include <string>
using namespace std;
// Base class for every character in the demo. It stores the character's name
// and declares a virtual print() that each derived class replaces with its own
// description.
class Player {
public:
    // Creates a player called `name`; the string is copied into the object.
    Player(const std::string& name) : name_(name) {}

    // Player is a polymorphic base class, so its destructor is virtual:
    // destroying a derived object through a Player* then runs the derived
    // class's destructor as well.
    virtual ~Player() {}

    // Writes "Player <name>" followed by a newline to standard output.
    virtual void print() const { std::cout << "Player " << name_ << std::endl; }

    // Returns the stored name by const reference (no copy is made).
    const std::string& name() const { return name_; }

private:
    std::string name_;  // set once by the constructor
};
class Elf : public Player {
public:
Elf(const string& name, const string& weapon)
: Player(name), weapon_(weapon) {
}
virtual void print() const {
cout << "Elf " << name() << " wielding " << weapon_ << endl;
}
private:
string weapon_;
};
class Hobbit : public Player {
public:
Hobbit(const string& name, const string& food)
: Player(name), food_(food) {
}
virtual void print() const {
cout << "Hobbit " << name() << " eating " << food_ << endl;
}
private:
string food_;
};
// Prints `player` through a base-class pointer. print() is virtual, so the
// version that runs is chosen at run time from the object itself rather than
// from the static type Player. `player` must not be null.
void do_print(const Player* player) {
    (*player).print();
}
// Demonstrates how virtual dispatch is implemented by exchanging the first
// pointer-sized word of an Elf object with that of a Hobbit object and then
// printing all objects again.
// NOTE(review): the reinterpret_cast statements assume the compiler stores the
// hidden vtable pointer (vptr) in the first word of each object. That layout is
// implementation-specific and the trick is undefined behavior in standard C++;
// it is for illustration only — confirm on your toolchain before relying on the
// recorded output.
int main() {
Elf legolas("Legolas", "bow and arrow");
Elf elrond("Elrond", "sword named Andural");
Hobbit samwise("Samwise Gamgee", "lembas wafer");
// Baseline: every object is printed by its own class's print().
do_print(&legolas); do_print(&elrond); do_print(&samwise); cout << endl;
// Save the first pointer-sized word of samwise (presumably its vptr).
void* p = *reinterpret_cast<void**>(&samwise);
// Overwrite samwise's first word with legolas's first word ...
*reinterpret_cast<void**>(&samwise) = *reinterpret_cast<void**>(&legolas);
// ... and give legolas the word saved from samwise, completing the swap.
*reinterpret_cast<void**>(&legolas) = p;
// Calls through a Player* now pick the exchanged print(): in the recorded
// output legolas prints as a Hobbit and samwise as an Elf, while elrond,
// which was not touched, is unchanged. Only the dispatch changed; the data
// members (name, weapon/food strings) stay where they were.
do_print(&legolas); do_print(&elrond); do_print(&samwise); cout << endl;
// Calls made directly on the objects print the original class names again in
// the recorded output — presumably because the compiler knows each object's
// exact type here and binds print() statically without consulting the vptr.
legolas.print(); elrond.print(); samwise.print(); cout << endl;
return 0;
}
Elf Legolas wielding bow and arrow
Elf Elrond wielding sword named Andural
Hobbit Samwise Gamgee eating lembas wafer
Hobbit Legolas eating bow and arrow
Elf Elrond wielding sword named Andural
Elf Samwise Gamgee wielding lembas wafer
Elf Legolas wielding bow and arrow
Elf Elrond wielding sword named Andural
Hobbit Samwise Gamgee eating lembas wafer
Essentially, the highlighted lines (the three reinterpret_cast statements) swap the objects' vptr!
Σ: set of symbols
ε: empty string
L ⊂ Σ*: set of words or strings (language)
w in L?
L?
L(M): set of words (language) that machine M accepts.
$a^n b^n$ (a number of $a$s followed by an equal number of $b$s);
but we can decide $a^n b^m$ (some $a$s followed by some $b$s)
$a^n b^n$ can be decided by a context-free grammar
a, b ∈ Σ, assume x, y are regular expressions (recursive definition):
ε (empty string)
a (any symbol)
x*: zero or more repetitions of pattern x
xy: x followed by y (concatenation)
x|y: x or y
Regular expression operators are intentionally minimal because that simplifies the proofs of mathematical theorems.
Given machines that recognize x and y, we can show the construction of machines that recognize x*, xy, and x|y.
The practical value of regular expressions lies in their compact notation for expressing patterns. Here we are less concerned with keeping the set of operations minimal, so we can construct additional operators that do not add to the expressiveness of the theoretical model but shorten some patterns:
x?: (x|ε) (optional x)
x+: (xx*) or (x*x) (x one or more times)
x{n}: n repetitions of x
x{n, m}: at least n and at most m repetitions of x
[abcd]: (a|b|c|d)
[a-d]: (a|b|c|d)
[^abcd]: any character except a, b, c, or d
[^a-d]: any character except a, b, c, or d
.: any character except newline
^: match empty string at the beginning of the string (or line)
$: match empty string at the end of the string (or line)
\: escape the meaning of the next character
Numerous languages and utilities process regexps.
\d
: match digit
\w
: match word character
\s
: match whitespace (space or tab character)
(?:x): match x but do not capture the matched substring
^ and $ match beginning/end of line vs. beginning/end of string
Read the documentation. O'Reilly publishes several good books on the subject: learning..., cookbook..., mastering....
# Match test: the condition is true when $var matches the pattern.
# (/pattern/ is a placeholder for a real regular expression.)
if ($var =~ /pattern/) {
do_something();
}
# search and replace
$line =~ s/pattern/replacement/;
# Capture groups: after a successful match, $1 and $2 hold the text matched by
# the first and second parenthesized groups of the pattern (the placeholder
# pattern is presumably meant to contain two such groups).
$var =~ /pattern/;
$swaped = "$2 $1";
# Substitution used as a condition: s/// is true when a replacement was made.
# Here the first two space-separated words in $line are exchanged in place.
if ($line =~ s/(\w+) (\w+)/$2 $1/) {
print "words swapped in $line";
}
Regular expressions are enormously useful, but they can be misused:
Regular expressions are greedy. For example, suppose you want to match the first parenthesized number in the string
+...+.. 0x1d6b820 left(0x1d6b720) right(0x1d6ab70) 1557
The obvious but wrong regular expression would be
\(.*\)
That is, match '(', followed by any number of any character, followed by ')'.
The backslashes are required because an unescaped parenthesis is used for grouping subexpressions.
The expression is wrong because it will, in fact, match this substring:
(0x1d6b720) right(0x1d6ab70)
because .* is greedy: it matches as many characters as possible, so the first right parenthesis is consumed as one of the "any" characters.
A better pattern would be
\([^)\n]*\)
(match any number of characters other than a right parenthesis or newline, then match the right parenthesis).
See also: http://www.codinghorror.com/blog/2008/06/regular-expressions-now-you-have-two-problems.html
Personal opinion: use them freely for ancillary tasks, but avoid them in production code.
Case analysis:
[Ee][-+]?\d+
\.\d+([Ee][-+]?\d+)?
\d+\.\d*([Ee][-+]?\d+)?
\d+[Ee][-+]?\d+
Putting it together:
(((\d+\.\d*)|(\.\d+))([Ee][-+]?\d+)?)|(\d+[Ee][-+]?\d+)
When the \d code for digit is not supported:
((([0-9]+\.[0-9]*)|(\.[0-9]+))([Ee][-+]?[0-9]+)?)|([0-9]+[Ee][-+]?[0-9]+)
When whitespace is ignored (/x flag):
(
(
([0-9]+\.[0-9]*)
|
(\.[0-9]+)
)
(
[Ee][-+]?[0-9]+
)?
)
|
(
[0-9]+[Ee][-+]?[0-9]+
)
TODO
Reconstructing a tree graphic from the Huffman tree dump. Output is of the form (interior & leaf nodes):
+...+.. 0x1d6b820 left(0x1d6b720) right(0x1d6ab70) 1557
+...+... 0x1d6b720 left(0x1d6b600) right(0x1d6a650) 782
+...+...+ 0x1d6b600 left(0x1d6ab90) right(0x1d6a820) 394
+...+...+. 0x1d6ab90 0x55 (U) 200
+...+...+. 0x1d6a820 0x42 (B) 194
+...+...+ 0x1d6a650 0x48 (H) 388
Dot language takes a text description of a graph (nodes and edges):
node0x1d6b720 [label="782"];
node0x1d6b600 [label="394"];
node0x1d6ab90 [label="0x55 (U) 200"];
...
node0x1d6b240 -> node0x1d6ac90 [label="0"];
node0x1d6b240 -> node0x1d6b1e0 [label="1"];
node0x1d6b1e0 -> node0x1d6a3c0 [label="0"];
node0x1d6b1e0 -> node0x1d6a400 [label="1"];
Python script to convert the dump into the dot file:
#! /usr/bin/python
import re
import sys
# Convert a Huffman tree dump read from standard input into a Graphviz dot
# description written to standard output.  Each input line describes either
# an interior node (address, left/right child addresses, frequency) or a leaf
# node (address followed by free-form text).
#
# print is always called with a single parenthesized argument so the script
# runs unchanged under both Python 2 and Python 3.

# Tell dot it's a directed graph.
print('digraph {')

# Edges are written after all the nodes have been defined.  Save them
# in a list and print them at the end.
edges = []

for n, line in enumerate(sys.stdin):
    # Try to match interior node pattern.
    match = re.search(r"(0x[0-9a-f]+).*left\((0x[0-9a-f]+).*right\((0x[0-9a-f]+)[^0-9]*([0-9]+)", line)
    if match:
        # Give each match group a symbolic name.
        node = match.group(1)
        left = match.group(2)
        right = match.group(3)
        label = match.group(4)

        # Save edges for left and right child.
        edges.append(' node%s -> node%s [label="0"];' % (node, left))
        edges.append(' node%s -> node%s [label="1"];' % (node, right))

        # Print node with label.  Node id is based on the pointer
        # value from the dump so it'll uniquely identify the node.
        print(' node%s [label="%s"];' % (node, label))
    else:
        # Didn't match interior node; try to match leaf node.
        match = re.search(r"(0x[0-9a-f]+) *(.*)$", line)
        if not match:
            # Same text the original comma-separated print statement produced.
            print('bad input line  %s :  %s' % (n, line))
            sys.exit(1)
        node = match.group(1)
        # Need to escape the quote character in the output.
        label = re.sub('"', '\\"', match.group(2))
        print(' node%s [label="%s"];' % (node, label))

# All nodes are defined; now emit the saved edges and close the graph.
for edge in edges:
    print(edge)
print('}')
Slightly more readable version of the script:
#! /usr/bin/python
import re
import sys
# Convert a Huffman tree dump read from standard input into a Graphviz dot
# description written to standard output (same job as the first script, with
# precompiled, named-group patterns).
#
# print is always called with a single parenthesized argument so the script
# runs unchanged under both Python 2 and Python 3.

# Since the pattern is matched inside a loop, compile it once outside
# the loop.  VERBOSE flag makes the pattern slightly more readable.
# Label the capture groups to make use of the capture group
# dictionary feature of Python regexps.
match_interior = re.compile(
    r"""
    (?P<node>0x[0-9a-f]+)   # node address
    .*
    left\(
    (?P<left>0x[0-9a-f]+)   # left child pointer
    .*
    right\(
    (?P<right>0x[0-9a-f]+)  # right child pointer
    [^0-9]*
    (?P<label>[0-9]+)       # label is frequency value
    """,
    re.VERBOSE)

# Leaf line: node address, optional spaces, then the rest of the line as label.
match_leaf = re.compile(r"(?P<node>0x[0-9a-f]+) *(?P<label>.*)$")

print('digraph {')

# Edges are written after all the nodes have been defined.  Save them
# in a list and print them at the end.
edges = []

for n, line in enumerate(sys.stdin):
    match = match_interior.search(line)
    if match:
        d = match.groupdict()
        # Print node with label.  Node id is based on the pointer
        # value from the dump so it'll uniquely identify the node.
        print(' node%s [label="%s"];' % (d['node'], d['label']))
        # Save edges for left (label 0) and right (label 1) child.
        edges.append(' node%s -> node%s [label="0"];' % (d['node'], d['left']))
        edges.append(' node%s -> node%s [label="1"];' % (d['node'], d['right']))
    else:
        # Didn't match interior node; try to match leaf node.
        match = match_leaf.search(line)
        if not match:
            # Same text the original comma-separated print statement produced.
            print('bad input line  %s :  %s' % (n, line))
            sys.exit(1)
        d = match.groupdict()
        # Need to escape the quote character in the output.
        print(' node%s [label="%s"];' % (d['node'],
                                        re.sub('"', '\\"', d['label'])))

# All nodes are defined; now emit the saved edges and close the graph.
for edge in edges:
    print(edge)
print('}')