#!/usr/bin/php -f Parse (One of the COL_xxx defilnes) var $name; var $datatype; // string of raw data type var $length; var $decimals; var $unsigned; var $zerofill; var $binary; var $values; // array of strings. var $null; var $autoincrement; var $default; function & Column($type) { $this->type = $type; $this->name = ''; $this->datatype = ''; $this->length = -1; $this->decimals = -1; $this->unsigned = false; $this->zerofill = false; $this->binary = false; $this->values = array(); $this->null = true; $this->autoincrement = false; $this->default = ''; return $this; } function getlength() { if($this->length >= 0) { return $this->length; } if($this->datatype == 'INT') { if($this->unsigned) { return 10; } else { return 11; } } if($this->datatype == 'TINYINT') { if($this->unsigned) { return 3; } else { return 4; } } if($this->datatype == 'SMALLINT') { if($this->unsigned) { return 5; } else { return 6; } } if($this->datatype == 'BIGINT') { if($this->unsigned) { return 20; } else { return -1; } } if($this->datatype == 'NUMERIC' || $this->datatype == 'DECIMAL') { return 10; } return -1; } function getdecimals() { if($this->decimals >= 0) { return $this->decimals; } if($this->datatype == 'NUMERIC' || $this->datatype == 'DECIMAL') { return 0; } return -1; // an error ? } function getdefault() { if(strlen($this->default) > 0) { if($this->default == 'NULL') { return $this->default; } if(substr($this->default, 0, 1) == "'") { return $this->default; } else { return "'".$this->default."'"; } } else if($this->null) { return "NULL"; } else if($this->autoincrement) { return "NULL"; } else { // this requires a data type specific default if($this->datatype == 'INT' || $this->datatype == 'INTEGER') { return "'0'"; } if($this->datatype == 'YEAR') { return "'0000'"; } if($this->datatype == 'CHAR' || $this->datatype == 'VARCHAR' || $this->datatype =='TEXT') { return "''"; } if($this->datatype == 'DATE') { return "'0000-00-00'"; } if($this->datatype == 'DATETIME') { return "'0000-00-00 00:00:00'"; } if($this->datatype == 'TIMESTAMP') { return 'NULL'; } if($this->datatype == 'ENUM') { return $this->values[0]; } if($this->datatype == 'NUMERIC' || $this->datatype == 'DECIMAL') { $v = "0"; if($this->decimals > 0) { $v.= ".".str_repeat('0', $this->decimals); } return "'$v'"; } } echo "Warning: no known default for $this->name type $this->datatype\n"; return ""; } function getstring() { $s = ''; if($this->type == COL_PRIMARY_KEY) { $s.="PRIMARY KEY ("; for($i = 0; $i < sizeof($this->values); $i++) { if($i > 0) { $s.=", "; } $s.=$this->values[$i]; } $s.=")"; } else if($this->type == COL_UNIQUE) { $s.="UNIQUE KEY ".$this->name." ("; for($i = 0; $i < sizeof($this->values); $i++) { if($i > 0) { $s.=", "; } $s.=$this->values[$i]; } $s.=")"; } else if($this->type == COL_KEY) { $s.="KEY ".$this->name." ("; for($i = 0; $i < sizeof($this->values); $i++) { if($i > 0) { $s.=", "; } $s.=$this->values[$i]; } $s.=")"; } else if($this->type == COL_FIELD) { $s.="$this->name $this->datatype"; if($this->datatype == 'CHAR' || $this->datatype == 'VARCHAR') { if($this->getlength() >= 0) { $s.="(".$this->getlength().")"; } if($this->binary) { $s.=" BINARY"; } } else if($this->datatype == 'TINYINT' || $this->datatype == 'SMALLINT' || $this->datatype == 'MEDIUMINT' || $this->datatype == 'INT' || $this->datatype == 'INTEGER' || $this->datatype == 'YEAR' || $this->datatype == 'BIGINT') { if($this->getlength() >= 0) { $s.="(".$this->getlength().")"; } if($this->unsigned) { $s.=" UNSIGNED"; } if($this->zerofill) { $s.=" ZEROFILL"; } } else if($this->datatype == 'REAL' || $this->datatype == 'DOUBLE' || $this->datatype == 'FLOAT' || $this->datatype == 'DECIMAL' || $this->datatype == 'NUMERIC') { //if($this->length >= 0) { $s.="(".$this->getlength(); //if($this->decimals >= 0) { $s.=", ".$this->getdecimals(); } $s.=")"; } if($this->unsigned) { $s.=" UNSIGNED"; } if($this->zerofill) { $s.=" ZEROFILL"; } } else if($this->datatype == 'ENUM' || $this->datatype == 'SET') { $s.="("; for($i = 0; $i < sizeof($this->values); $i++) { if($i > 0) { $s.=", "; } $s.=$this->values[$i]; } $s.=")"; } else { // should be a simple type... } if($this->null || $this->datatype == "TIMESTAMP") { $s.=" NULL"; } else { $s.=" NOT NULL"; } if(strlen($this->getdefault()) > 0) { $s.=" DEFAULT ".$this->getdefault(); } if($this->autoincrement) { $s.=" AUTO_INCREMENT"; } } else { echo "Unimplemented type $this->type\n"; die; } return $s; } function dump() { echo $this->getstring()."\n"; } function add_value($v) { array_push($this->values, $v); } function setdatatype($t) { $this->datatype = strtoupper($t); if($this->datatype == 'INTEGER') { $this->datatype = 'INT'; } if($this->datatype == 'NUMERIC') { $this->datatype = 'DECIMAL'; } } function setdefault($d) { // echo $this->name." default set to $d already has value".$this->default."\n"; $this->default = $d; } } class Table { var $name; var $columns; function & Table($name) { $this->name = $name; $this->columns = array(); return $this; } function & findcolumn($name) { for($i = 0; $i < sizeof($this->columns); $i++) { if($this->columns[$i]->name == $name) { return $this->columns[$i]; } } echo "Error: Column $name not found in table\n"; return false; } function ParseError($e, $t) { echo "Parse Error. $e expected but $t found\n"; die; } function parse() { global $trace; //echo "Parsing table $this->name\n"; /* 0 = start 1 = create_definition 2 = primary_key 3 = key 4 = index 5 = unique 6 = fulltexy 7 = foreign key (constraint) 8 = check 9 = column */ define("TAB_START", 0); define("TAB_CREATE", 1); define("TAB_PRIMARY_KEY", 2); define("TAB_KEY", 3); define("TAB_INDEX", 4); define("TAB_UNIQUE_KEY", 5); define("TAB_FULLTEXT", 6); define("TAB_FOREIGN_KEY", 7); define("TAB_CHECK", 8); define("TAB_COLUMN", 9); define("TAB_SKIP", 10); define("TAB_SEP", 11); define("TAB_TYPE", 111); define("TAB_CHAR", 12); define("TAB_INT", 13); define("TAB_ENUM", 14); define("TAB_TAIL", 15); define("TAB_DEFAULT", 151); define("TAB_NUMERIC", 16); $state = TAB_START; // consume up to the next ';' $dont_consume = false; // set to true when you want the new state to see this token while(true) { if($dont_consume == false) { $t = getToken(); //echo "$state $t\n"; } $dont_consume = false; if(strlen($t) == 0) { // unexpected EOF break; } if($t == ";") { break; } else { if($trace) echo "$t\n"; // consume... if($state == TAB_START) { if($t == "(") { $state = TAB_CREATE; } } else if($state == TAB_CREATE) { unset($column); if(TokenMatch($t, 'PRIMARY')) { $state = TAB_PRIMARY_KEY; } else if(TokenMatch($t, 'KEY')) { $state = TAB_KEY; } else if(TokenMatch($t, 'INDEX')) { $state = TAB_INDEX; } else if(TokenMatch($t, 'UNIQUE')) { $state = TAB_UNIQUE_KEY; } else if(TokenMatch($t, 'FULLTEXT')) { $state = TAB_FULLTEXT; } else if(TokenMatch($t, 'CONSTRAINT') || TokenMatch($t, 'FOREIGN')) { $state = TAB_FOREIGN_KEY; } else if(TokenMatch($t, 'CHECK')) { $state = TAB_CHECK; } else // must be column name { $column = new Column(COL_FIELD); array_push($this->columns, & $column); $column->name = $t; $state = TAB_COLUMN; } } else if($state == TAB_PRIMARY_KEY) // start of primary key { if(TokenMatch($t, 'KEY')) { // skip... } else if($t == '(') { unset($column); $column = new Column(COL_PRIMARY_KEY); array_push($this->columns, & $column); $state = 200; // get list of column names... } else { //echo "What is $t\n"; // mysql does not remember the name of PRIMARY KEY so we can't do much with it } } else if($state == 200) { if($t == ')') { //$column->dump(); $state = TAB_SEP; } else if ($t == ',') { // skip } else { $column->add_value($t); } } else if($state == TAB_UNIQUE_KEY || $state == TAB_FULLTEXT) // start of unique key { if(TokenMatch($t, 'KEY')) { unset($column); } else if($t == '(') { if(!isset($column)) { $column = new Column(COL_UNIQUE); array_push($this->columns, & $column); } $state = 200; // get list of column names... (reused from PRIMARY KEY) } else { // the name of the UNIQUE KEY unset($column); $column = new Column(COL_UNIQUE); array_push($this->columns, & $column); $column->name = $t; } } else if($state == TAB_COLUMN) // start of column { $column->setdatatype($t); if(TokenMatch($t, 'VARCHAR') || TokenMatch($t, 'CHAR')) { $state = TAB_CHAR; } else if(TokenMatch($t, 'TINYINT') || TokenMatch($t, 'SMALLINT') || TokenMatch($t, 'MEDIUMINT') || TokenMatch($t, 'INT') || TokenMatch($t, 'INTEGER') || TokenMatch($t, 'YEAR') || TokenMatch($t, 'TIMESTAMP') || // can a TIMESTAMP have a length? TokenMatch($t, 'BIGINT')) { $state = TAB_INT; } else if(TokenMatch($t, 'REAL') || TokenMatch($t, 'DOUBLE') || TokenMatch($t, 'FLOAT') || TokenMatch($t, 'DECIMAL') || TokenMatch($t, 'NUMERIC')) { $state = TAB_NUMERIC; } else if(TokenMatch($t, 'ENUM') || TokenMatch($t, 'SET')) { $state = TAB_ENUM; } else if(TokenMatch($t, 'DATE') || TokenMatch($t, 'TIME') || //TokenMatch($t, 'TIMESTAMP') || TokenMatch($t, 'DATETIME') || TokenMatch($t, 'TINYBLOB') || TokenMatch($t, 'BLOB') || TokenMatch($t, 'MEDIUMBLOB') || TokenMatch($t, 'LONGBLOB') || TokenMatch($t, 'TINYTEXT') || TokenMatch($t, 'TEXT') || TokenMatch($t, 'MEDIUMTEXT') || TokenMatch($t, 'LONGTEXT')) { $state = TAB_TAIL; } else { echo "Unknown data type $t\n"; die; } // skip until , or ) //$state = 10; } else if($state == TAB_SKIP) { echo "TAB_SKIP No longer used\n"; die; if($t == ",") { $state = 1; } else if($t == ")") { $state = 0; } else { // ignore this for now... } } else if($state == TAB_SEP) { if($t == ",") { $state = 1; } else if($t == ")") { // skip this } else if(TokenMatch($t, "TYPE")) { // this is the table type $state = TAB_TYPE; } else { // skip for now echo "TAB_SEP skipping $t\n"; } } else if($state == TAB_TYPE) { if($t == "=") { // skip } else { $this->type = $t; $state = $TAB_SEP; } } else if($state == TAB_CHAR) // match CHAR and VARCHAR style parameters { if($t == '(') { $state = 121; } else { ParseError("(", $t); die; } } else if($state == 121) { // this is the length of a CHAR or VARCHAR $column->length = $t; $state = 122; // close ) } else if($state == 122) { if($t == ")") { $state = 123; //possible binary... } else { ParseError(")", $t); } } else if($state == 123) { if(TokenMatch($t, 'BINARY')) { $column->binary = true; //echo "BINARY\n"; } else { $dont_consume = true; $state = TAB_TAIL; // column tail } } else if($state == TAB_INT) // match numeric (INT) style parameters { if($t == '(') { $state = 131; } else if(TokenMatch($t, 'UNSIGNED')) { $column->unsigned = true; } else if(TokenMatch($t, 'ZEROFILL')) { $column->zerofill = true; } else { $dont_consume = true; $state = TAB_TAIL; } } else if($state == 131) { if($t == ')') { $state = TAB_INT; } else { $column->length = $t; } } else if($state == TAB_ENUM) // match ENUM and SET style parameters { if($t == '(') { $state = 141; } } else if($state == 141) { if($t == ')') { //$column->dump(); $state = TAB_TAIL; } else if ($t == ',') { // skip } else { $column->add_value($t); } } else if($state == TAB_TAIL) // match column tail ([NOT NULL | NULL] [DEFAULT default_value] [AUTO_INCREMENT] [PRIMARY KEY] [reference_definition]) { if($t == ",") { //$column->dump(); $state = TAB_CREATE; } else if($t == ")") { //$column->dump(); $state = TAB_START; } else if(TokenMatch($t, "NULL")) { // skip } else if(TokenMatch($t, "NOT")) { $column->null = false; } else if(TokenMatch($t, "DEFAULT")) { $state = TAB_DEFAULT; } else if(TokenMatch($t, "AUTO_INCREMENT")) { $column->autoincrement = true; $column->null = false; // always false for auto_increment } else if(TokenMatch($t, "UNIQUE")) { // this means as well as the column, create a unique key... $column->unique = true; $unique_key = new Column(COL_UNIQUE); array_push($this->columns, & $unique_key); $unique_key->name = $column->name; $unique_key->add_value($column->name); } else { // ignore this for now... echo "TAB_TAIL skipping $t\n"; } } else if($state == TAB_DEFAULT) // process a column default = { if($t == "=") { // skip... } else { $column->setdefault($t); $state = TAB_TAIL; } } else if($state == TAB_KEY) { if($t == '(') { $state = 200; // get list of column names... (reused from PRIMARY KEY) } else { unset($column); $column = new Column(COL_KEY); $column->name = $t; array_push($this->columns, & $column); } } else if($state == TAB_NUMERIC) // match NUMERIC style parameters { if($t == '(') { $state = 161; } else if(TokenMatch($t, 'UNSIGNED')) { $column->unsigned = true; } else if(TokenMatch($t, 'ZEROFILL')) { $column->zerofill = true; } else { $dont_consume = true; $state = TAB_TAIL; } } else if($state == 161) // match length, or length) { if($t == ')') { $state = TAB_NUMERIC; } else if($t == ',') { $state = 162; } else { $column->length = $t; } } else if($state == 162) // match decimals) { if($t == ')') { $state = TAB_NUMERIC; } else { $column->decimals = $t; } } else { echo "Unknown state $state in Table::parse\n"; die; } } } //$this->dump(); //echo "Parse Done.\n\n"; } function dump() { echo "Table $this->name has\n"; for($i = 0; $i < sizeof($this->columns); $i++) { $this->columns[$i]->dump(); } echo "\n"; } } $token = array(); $next_token = 0; $tables = array(); function getToken() { global $token, $next_token; if($next_token >= sizeof($token)) { return ''; } else { return $token[$next_token++]; } } function addToken($t) { global $token; array_push($token, $t); } function instring($string, $c) { for($i = 0; $i < strlen($string); $i++) { if(substr($string, $i, 1) == $c) { return true; } } return false; } define("TOKEN_START", 0); define("TOKEN_NUMBER", 1); define("TOKEN_LITERAL", 2); define("TOKEN_STRING", 3); function tokenise($line) { global $token; $state = TOKEN_START; $line = $line."\n"; $t = ''; for($i = 0; $i < strlen($line); $i++) { $c = substr($line, $i, 1); if($state == TOKEN_NUMBER) { if($c >= '0' && $c <= '9') { $t = $t.$c; } else { addToken($t); $t = ''; $state = TOKEN_START; } } if($state == TOKEN_LITERAL) { if($c >= 'a' && $c <= 'z' || $c >= 'A' && $c <= 'Z' || instring('_', $c) || $c >= '0' && $c <= '9') { $t = $t.$c; } else { addToken($t); $t = ''; $state = TOKEN_START; } } if($state == TOKEN_STRING) { if($c == "'") { addToken("'".$t."'"); $t = ''; $c = ''; $state = TOKEN_START; } else { $t = $t.$c; } } if($state == TOKEN_START) { if(instring('(),;=', $c)) { addToken($c); $t = ''; } else if($c >= '0' && $c <= '9' || $c == '-') { // parse number $t = $c; $state = TOKEN_NUMBER; } else if($c >= 'a' && $c <= 'z' || $c >= 'A' && $c <= 'Z') { // parse string literal $t = $c; $state = TOKEN_LITERAL; } else if($c == "'") { // parse string constant $state = TOKEN_STRING; } } } } function load_file($filename) { echo "Reading file $filename\n"; $lines = file($filename); echo "Read ".sizeof($lines)." lines\n"; return process_file($lines); } function process_file($lines) { global $meta; for($i = 0; $i < sizeof($lines); $i++) { // strip off any comment $pos = strpos($lines[$i], "#"); if($pos === false) { $pos = strpos($lines[$i], "--"); } if($pos === false) { // no comment $line = trim($lines[$i]); } else { // we have a comment at #pos $comment = trim(substr($lines[$i], $pos + 1)); $line = trim(substr($lines[$i], 0, $pos)); //echo "$comment\n"; // Look for magic meta-info in the comments // database: name // host: name // user: name // password: name // rename?? $p = strpos($comment, ':'); if($p === false) { // no meta tag } else { $tag = substr($comment, 0, $p); $value = substr($comment, $p + 1); if(TokenMatch($tag, 'database') || TokenMatch($tag, 'user') || TokenMatch($tag, 'password') || TokenMatch($tag, 'host') || TokenMatch($tag, 'debug')) { $meta[$tag] = trim($value); //echo "$tag = $value\n"; } } } if(strlen($line) > 0) { //echo "$line\n"; tokenise($line); } } // parse the tokens. // this could be done with some fancy table driven parser but I don't care about most of the statements so at // this stage anyway, it will be a hack. /* 0 = no statement 1 = create statement 2 = create table statement 3 = ignore this statement */ $state = 0; // no statement $tables = array(); while(true) { $t = getToken(); if(strlen($t) == 0) { break; } if($state == 0) { if($t == 'CREATE') { $state = 1; } else { $state = 3; } } else if($state == 1) { if($t == 'TABLE') { $state = 2; } else { $state = 3; } } else if($state == 2) { // found create table... if($t == 'IF' || $t == 'NOT' || $t == 'EXISTS') { // skip this } else { //echo "CREATE TABLE $t\n"; // create a table object and hand over to it for further processing... $table = new Table($t); array_push($tables, & $table); $table->parse(); $state = 0; unset($table); } } else if($state == 3) { if($t == ';') { $state = 0; } } else { echo "Unknown state $state\n"; die; } } return $tables; } function compare_column($columna, $columnb, $table_name) { global $meta; if($columna->type != $columnb->type) { echo "ERROR: In table $table_name: Columns are no longer compatible\n"; $columna->dump(); $columnb->dump(); } else { // use the output generator to make strings to compare $stringa=$columna->getstring(); $stringb=$columnb->getstring(); if($stringa != $stringb) { if(strlen($meta['debug']) > 0) { echo "\n# $stringa"; echo "\n# $stringb\n"; } if($columnb->type == COL_PRIMARY_KEY) { $coltype = "PRIMARY KEY"; echo "ALTER TABLE $table_name DROP ".$coltype.";\n"; echo "ALTER TABLE $table_name ADD ".$stringa.";\n"; } else { echo "ALTER TABLE $table_name MODIFY ".$stringa.";\n"; } } } } function compare_table($tablea, $tableb) { // create a set of columns anotb, bnota, aandb $anotb = array(); $bnota = array(); $aandb = array(); for($i = 0; $i < sizeof($tablea->columns); $i++) { unset($column); $column = $tablea->columns[$i]; $name = $column->name; // find this column in table b $found = false; for($j = 0; $j < sizeof($tableb->columns); $j++) { if($tableb->columns[$j]->name == $name) { $found = true; break; } } if($found) { array_push($aandb, $name); } else { array_push($anotb, $name); } } for($i = 0; $i < sizeof($tableb->columns); $i++) { $column = $tableb->columns[$i]; $name = $column->name; // find this column in table a $found = false; for($j = 0; $j < sizeof($tablea->columns); $j++) { if($tablea->columns[$j]->name == $name) { $found = true; break; } } if(!$found) { array_push($bnota, $name); } unset($column); } //echo " Number of columns to add ".sizeof($anotb)."\n"; //echo " Number of columns to remove ".sizeof($bnota)."\n"; //echo " Number of columns to compare ".sizeof($aandb)."\n"; //$tablea->dump(); //$tableb->dump(); //echo "\n"; for($i = 0; $i < sizeof($anotb); $i++) { $column = $tablea->findcolumn($anotb[$i]); echo "ALTER TABLE ".$tablea->name." ADD ".$column->getstring().";\n"; } for($i = 0; $i < sizeof($bnota); $i++) { $column = $tableb->findcolumn($bnota[$i]); if($column->type == COL_KEY) { $cmd = "DROP INDEX"; } else { $cmd = "DROP"; } echo "ALTER TABLE ".$tablea->name." $cmd ".$bnota[$i].";\n"; } for($i = 0; $i < sizeof($aandb); $i++) { $name = $aandb[$i]; $columna = $tablea->findcolumn($name); $columnb = $tableb->findcolumn($name); compare_column($columna, $columnb, $tablea->name); } } echo "MySQL DataDict tool. Written by John Newbigin jn@it.swin.edu.au\n"; $args = $HTTP_SERVER_VARS["argv"]; // the fisrt file should be the datadict. $filename = $args[1]; if(!file_exists($filename)) { $filename = "datadict.sql"; if(!file_exists($filename)) { echo "Usage: mysql-dict datadict.sql [datadump.sql]\n"; die; } } $a = load_file($filename); flush(); // see if we have enough info to get the datadump if(strlen($meta['database']) > 0 && strlen($args[2]) == 0) { echo "Performing data dump\n"; $cmd = "mysqldump -d ".$meta['database']; if(strlen($meta['user']) > 0) { $cmd = $cmd." -u ".$meta['user']; } if(strlen($meta['password']) > 0) { $cmd = $cmd." --password=".$meta['password']; } if(strlen($meta['host']) > 0) { $cmd = $cmd." -h ".$meta['host']; } if($meta['debug'] == 'true') { echo "$cmd\n"; } $output = `$cmd`; $output = split("\n", $output); echo "Got ".sizeof($output)." lines\n"; $b = process_file($output); } else { // let's hope that the second parameter is a file $filename = $args[2]; if(file_exists($filename)) { $b = load_file($filename); } else { echo "Can't find the mysqldump $filename\n"; } } echo "\n# BEGIN OUTPUT\n"; $anotb = array(); $bnota = array(); $aandb = array(); for($i = 0; $i < sizeof($a); $i++) { $name = $a[$i]->name; $found = false; for($j = 0; $j < sizeof($b); $j++) { if($b[$j]->name == $name) { $found = true; break; } } if(!$found) { array_push($anotb, $name); } else { array_push($aandb, $name); } } for($i = 0; $i < sizeof($b); $i++) { $name = $b[$i]->name; $found = false; for($j = 0; $j < sizeof($a); $j++) { if($a[$j]->name == $name) { $found = true; break; } } if(!$found) { array_push($bnota, $name); } else { } } echo "\n\n"; //echo "Tables to be added ".sizeof($anotb)."\n"; for($i = 0; $i < sizeof($anotb); $i++) { echo "CREATE TABLE ".$anotb[$i].";\n"; } //echo "Tables to be removed ".sizeof($bnota)."\n"; for($i = 0; $i < sizeof($bnota); $i++) { echo "DROP TABLE ".$bnota[$i].";\n"; } //echo "Tables to be checked ".sizeof($aandb)."\n"; for($i = 0; $i < sizeof($aandb); $i++) { $name = $aandb[$i]; //echo " Checking table $name\n"; // search list a for the table object for($j = 0; $j < sizeof($a); $j++) { if($a[$j]->name == $name) { $tablea =& $a[$j]; break; } } for($j = 0; $j < sizeof($b); $j++) { if($b[$j]->name == $name) { $tableb =& $b[$j]; break; } } compare_table($tablea, $tableb); } echo "\n# END OUTPUT\n"; ?>