ABGSA: Difference between revisions
No edit summary |
|||
(10 intermediate revisions by the same user not shown) | |||
Line 4: | Line 4: | ||
== Metadata database == | == Metadata database == | ||
Metadatabase name is ABGSAschema | |||
+-----------------------+ | |||
| Tables_in_ABGSAschema | | |||
+-----------------------+ | |||
| ABGSAschema_main | | |||
| filesize | | |||
| fqfile_attributes | | |||
| gc_cont | | |||
+-----------------------+ | |||
The most recent mysqldump file can be found at the root of the archive directory tree. | |||
=== TABLE: ABGSAschema_main === | |||
+-------------------------+--------------+------+-----+---------+-------+ | |||
| Field | Type | Null | Key | Default | Extra | | |||
+-------------------------+--------------+------+-----+---------+-------+ | |||
| archive_name | varchar(9) | YES | | NULL | | | |||
| lane_names_orig | varchar(120) | NO | PRI | | | | |||
| readno | varchar(6) | YES | | NULL | | | |||
| ABG_individual_id | varchar(40) | YES | | NULL | | | |||
| PORHAPNUMBER | varchar(40) | YES | | NULL | | | |||
| HapMap60Kcheck | varchar(45) | YES | | NULL | | | |||
| perc_ident | float | YES | | NULL | | | |||
| indiv_pop_generic_name | varchar(60) | YES | | NULL | | | |||
| library_type | varchar(40) | YES | | NULL | | | |||
| md5sum_gzip | varchar(35) | YES | | NULL | | | |||
| md5sum_txt | varchar(35) | YES | | NULL | | | |||
| seq_format | varchar(8) | YES | | NULL | | | |||
| qscore_format | varchar(16) | YES | | NULL | | | |||
| generated_by | varchar(30) | YES | | NULL | | | |||
| Machine_type | varchar(30) | YES | | NULL | | | |||
| date_generated | varchar(12) | YES | | NULL | | | |||
| Lane_in_run | varchar(4) | YES | | NULL | | | |||
| lib_previously_known_as | varchar(30) | YES | | NULL | | | |||
| remark | varchar(100) | YES | | NULL | | | |||
+-------------------------+--------------+------+-----+---------+-------+ | |||
<source lang='mysql'> | |||
CREATE TABLE `ABGSAschema_main` ( | |||
`archive_name` varchar(9) DEFAULT NULL, | |||
`lane_names_orig` varchar(120) NOT NULL DEFAULT '', | |||
`readno` varchar(6) DEFAULT NULL, | |||
`ABG_individual_id` varchar(40) DEFAULT NULL, | |||
`PORHAPNUMBER` varchar(40) DEFAULT NULL, | |||
`HapMap60Kcheck` varchar(45) DEFAULT NULL, | |||
`perc_ident` float DEFAULT NULL, | |||
`indiv_pop_generic_name` varchar(60) DEFAULT NULL, | |||
`library_type` varchar(40) DEFAULT NULL, | |||
`md5sum_gzip` varchar(35) DEFAULT NULL, | |||
`md5sum_txt` varchar(35) DEFAULT NULL, | |||
`seq_format` varchar(8) DEFAULT NULL, | |||
`qscore_format` varchar(16) DEFAULT NULL, | |||
`generated_by` varchar(30) DEFAULT NULL, | |||
`Machine_type` varchar(30) DEFAULT NULL, | |||
`date_generated` varchar(12) DEFAULT NULL, | |||
`Lane_in_run` varchar(4) DEFAULT NULL, | |||
`lib_previously_known_as` varchar(30) DEFAULT NULL, | |||
`remark` varchar(100) DEFAULT NULL, | |||
PRIMARY KEY (`lane_names_orig`) | |||
) ENGINE=InnoDB DEFAULT CHARSET=latin1 | | |||
</source> | |||
=== TABLE: fqfile_attributes === | |||
+------------------+--------------+------+-----+-------------------+----------------+ | |||
| Field | Type | Null | Key | Default | Extra | | |||
+------------------+--------------+------+-----+-------------------+----------------+ | |||
| id | int(11) | NO | PRI | NULL | auto_increment | | |||
| lane_names_orig | varchar(120) | YES | | NULL | | | |||
| md5check | varchar(120) | YES | | NULL | | | |||
| filesize | bigint(15) | YES | | NULL | | | |||
| numlines_in_gzip | bigint(15) | YES | | NULL | | | |||
| qval_offset | varchar(10) | YES | | NULL | | | |||
| maxlength_seq | int(6) | YES | | NULL | | | |||
| totnumbases | bigint(15) | YES | | NULL | | | |||
| totnumreads | int(12) | YES | | NULL | | | |||
| firstreadname | varchar(120) | YES | | NULL | | | |||
| lastreadname | varchar(120) | YES | | NULL | | | |||
| ts_create | timestamp | NO | | CURRENT_TIMESTAMP | | | |||
+------------------+--------------+------+-----+-------------------+----------------+ | |||
<source lang='mysql'> | |||
CREATE TABLE `fqfile_attributes` ( | |||
`id` int(11) NOT NULL AUTO_INCREMENT, | |||
`lane_names_orig` varchar(120) DEFAULT NULL, | |||
`md5check` varchar(120) DEFAULT NULL, | |||
`filesize` bigint(15) DEFAULT NULL, | |||
`numlines_in_gzip` bigint(15) DEFAULT NULL, | |||
`qval_offset` varchar(10) DEFAULT NULL, | |||
`maxlength_seq` int(6) DEFAULT NULL, | |||
`totnumbases` bigint(15) DEFAULT NULL, | |||
`totnumreads` int(12) DEFAULT NULL, | |||
`firstreadname` varchar(120) DEFAULT NULL, | |||
`lastreadname` varchar(120) DEFAULT NULL, | |||
`ts_create` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, | |||
PRIMARY KEY (`id`) | |||
) ENGINE=InnoDB AUTO_INCREMENT=1561 DEFAULT CHARSET=latin1 | |||
</source> | |||
=== TABLE: gc_cont === | |||
+-----------------+--------------+------+-----+-------------------+----------------+ | |||
| Field | Type | Null | Key | Default | Extra | | |||
+-----------------+--------------+------+-----+-------------------+----------------+ | |||
| id | int(11) | NO | PRI | NULL | auto_increment | | |||
| lane_names_orig | varchar(120) | YES | | NULL | | | |||
| gc_cont | float | YES | | NULL | | | |||
| ts_create | timestamp | NO | | CURRENT_TIMESTAMP | | | |||
+-----------------+--------------+------+-----+-------------------+----------------+ | |||
<source lang='mysql'> | |||
CREATE TABLE `gc_cont` ( | |||
`id` int(11) NOT NULL AUTO_INCREMENT, | |||
`lane_names_orig` varchar(120) DEFAULT NULL, | |||
`gc_cont` float DEFAULT NULL, | |||
`ts_create` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, | |||
PRIMARY KEY (`id`) | |||
) ENGINE=InnoDB AUTO_INCREMENT=1561 DEFAULT CHARSET=latin1 | |||
</source> | |||
=== TABLE: filesize === | |||
This table is currently redundant with information in TABLE: fqfile_attributes. Slated to be removed from the database altogether to avoid confusion in future additions to the database. In addition, current table is not up to date for all entries. | |||
+-----------------+--------------+------+-----+---------+-------+ | |||
| Field | Type | Null | Key | Default | Extra | | |||
+-----------------+--------------+------+-----+---------+-------+ | |||
| lane_names_orig | varchar(120) | YES | | NULL | | | |||
| filesize | bigint(15) | YES | | NULL | | | |||
| md5check | varchar(35) | YES | | NULL | | | |||
+-----------------+--------------+------+-----+---------+-------+ | |||
<source lang='mysql'> | |||
CREATE TABLE `filesize` ( | |||
`lane_names_orig` varchar(120) DEFAULT NULL, | |||
`filesize` bigint(15) DEFAULT NULL, | |||
`md5check` varchar(35) DEFAULT NULL | |||
) ENGINE=MyISAM DEFAULT CHARSET=latin1 | |||
</source> | |||
== Creating the metadata and insert in database == | |||
=== Main metadata === | |||
https://github.com/hjmegens/NGStools/blob/master/various_scripts/ABGSAschema/get_test_attributes.py | |||
=== GC content === | |||
https://github.com/hjmegens/NGStools/blob/master/various_scripts/ABGSAschema/get_gccont.py | |||
== Checking data integrity == | == Checking data integrity == | ||
https://github.com/hjmegens/NGStools/blob/master/various_scripts/ABGSAschema/ABGSA_integrity_extdrives.pl | |||
== Backing up == | == Backing up == |
Latest revision as of 11:31, 30 November 2013
Rationale of the ABGSA
Description of the layout
Metadata database
Metadatabase name is ABGSAschema
+-----------------------+ | Tables_in_ABGSAschema | +-----------------------+ | ABGSAschema_main | | filesize | | fqfile_attributes | | gc_cont | +-----------------------+
The most recent mysqldump file can be found at the root of the archive directory tree.
TABLE: ABGSAschema_main
+-------------------------+--------------+------+-----+---------+-------+ | Field | Type | Null | Key | Default | Extra | +-------------------------+--------------+------+-----+---------+-------+ | archive_name | varchar(9) | YES | | NULL | | | lane_names_orig | varchar(120) | NO | PRI | | | | readno | varchar(6) | YES | | NULL | | | ABG_individual_id | varchar(40) | YES | | NULL | | | PORHAPNUMBER | varchar(40) | YES | | NULL | | | HapMap60Kcheck | varchar(45) | YES | | NULL | | | perc_ident | float | YES | | NULL | | | indiv_pop_generic_name | varchar(60) | YES | | NULL | | | library_type | varchar(40) | YES | | NULL | | | md5sum_gzip | varchar(35) | YES | | NULL | | | md5sum_txt | varchar(35) | YES | | NULL | | | seq_format | varchar(8) | YES | | NULL | | | qscore_format | varchar(16) | YES | | NULL | | | generated_by | varchar(30) | YES | | NULL | | | Machine_type | varchar(30) | YES | | NULL | | | date_generated | varchar(12) | YES | | NULL | | | Lane_in_run | varchar(4) | YES | | NULL | | | lib_previously_known_as | varchar(30) | YES | | NULL | | | remark | varchar(100) | YES | | NULL | | +-------------------------+--------------+------+-----+---------+-------+
<source lang='mysql'> CREATE TABLE `ABGSAschema_main` (
`archive_name` varchar(9) DEFAULT NULL, `lane_names_orig` varchar(120) NOT NULL DEFAULT , `readno` varchar(6) DEFAULT NULL, `ABG_individual_id` varchar(40) DEFAULT NULL, `PORHAPNUMBER` varchar(40) DEFAULT NULL, `HapMap60Kcheck` varchar(45) DEFAULT NULL, `perc_ident` float DEFAULT NULL, `indiv_pop_generic_name` varchar(60) DEFAULT NULL, `library_type` varchar(40) DEFAULT NULL, `md5sum_gzip` varchar(35) DEFAULT NULL, `md5sum_txt` varchar(35) DEFAULT NULL, `seq_format` varchar(8) DEFAULT NULL, `qscore_format` varchar(16) DEFAULT NULL, `generated_by` varchar(30) DEFAULT NULL, `Machine_type` varchar(30) DEFAULT NULL, `date_generated` varchar(12) DEFAULT NULL, `Lane_in_run` varchar(4) DEFAULT NULL, `lib_previously_known_as` varchar(30) DEFAULT NULL, `remark` varchar(100) DEFAULT NULL, PRIMARY KEY (`lane_names_orig`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 | </source>
TABLE: fqfile_attributes
+------------------+--------------+------+-----+-------------------+----------------+ | Field | Type | Null | Key | Default | Extra | +------------------+--------------+------+-----+-------------------+----------------+ | id | int(11) | NO | PRI | NULL | auto_increment | | lane_names_orig | varchar(120) | YES | | NULL | | | md5check | varchar(120) | YES | | NULL | | | filesize | bigint(15) | YES | | NULL | | | numlines_in_gzip | bigint(15) | YES | | NULL | | | qval_offset | varchar(10) | YES | | NULL | | | maxlength_seq | int(6) | YES | | NULL | | | totnumbases | bigint(15) | YES | | NULL | | | totnumreads | int(12) | YES | | NULL | | | firstreadname | varchar(120) | YES | | NULL | | | lastreadname | varchar(120) | YES | | NULL | | | ts_create | timestamp | NO | | CURRENT_TIMESTAMP | | +------------------+--------------+------+-----+-------------------+----------------+
<source lang='mysql'> CREATE TABLE `fqfile_attributes` (
`id` int(11) NOT NULL AUTO_INCREMENT, `lane_names_orig` varchar(120) DEFAULT NULL, `md5check` varchar(120) DEFAULT NULL, `filesize` bigint(15) DEFAULT NULL, `numlines_in_gzip` bigint(15) DEFAULT NULL, `qval_offset` varchar(10) DEFAULT NULL, `maxlength_seq` int(6) DEFAULT NULL, `totnumbases` bigint(15) DEFAULT NULL, `totnumreads` int(12) DEFAULT NULL, `firstreadname` varchar(120) DEFAULT NULL, `lastreadname` varchar(120) DEFAULT NULL, `ts_create` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1561 DEFAULT CHARSET=latin1 </source>
TABLE: gc_cont
+-----------------+--------------+------+-----+-------------------+----------------+ | Field | Type | Null | Key | Default | Extra | +-----------------+--------------+------+-----+-------------------+----------------+ | id | int(11) | NO | PRI | NULL | auto_increment | | lane_names_orig | varchar(120) | YES | | NULL | | | gc_cont | float | YES | | NULL | | | ts_create | timestamp | NO | | CURRENT_TIMESTAMP | | +-----------------+--------------+------+-----+-------------------+----------------+
<source lang='mysql'>
CREATE TABLE `gc_cont` ( `id` int(11) NOT NULL AUTO_INCREMENT, `lane_names_orig` varchar(120) DEFAULT NULL, `gc_cont` float DEFAULT NULL, `ts_create` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1561 DEFAULT CHARSET=latin1 </source>
TABLE: filesize
This table is currently redundant with information in TABLE: fqfile_attributes. Slated to be removed from the database altogether to avoid confusion in future additions to the database. In addition, current table is not up to date for all entries.
+-----------------+--------------+------+-----+---------+-------+ | Field | Type | Null | Key | Default | Extra | +-----------------+--------------+------+-----+---------+-------+ | lane_names_orig | varchar(120) | YES | | NULL | | | filesize | bigint(15) | YES | | NULL | | | md5check | varchar(35) | YES | | NULL | | +-----------------+--------------+------+-----+---------+-------+
<source lang='mysql'> CREATE TABLE `filesize` (
`lane_names_orig` varchar(120) DEFAULT NULL, `filesize` bigint(15) DEFAULT NULL, `md5check` varchar(35) DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1 </source>
Creating the metadata and insert in database
Main metadata
https://github.com/hjmegens/NGStools/blob/master/various_scripts/ABGSAschema/get_test_attributes.py
GC content
https://github.com/hjmegens/NGStools/blob/master/various_scripts/ABGSAschema/get_gccont.py