ABGSA: Difference between revisions

From HPCwiki
Jump to navigation Jump to search
No edit summary
 
(10 intermediate revisions by the same user not shown)
Line 4: Line 4:


== Metadata database ==
== Metadata database ==
Metadatabase name is ABGSAschema
  +-----------------------+
  | Tables_in_ABGSAschema |
  +-----------------------+
  | ABGSAschema_main      |
  | filesize              |
  | fqfile_attributes    |
  | gc_cont              |
  +-----------------------+
The most recent mysqldump file can be found at the root of the archive directory tree.
=== TABLE: ABGSAschema_main ===
  +-------------------------+--------------+------+-----+---------+-------+
  | Field                  | Type        | Null | Key | Default | Extra |
  +-------------------------+--------------+------+-----+---------+-------+
  | archive_name            | varchar(9)  | YES  |    | NULL    |      |
  | lane_names_orig        | varchar(120) | NO  | PRI |        |      |
  | readno                  | varchar(6)  | YES  |    | NULL    |      |
  | ABG_individual_id      | varchar(40)  | YES  |    | NULL    |      |
  | PORHAPNUMBER            | varchar(40)  | YES  |    | NULL    |      |
  | HapMap60Kcheck          | varchar(45)  | YES  |    | NULL    |      |
  | perc_ident              | float        | YES  |    | NULL    |      |
  | indiv_pop_generic_name  | varchar(60)  | YES  |    | NULL    |      |
  | library_type            | varchar(40)  | YES  |    | NULL    |      |
  | md5sum_gzip            | varchar(35)  | YES  |    | NULL    |      |
  | md5sum_txt              | varchar(35)  | YES  |    | NULL    |      |
  | seq_format              | varchar(8)  | YES  |    | NULL    |      |
  | qscore_format          | varchar(16)  | YES  |    | NULL    |      |
  | generated_by            | varchar(30)  | YES  |    | NULL    |      |
  | Machine_type            | varchar(30)  | YES  |    | NULL    |      |
  | date_generated          | varchar(12)  | YES  |    | NULL    |      |
  | Lane_in_run            | varchar(4)  | YES  |    | NULL    |      |
  | lib_previously_known_as | varchar(30)  | YES  |    | NULL    |      |
  | remark                  | varchar(100) | YES  |    | NULL    |      |
  +-------------------------+--------------+------+-----+---------+-------+
<source lang='mysql'>
CREATE TABLE `ABGSAschema_main` (
  `archive_name` varchar(9) DEFAULT NULL,
  `lane_names_orig` varchar(120) NOT NULL DEFAULT '',
  `readno` varchar(6) DEFAULT NULL,
  `ABG_individual_id` varchar(40) DEFAULT NULL,
  `PORHAPNUMBER` varchar(40) DEFAULT NULL,
  `HapMap60Kcheck` varchar(45) DEFAULT NULL,
  `perc_ident` float DEFAULT NULL,
  `indiv_pop_generic_name` varchar(60) DEFAULT NULL,
  `library_type` varchar(40) DEFAULT NULL,
  `md5sum_gzip` varchar(35) DEFAULT NULL,
  `md5sum_txt` varchar(35) DEFAULT NULL,
  `seq_format` varchar(8) DEFAULT NULL,
  `qscore_format` varchar(16) DEFAULT NULL,
  `generated_by` varchar(30) DEFAULT NULL,
  `Machine_type` varchar(30) DEFAULT NULL,
  `date_generated` varchar(12) DEFAULT NULL,
  `Lane_in_run` varchar(4) DEFAULT NULL,
  `lib_previously_known_as` varchar(30) DEFAULT NULL,
  `remark` varchar(100) DEFAULT NULL,
  PRIMARY KEY (`lane_names_orig`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 |
</source>
=== TABLE: fqfile_attributes ===
  +------------------+--------------+------+-----+-------------------+----------------+
  | Field            | Type        | Null | Key | Default          | Extra          |
  +------------------+--------------+------+-----+-------------------+----------------+
  | id              | int(11)      | NO  | PRI | NULL              | auto_increment |
  | lane_names_orig  | varchar(120) | YES  |    | NULL              |                |
  | md5check        | varchar(120) | YES  |    | NULL              |                |
  | filesize        | bigint(15)  | YES  |    | NULL              |                |
  | numlines_in_gzip | bigint(15)  | YES  |    | NULL              |                |
  | qval_offset      | varchar(10)  | YES  |    | NULL              |                |
  | maxlength_seq    | int(6)      | YES  |    | NULL              |                |
  | totnumbases      | bigint(15)  | YES  |    | NULL              |                |
  | totnumreads      | int(12)      | YES  |    | NULL              |                |
  | firstreadname    | varchar(120) | YES  |    | NULL              |                |
  | lastreadname    | varchar(120) | YES  |    | NULL              |                |
  | ts_create        | timestamp    | NO  |    | CURRENT_TIMESTAMP |                |
  +------------------+--------------+------+-----+-------------------+----------------+
<source lang='mysql'>
CREATE TABLE `fqfile_attributes` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `lane_names_orig` varchar(120) DEFAULT NULL,
  `md5check` varchar(120) DEFAULT NULL,
  `filesize` bigint(15) DEFAULT NULL,
  `numlines_in_gzip` bigint(15) DEFAULT NULL,
  `qval_offset` varchar(10) DEFAULT NULL,
  `maxlength_seq` int(6) DEFAULT NULL,
  `totnumbases` bigint(15) DEFAULT NULL,
  `totnumreads` int(12) DEFAULT NULL,
  `firstreadname` varchar(120) DEFAULT NULL,
  `lastreadname` varchar(120) DEFAULT NULL,
  `ts_create` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1561 DEFAULT CHARSET=latin1
</source>
=== TABLE: gc_cont ===
  +-----------------+--------------+------+-----+-------------------+----------------+
  | Field          | Type        | Null | Key | Default          | Extra          |
  +-----------------+--------------+------+-----+-------------------+----------------+
  | id              | int(11)      | NO  | PRI | NULL              | auto_increment |
  | lane_names_orig | varchar(120) | YES  |    | NULL              |                |
  | gc_cont        | float        | YES  |    | NULL              |                |
  | ts_create      | timestamp    | NO  |    | CURRENT_TIMESTAMP |                |
  +-----------------+--------------+------+-----+-------------------+----------------+
<source lang='mysql'>
CREATE TABLE `gc_cont` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `lane_names_orig` varchar(120) DEFAULT NULL,
  `gc_cont` float DEFAULT NULL,
  `ts_create` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1561 DEFAULT CHARSET=latin1
</source>
=== TABLE: filesize ===
This table is currently redundant with information in TABLE: fqfile_attributes. Slated to be removed from the database altogether to avoid confusion in future additions to the database. In addition, current table is not up to date for all entries.
  +-----------------+--------------+------+-----+---------+-------+
  | Field          | Type        | Null | Key | Default | Extra |
  +-----------------+--------------+------+-----+---------+-------+
  | lane_names_orig | varchar(120) | YES  |    | NULL    |      |
  | filesize        | bigint(15)  | YES  |    | NULL    |      |
  | md5check        | varchar(35)  | YES  |    | NULL    |      |
  +-----------------+--------------+------+-----+---------+-------+
<source lang='mysql'>
CREATE TABLE `filesize` (
  `lane_names_orig` varchar(120) DEFAULT NULL,
  `filesize` bigint(15) DEFAULT NULL,
  `md5check` varchar(35) DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
</source>
== Creating the metadata and insert in database ==
=== Main metadata ===
https://github.com/hjmegens/NGStools/blob/master/various_scripts/ABGSAschema/get_test_attributes.py
=== GC content ===
https://github.com/hjmegens/NGStools/blob/master/various_scripts/ABGSAschema/get_gccont.py


== Checking data integrity ==
== Checking data integrity ==
https://github.com/hjmegens/NGStools/blob/master/various_scripts/ABGSAschema/ABGSA_integrity_extdrives.pl


== Backing up ==
== Backing up ==

Latest revision as of 11:31, 30 November 2013

Rationale of the ABGSA

Description of the layout

Metadata database

Metadatabase name is ABGSAschema

 +-----------------------+
 | Tables_in_ABGSAschema |
 +-----------------------+
 | ABGSAschema_main      |
 | filesize              |
 | fqfile_attributes     |
 | gc_cont               |
 +-----------------------+

The most recent mysqldump file can be found at the root of the archive directory tree.

TABLE: ABGSAschema_main

 +-------------------------+--------------+------+-----+---------+-------+
 | Field                   | Type         | Null | Key | Default | Extra |
 +-------------------------+--------------+------+-----+---------+-------+
 | archive_name            | varchar(9)   | YES  |     | NULL    |       |
 | lane_names_orig         | varchar(120) | NO   | PRI |         |       |
 | readno                  | varchar(6)   | YES  |     | NULL    |       |
 | ABG_individual_id       | varchar(40)  | YES  |     | NULL    |       |
 | PORHAPNUMBER            | varchar(40)  | YES  |     | NULL    |       |
 | HapMap60Kcheck          | varchar(45)  | YES  |     | NULL    |       |
 | perc_ident              | float        | YES  |     | NULL    |       |
 | indiv_pop_generic_name  | varchar(60)  | YES  |     | NULL    |       |
 | library_type            | varchar(40)  | YES  |     | NULL    |       |
 | md5sum_gzip             | varchar(35)  | YES  |     | NULL    |       |
 | md5sum_txt              | varchar(35)  | YES  |     | NULL    |       |
 | seq_format              | varchar(8)   | YES  |     | NULL    |       |
 | qscore_format           | varchar(16)  | YES  |     | NULL    |       |
 | generated_by            | varchar(30)  | YES  |     | NULL    |       |
 | Machine_type            | varchar(30)  | YES  |     | NULL    |       |
 | date_generated          | varchar(12)  | YES  |     | NULL    |       |
 | Lane_in_run             | varchar(4)   | YES  |     | NULL    |       |
 | lib_previously_known_as | varchar(30)  | YES  |     | NULL    |       |
 | remark                  | varchar(100) | YES  |     | NULL    |       |
 +-------------------------+--------------+------+-----+---------+-------+

<source lang='mysql'> CREATE TABLE `ABGSAschema_main` (

 `archive_name` varchar(9) DEFAULT NULL,
 `lane_names_orig` varchar(120) NOT NULL DEFAULT ,
 `readno` varchar(6) DEFAULT NULL,
 `ABG_individual_id` varchar(40) DEFAULT NULL,
 `PORHAPNUMBER` varchar(40) DEFAULT NULL,
 `HapMap60Kcheck` varchar(45) DEFAULT NULL,
 `perc_ident` float DEFAULT NULL,
 `indiv_pop_generic_name` varchar(60) DEFAULT NULL,
 `library_type` varchar(40) DEFAULT NULL,
 `md5sum_gzip` varchar(35) DEFAULT NULL,
 `md5sum_txt` varchar(35) DEFAULT NULL,
 `seq_format` varchar(8) DEFAULT NULL,
 `qscore_format` varchar(16) DEFAULT NULL,
 `generated_by` varchar(30) DEFAULT NULL,
 `Machine_type` varchar(30) DEFAULT NULL,
 `date_generated` varchar(12) DEFAULT NULL,
 `Lane_in_run` varchar(4) DEFAULT NULL,
 `lib_previously_known_as` varchar(30) DEFAULT NULL,
 `remark` varchar(100) DEFAULT NULL,
 PRIMARY KEY (`lane_names_orig`)

) ENGINE=InnoDB DEFAULT CHARSET=latin1 | </source>

TABLE: fqfile_attributes

 +------------------+--------------+------+-----+-------------------+----------------+
 | Field            | Type         | Null | Key | Default           | Extra          |
 +------------------+--------------+------+-----+-------------------+----------------+
 | id               | int(11)      | NO   | PRI | NULL              | auto_increment |
 | lane_names_orig  | varchar(120) | YES  |     | NULL              |                |
 | md5check         | varchar(120) | YES  |     | NULL              |                |
 | filesize         | bigint(15)   | YES  |     | NULL              |                |
 | numlines_in_gzip | bigint(15)   | YES  |     | NULL              |                |
 | qval_offset      | varchar(10)  | YES  |     | NULL              |                |
 | maxlength_seq    | int(6)       | YES  |     | NULL              |                |
 | totnumbases      | bigint(15)   | YES  |     | NULL              |                |
 | totnumreads      | int(12)      | YES  |     | NULL              |                |
 | firstreadname    | varchar(120) | YES  |     | NULL              |                |
 | lastreadname     | varchar(120) | YES  |     | NULL              |                |
 | ts_create        | timestamp    | NO   |     | CURRENT_TIMESTAMP |                |
 +------------------+--------------+------+-----+-------------------+----------------+

<source lang='mysql'> CREATE TABLE `fqfile_attributes` (

 `id` int(11) NOT NULL AUTO_INCREMENT,
 `lane_names_orig` varchar(120) DEFAULT NULL,
 `md5check` varchar(120) DEFAULT NULL,
 `filesize` bigint(15) DEFAULT NULL,
 `numlines_in_gzip` bigint(15) DEFAULT NULL,
 `qval_offset` varchar(10) DEFAULT NULL,
 `maxlength_seq` int(6) DEFAULT NULL,
 `totnumbases` bigint(15) DEFAULT NULL,
 `totnumreads` int(12) DEFAULT NULL,
 `firstreadname` varchar(120) DEFAULT NULL,
 `lastreadname` varchar(120) DEFAULT NULL,
 `ts_create` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
 PRIMARY KEY (`id`)

) ENGINE=InnoDB AUTO_INCREMENT=1561 DEFAULT CHARSET=latin1 </source>

TABLE: gc_cont

 +-----------------+--------------+------+-----+-------------------+----------------+
 | Field           | Type         | Null | Key | Default           | Extra          |
 +-----------------+--------------+------+-----+-------------------+----------------+
 | id              | int(11)      | NO   | PRI | NULL              | auto_increment |
 | lane_names_orig | varchar(120) | YES  |     | NULL              |                |
 | gc_cont         | float        | YES  |     | NULL              |                |
 | ts_create       | timestamp    | NO   |     | CURRENT_TIMESTAMP |                |
 +-----------------+--------------+------+-----+-------------------+----------------+

<source lang='mysql'>

CREATE TABLE `gc_cont` (
 `id` int(11) NOT NULL AUTO_INCREMENT,
 `lane_names_orig` varchar(120) DEFAULT NULL,
 `gc_cont` float DEFAULT NULL,
 `ts_create` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
 PRIMARY KEY (`id`)

) ENGINE=InnoDB AUTO_INCREMENT=1561 DEFAULT CHARSET=latin1 </source>

TABLE: filesize

This table is currently redundant with information in TABLE: fqfile_attributes. Slated to be removed from the database altogether to avoid confusion in future additions to the database. In addition, current table is not up to date for all entries.

 +-----------------+--------------+------+-----+---------+-------+
 | Field           | Type         | Null | Key | Default | Extra |
 +-----------------+--------------+------+-----+---------+-------+
 | lane_names_orig | varchar(120) | YES  |     | NULL    |       |
 | filesize        | bigint(15)   | YES  |     | NULL    |       |
 | md5check        | varchar(35)  | YES  |     | NULL    |       |
 +-----------------+--------------+------+-----+---------+-------+

<source lang='mysql'> CREATE TABLE `filesize` (

 `lane_names_orig` varchar(120) DEFAULT NULL,
 `filesize` bigint(15) DEFAULT NULL,
 `md5check` varchar(35) DEFAULT NULL

) ENGINE=MyISAM DEFAULT CHARSET=latin1 </source>

Creating the metadata and insert in database

Main metadata

https://github.com/hjmegens/NGStools/blob/master/various_scripts/ABGSAschema/get_test_attributes.py

GC content

https://github.com/hjmegens/NGStools/blob/master/various_scripts/ABGSAschema/get_gccont.py

Checking data integrity

https://github.com/hjmegens/NGStools/blob/master/various_scripts/ABGSAschema/ABGSA_integrity_extdrives.pl

Backing up

See also

External links