#!/usr/bin/perl
###################################
#Author :Jiang Li
#Email  :riverlee2008@gmail.com
#MSN    :riverlee2008@live.cn
#Address:Harbin Medical University
#TEl    :+86-13936514493
###################################
use strict;
use warnings;

my $usage=<<USAGE;
perl $0 inputfile
		--inputfile is the obo format file you downloaded from Disease Ontology
		  for example perl $0 HumanDO.obo
USAGE

# The path of the OBO file is required as the first command-line argument;
# without it, show the usage text and exit with a non-zero status.
if(@ARGV<1){
	print $usage;
	exit(1);
}

my $infile=$ARGV[0];


# Read the input file and build a tab-separated file with four columns that
# correspond to the table do_synonym in DO.db.
#
# The three-argument form of open is used on purpose: with the two-argument
# form a command-line argument such as ">file" or "cmd |" would be interpreted
# as a mode or a command instead of a plain file name to read.
open(IN,'<',$infile) or die "Cannot open '$infile' for reading: $!";
open(O1,'>','do_synonym.txt') or die "Cannot open 'do_synonym.txt' for writing: $!";

# Header row of the output table.
print O1 "_id\tsynonym\tsecondary\tlike_do_id\n";

=sample
[Term]
id: DOID:0014667
name: disease of metabolism
synonym: "disorder of metabolism NOS" EXACT [SNOMEDCT_2005_07_31:190961002]
synonym: "disorder of metabolism NOS (disorder)" EXACT [SNOMEDCT_2005_07_31:267456000]
alt_id: DOID:12976
alt_id: DOID:12977

=cut


# Text of the stanza currently being accumulated (its header line plus all
# following lines up to the next [Term]/[Typedef] header).
my $str="";
# DOID => list of rows [synonym, secondary, like_do_id], kept in file order.
my %content;

# Extract the alt_id and synonym lines of one stanza and append them to the
# hash referenced by $content_ref under the stanza's DOID.
#   $stanza      - complete text of one stanza, starting with its header line
#   $content_ref - reference to the DOID => rows hash to fill
# Stanzas that are not [Term] stanzas (including the file header and the empty
# string), obsolete terms, and terms without a DOID id contribute nothing.
sub _record_synonyms {
	my ($stanza,$content_ref)=@_;

	return unless $stanza=~/^\[Term\]/;
	# Any mention of is_obsolete excludes the term.
	return if $stanza=~/is_obsolete/;

	# Anchored to the start of a line so that "alt_id: DOID:..." can never be
	# mistaken for the primary id.
	my ($doid)=$stanza=~/^id: (DOID:\d+)/m;
	unless(defined $doid){
		warn "Skipping [Term] stanza without a DOID id\n";
		return;
	}

	# A value ends at the line break (LF or CRLF) or at the end of the stanza,
	# so a final line without a newline is still picked up and no carriage
	# return leaks into the output.
	while($stanza=~/alt_id: (DOID:.*?)\r?(?:\n|\z)|synonym: (.*?)\r?(?:\n|\z)/g){
		my ($alt_id,$synonym)=($1,$2);
		if(defined $alt_id){
			# Alternate ids are stored as secondary ids: the id fills both the
			# synonym and the secondary column and like_do_id is set to 1.
			$alt_id=~tr/\t/ /;
			push @{$content_ref->{$doid}},[$alt_id,$alt_id,1];
		}else{
			# Tabs would break the tab-separated output, so turn them into spaces.
			$synonym=~tr/\t/ /;
			push @{$content_ref->{$doid}},[$synonym,"",0];
		}
	}
	return;
}

while(my $line=<IN>){
	if($line=~/^\[(Term|Typedef)\]/){
		my $kind=$1;
		# A new stanza header completes the stanza collected so far.
		_record_synonyms($str,\%content);
		if($kind eq 'Typedef'){
			# Parsing stops at the first [Typedef] stanza.
			# NOTE(review): assumes no [Term] stanzas follow it - confirm.
			$str="";
			last;
		}
		$str=$line;
	}else{
		$str.=$line;
	}
}
# Flush the stanza still pending at end of file; without this the last [Term]
# would be lost whenever the file does not end with a [Typedef] section.
_record_synonyms($str,\%content);

###################################
# The output needs the _id of every term, which is taken from do_term.txt,
# the result generated by prepare_do_term.pl. The first column of that file
# is the _id and the second column is the DOID.
open(my $term_fh,'<','do_term.txt') or die "Cannot open 'do_term.txt' for reading: $!";
my $header_line=<$term_fh>;	# the first line is skipped as a header row
my %doid2id;
while(my $row=<$term_fh>){
	$row=~s/\r|\n//g;
	next unless length $row;
	my($id,$doid)=split /\t/,$row;
	# Ignore rows that do not have at least the _id and DOID columns.
	next unless defined $doid;
	$doid2id{$doid}=$id;
}
close $term_fh;

# Write one output row per collected synonym / alternate id, ordered by DOID.
foreach my $doid(sort keys %content){
	my $id=$doid2id{$doid};
	# A term that is absent from do_term.txt has no _id to refer to; report it
	# instead of emitting a row with an empty key column.
	unless(defined $id){
		warn "No _id found in do_term.txt for '$doid'; skipping its synonyms\n";
		next;
	}
	foreach my $ref (@{$content{$doid}}){
		print O1 join("\t",$id,@{$ref}),"\n";
	}
}

# Buffered write errors only surface when the handle is closed, so check it.
close O1 or die "Cannot finish writing 'do_synonym.txt': $!";
close IN;
