Changes by last author:

Added:
This is a small Prolog script that will split a FASTA sequence file into single files for each sequence and place them in the directory output/.

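The file name is derived from the sequence name (the header line without the leading ">"): the name is split at the character |, and the third field is used as the file name, giving output/<field>.fas. If that file already exists, a message is printed and the sequence is skipped.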

Start the interpreter (SWI-Prolog) with "swipl -L1000M -G1000M" to allocate enough memory for the stacks.

Then load the script file: [split].

Then run it on a sequence file: run('seqdb.fasta').

<code>

% run(+F)
%
% Entry point: loads the helper library, opens the FASTA file F for
% reading and hands the stream to parse/1.
%
% The stream is opened inside setup_call_cleanup/3 so that it is closed
% even when parse/1 raises an exception (e.g. an output file cannot be
% opened); previously the input stream leaked in that case.
run(F) :-
    use_module(sbcl(toolbox)),   % NOTE(review): helpers are called as toolkit:... below - confirm module name
    setup_call_cleanup(
        open(F,read,Fstr),
        parse(Fstr),
        close(Fstr)).

% parse(+Fstr)
%
% Failure-driven main loop over the input stream Fstr. Each iteration
% reads one line and passes it to parse/2; the loop stops as soon as the
% stream is at its end.
%
% End of input is tested BEFORE reading, so an empty file (or resuming
% after the last record has been consumed further down) never calls
% read_txtline on an exhausted stream. Whether parse/2 succeeds or fails,
% control returns to repeat/0 and the end-of-stream test decides.
parse(Fstr) :-
    repeat,
    (   at_end_of_stream(Fstr)
    ->  true
    ;   toolkit:read_txtline(Fstr,Line),
        %format("0 read: ~s~n",[Line]),
        parse(Fstr,Line),
        fail                     % force the next iteration
    ),
    !.

% parse(+Fstr, +Line)
%
% Handles one input line, given as a list of character codes.
%
% If Line is a FASTA header (starts with ">"), the text after ">" is
% passed to toolkit:string2list/3 with separator code 124 ("|"), and the
% third element of the result names the output file output/<Id>.fas.
%   - If that file already exists, a message is printed and nothing is
%     written.
%   - Otherwise the file is created, the header line is written to it,
%     and write_sequence/2 takes over the new output stream.
% Any other line is ignored (second clause).
%
% Once a line is recognised as a header the clause is committed: if the
% header has fewer than three fields, the call fails rather than falling
% through to the catch-all clause.
parse(Fstr,Line) :-
    Line = [62|SeqName], !,                       % ">" = 62
    toolkit:string2list(SeqName,124,SeqIdList),   % "|" = 124; presumably splits at each "|" - confirm in toolkit
    [_Org,_Constr,Id|_] = SeqIdList,              % only the third field is used
    format(atom(FN),'output/~a.fas',[Id]),
    (   exists_file(FN)
    ->  format("File ~a already exists!~n",[FN])
    ;   open(FN,write,Sstr),
        format(Sstr,"~s~n",[Line]),
        write_sequence(Fstr,Sstr)                 % Sstr is handed over to write_sequence/2
    ), !.
parse(_,_).

% write_sequence(+Fstr, +Sstr)
%
% Copies lines from the input stream Fstr to the output stream Sstr
% until the next FASTA header or the end of the input.
%
%   - At end of input, Sstr is closed and the predicate succeeds, so the
%     last output file is properly closed.
%   - On a header line (first code 62, ">"), Sstr is closed and the line
%     is handed to parse/2, which starts the next record. Sstr is not
%     used again after that, so nothing is written to a closed stream
%     when parse/2 returns (e.g. because the next output file already
%     exists).
%   - Any other line is written to Sstr followed by a newline.
%
% NOTE(review): assumes toolkit:read_txtline/2 is deterministic and
% returns the line as a code list - confirm in toolkit.
write_sequence(Fstr,Sstr) :-
    (   at_end_of_stream(Fstr)
    ->  close(Sstr)
    ;   toolkit:read_txtline(Fstr,Line),
        %format("1 read: ~s~n",[Line]),
        (   Line = [62|_]
        ->  close(Sstr),
            parse(Fstr,Line)
        ;   format(Sstr,"~s~n",[Line]),
            write_sequence(Fstr,Sstr)
        )
    ).

</code>