% This is a small Prolog script that splits a FASTA sequence file into one file per sequence and places the files in the directory output/.
% The file name is taken from the sequence name (the header text after ">"): the name is split at the character |, and the third field becomes the file name (output/<field>.fas). A sequence whose file already exists is reported and not overwritten.
% Start the interpreter (SWI-Prolog) with "swipl -L1000M -G1000M" to allocate enough memory for the stacks.
% Then load the script file: [split].
% Then run it on a sequence file: run('seqdb.fasta').
% run(+F)
%
% Entry point: open the FASTA file F for reading and split it into one
% output file per sequence by handing the stream to parse/1.
%
% The input stream is closed by call_cleanup/2, so it is released not
% only on success but also when parse/1 fails or raises an exception.
% once/1 keeps parse/1 from leaving choice points behind that could be
% re-entered after the stream has been closed.
%
% NOTE(review): the library is loaded as sbcl(toolbox) while the helper
% predicates are called with the module qualifier toolkit: -- presumably
% that is the module name declared inside the toolbox file; confirm.
run(F) :-
    use_module(sbcl(toolbox)),
    open(F,read,Fstr),
    call_cleanup(once(parse(Fstr)), close(Fstr)).
% parse(+Fstr)
%
% Top-level read loop over the input stream Fstr.  Each line is read
% with toolkit:read_txtline/2 and passed to parse/2, which starts a new
% output file when the line is a sequence header.
%
% End of input is tested BEFORE a line is read, so an empty file (or a
% stream that a nested parse/2 has already consumed completely) ends the
% loop without asking the reader for a line at end of file.
parse(Fstr) :-
    at_end_of_stream(Fstr), !.
parse(Fstr) :-
    toolkit:read_txtline(Fstr,Line),
    %format("0 read: ~s~n",[Line]),
    % A failing parse/2 (e.g. a header with fewer than three fields)
    % must not abort the loop: ignore the failure and keep reading.
    (   parse(Fstr,Line)
    ->  true
    ;   true
    ),
    parse(Fstr).
% parse(+Fstr,+Line)
%
% Handle one input line, given as a list of character codes.
%
% If Line is a header (first code 62, i.e. ">"), the text after ">" is
% split at code 124 ("|") by toolkit:string2list/3 and the third field
% is used as the output file name output/<Id>.fas.  When that file
% already exists a message is printed and nothing is written; otherwise
% the file is created, the header line is written to it, and
% write_sequence/2 copies the following lines.  The output stream is
% handed over to write_sequence/2, which is the predicate that closes it.
%
% Any line that is not a header is accepted and ignored by the second
% clause.  A header with fewer than three fields makes the call fail
% (the cut after the header test commits to the first clause).
%
% NOTE(review): Id comes verbatim from the input file and is inserted
% into the output path unchecked.
parse(Fstr,Line) :-
    % Match the leading ">" as a character code rather than with
    % append(">",...): the code form does not depend on how the
    % double_quotes flag makes the reader interpret ">", and it agrees
    % with the header test used in write_sequence/2.
    Line = [62|SeqName], !,                         % ">" = 62
    toolkit:string2list(SeqName,124,SeqIdList),     % "|" = 124
    % Only the third field is needed; the first two are named for
    % readability but marked as intentionally unused.
    [_Org,_Constr,Id|_] = SeqIdList,
    % format/3 with an atom(_) sink replaces the obsolete sformat/3.
    format(atom(FN),'output/~a.fas',[Id]),
    (   exists_file(FN)
    ->  format("File ~a already exists!~n",[FN])
    ;   open(FN,write,Sstr),
        format(Sstr,"~s~n",[Line]),
        write_sequence(Fstr,Sstr)
    ), !.
parse(_,_).
% write_sequence(+Fstr,+Sstr)
%
% Copy lines from the input stream Fstr to the output stream Sstr until
% the current sequence ends, then close Sstr.  A sequence ends
%
%   * at end of input: Sstr is closed and the predicate succeeds, so
%     the last output file is always closed;
%   * at the next header line (first code 62, ">"): Sstr is closed and
%     the header is passed to parse/2, which starts the next output
%     file.  Nothing more is written to Sstr after it has been closed,
%     whatever parse/2 does with the header.
%
% Lines are lists of character codes as delivered by
% toolkit:read_txtline/2 and are written back with "~s".
write_sequence(Fstr,Sstr) :-
    at_end_of_stream(Fstr), !,
    close(Sstr).
write_sequence(Fstr,Sstr) :-
    toolkit:read_txtline(Fstr,Line),
    %format("1 read: ~s~n",[Line]),
    (   Line = [62|_]
    ->  % Next record starts: this output file is complete.
        close(Sstr),
        parse(Fstr,Line)
    ;   format(Sstr,"~s~n",[Line]),
        write_sequence(Fstr,Sstr)
    ).