LOCUS       KJ755545                5969 bp    DNA     linear   BCT 29-MAR-2016
DEFINITION  Escherichia coli strain 4370-53 serotype O134:K-:H35 O-antigen gene
            cluster, complete sequence.
ACCESSION   KJ755545
VERSION     KJ755545.1
KEYWORDS    .
SOURCE      Escherichia coli
  ORGANISM  Escherichia coli
            Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales;
            Enterobacteriaceae; Escherichia.
REFERENCE   1  (bases 1 to 5969)
  AUTHORS   DebRoy,C., Fratamico,P.M., Yan,X., Baranzoni,G., Liu,Y.,
            Needleman,D.S., Tebbs,R., O'Connell,C.D., Allred,A., Swimley,M.,
            Mwangi,M., Kapur,V., Raygoza Garay,J.A., Roberts,E.L. and Katani,R.
  TITLE     Comparison of O-Antigen Gene Clusters of All O-Serogroups of
            Escherichia coli and Proposal for Adopting a New Nomenclature for
            O-Typing
  JOURNAL   PLoS ONE 11 (1), E0147434 (2016)
   PUBMED   26824864
  REMARK    Publication Status: Online-Only
REFERENCE   2  (bases 1 to 5969)
  AUTHORS   Yan,X., Fratamico,P.M., Tebbs,R.S., O'Connell,C.D., Baranzoni,G.M.,
            Liu,Y. and DebRoy,C.
  TITLE     Direct Submission
  JOURNAL   Submitted (25-APR-2014) Molecular Characterization of Foodborne
            Pathogens Research Unit, USDA-ARS, 600 East Mermaid Lane, Wyndmoor,
            PA 19038, USA
COMMENT     ##Assembly-Data-START##
            Assembly Method       :: CLC Genomics Workbench v. 7.0
            Coverage              :: Average 50X
            Sequencing Technology :: IonTorrent
            ##Assembly-Data-END##
FEATURES             Location/Qualifiers
     source          1..5969
                     /organism="Escherichia coli"
                     /mol_type="genomic DNA"
                     /strain="4370-53"
                     /serotype="O134:K-:H35"
                     /db_xref="taxon:562"
     misc_feature    1..5969
                     /note="O-antigen gene cluster"
     CDS             69..914
                     /codon_start=1
                     /transl_table=11
                     /product="hypothetical protein"
                     /protein_id="AIG62410.1"
                     /translation="MINVRKIAKLIIGDNLYWKLKFFYATKKWPNIYDPTFFNEKVRW
                     RMLHDQNYLYVKCADKYEVRNYIEKKIGSEYLVPLRYMFTNNDDITDKCILSNSVAKS
                     NHGAGMIEFIPETYSLSELKNTFGKWLNLDYTSESNEKHYSRIPRKILIEESLCKEGK
                     PPVDYKFHTFKQADGSFRFVLQLVNGRFHNESRGYYLEDLNKCIWYHGEGFHSIPQEH
                     IKPLKKAMELSSILCEDFNYVRVDWYIYDNKLYFGELTFTPGAGSSYEFGNELEKLMA
                     KYWQL"
     gene            916..2217
                     /gene="wzx"
     CDS             916..2217
                     /gene="wzx"
                     /codon_start=1
                     /transl_table=11
                     /product="O-antigen flippase"
                     /protein_id="AIG62409.1"
                     /translation="MKIIYNTLWMIFEKLVLSLSLLIINAYVARYLGPQQYGVIAYSI
                     TLLSISVVISNFGTNTLIFQKISKNHSVAKIIIPPIQILKTILYIICILIMFFCLYIF
                     SYDFDPYLTFGLAVAFYFQAIDSYTWYNDAKLKSKYNSLINTIALIGAIISRYYLVKL
                     SMDLKWFAIPYVCNYGISFLLKRYFYIKEEKTKSIKSFIFEKKDHTLKVISFFIISGA
                     PLFLSEISILIYTRLTNLYLGQIKDIDAVGIFNASYVLATAWTVVPLAFVTSVFTIIY
                     KEKDISNRALIGTFLTLLMLFFGFLLVILCYFFKQQIISIVYGSGFELSSSILPVLIL
                     GSVFSLLGVISYRMIISMHGYKYIAKKMVIMGVISIPLNYYLIKSLGIIGAAYATVSI
                     ELISATLANYFFKGGVIAKMHLKMITSPYSVSMQAITKILK"
     gene            2219..3124
                     /gene="wfgD_1"
     CDS             2219..3124
                     /gene="wfgD_1"
                     /codon_start=1
                     /transl_table=11
                     /product="UDP-Glc:alpha-D-GlcNAc-diphosphoundecaprenol
                     beta-1,3-glucosyltransferase"
                     /protein_id="AIG62408.1"
                     /translation="MKDQALVSIIIPTYGRCDTLNLAVQSVLKQDYKNIEVLVIDDND
                     NPIMSKKVRANLSEYQSDPRLKYFSDGVNRGGAGARNKGIELSIGYYITFLDDDDLFL
                     ENKISKQIDFLEKEQLDVCLCDMYFKQGDRFIKKSNCYANGTTLKEFILKGNCYTPMI
                     MCKRHVLIDIGGFVLTPRFQDHLLMIRILSNTSKVGHLSEQLFVHNNHNGERITYSNK
                     SAEAYILREKMEREYIYLLNSNERKTFEFKKALIRMKIYRLKNEHMKLLKITLYAMKN
                     LTNFSDVYALGKTLIRVMFFGSKNV"
     gene            3121..4260
                     /gene="wzy"
     CDS             3121..4260
                     /gene="wzy"
                     /codon_start=1
                     /transl_table=11
                     /product="O-antigen polymerase"
                     /protein_id="AIG62407.1"
                     /translation="MKTNNKKIKALIFFISIFLYFFKSVFISLELPEDLIGGVSNIIN
                     ALILAILSPVILLSNKKQCAFIIFLIILFVYNAIFYNNIYMFGFIIIACMLILSNGIY
                     WRDIIRYLSLAHFLVFLFIIPLVFFAEKYSFIDDRFGVRYTFGFHNPNTFSQYLISLY
                     VVFLLLFVEVIKKKSLQFLTVIILTLFIYGVISLTGSRTGMILTIITGFGFLACTLSK
                     TNDKKRRKFVYLYILGAGILCFIQYYLALTYNHSEFSKNINTILSGRIWFSSQLTSQL
                     WPVPYFHGVNINDYLPIDFFYVAYFYNLGIFIGSWFIYLFIRKMFVQTYTPVMVIALW
                     LSLAITVTENYYAIPLYNISLFIVFSSRYVINENENSCTYPSSQK"
     CDS             4217..5050
                     /codon_start=1
                     /transl_table=11
                     /product="core-2/I-branching enzyme"
                     /protein_id="AIG62406.1"
                     /translation="MKMKTAVLIQAHKNENYIRELALNNPSVRFYVHMDAKYPNKIQW
                     IKNECIDNIYLIENPVSVYWGGSSQIFATLLLMKKAYSDKRNKFFHLVSSECVPLKSF
                     VEIENEWSMNENCQFIESHRDKNNEWRLKVRVPHSNTKYLRTFLGRCANKLFKVTTFL
                     WDSVGLNPENYFFGSQWFSVTRNFVEQVIDEDNEDFFKTFHNITCADEHAFAIFARTK
                     YSNPNDIQDNNKRFIKFLGKSSPEYLSLDQVGQIKNLNSYWFCRKVRCNDLLKIIRQE
                     K"
     gene            5053..5781
                     /gene="wfgD_2"
     CDS             5053..5781
                     /gene="wfgD_2"
                     /codon_start=1
                     /transl_table=11
                     /product="UDP-Glc:alpha-D-GlcNAc-diphosphoundecaprenol
                     beta-1,3-glucosyltransferase"
                     /protein_id="AIG62405.1"
                     /translation="MLFSIIMPAYNSQATIKESIASVLNQTYQNFELIIINDNSSDAT
                     LSIITNFCHDRRILVLNNEENMGVAHSRNRGLEMASGEIIAFLDSDDIWYPNKLEEQY
                     NCFLSGHKIVCSYYDVIDSEGNIVGTRNAPTLVTFEKMLKSNFIGNLTGAYASSFFGK
                     CYQRNIGHEDYIMWLELVKKQPAYCIKNKLAAYRISNKSLSSNKMKVVIWQWKIYRKA
                     LGMNIIKSLYYFLNYICFAIKKRN"
ORIGIN      
        1 cacatcgtag gcatgcatgc agtgctctgg tagctgtaaa gccaggggcg gtagcttggt
       61 aaaaaatcat gataaatgta agaaaaatag caaaattaat tattggcgat aatttgtatt
      121 ggaagttgaa gtttttttat gccactaaaa aatggcctaa tatttatgat ccaacttttt
      181 ttaatgaaaa agttcgctgg cgaatgctgc acgaccaaaa ttatctttat gttaaatgcg
      241 ctgataagta tgaggtcagg aactatattg agaaaaaaat tggcagtgaa tatttagttc
      301 cgcttcgtta tatgtttact aataatgatg atattactga taaatgtatt ctgagtaata
      361 gtgtagcaaa gtctaaccat ggtgcaggta tgatagagtt cattcctgag acgtattcat
      421 taagcgagtt gaagaatact tttggaaaat ggctaaatct tgactatacc tccgagtcca
      481 atgaaaaaca ttatagtagg attccgagaa aaattcttat tgaagagtct ttgtgtaagg
      541 agggaaaacc tcctgtcgat tataaatttc atacatttaa acaagcagat ggttcatttc
      601 gttttgttct tcagttggta aatggtcggt ttcataatga atcaagaggt tattatctgg
      661 aggatcttaa taaatgcata tggtatcatg gtgaaggttt tcattctatt ccacaagagc
      721 atataaaacc tttaaaaaag gctatggaat tgtcatcaat tctatgtgaa gattttaatt
      781 acgtacgagt tgattggtat atctatgata ataagcttta ttttggtgaa ttaaccttta
      841 cacctggtgc tggaagttca tatgaatttg gtaatgagct tgaaaaatta atggccaaat
      901 attggcagtt gtaaaatgaa aataatatat aacacacttt ggatgatatt tgaaaaatta
      961 gtattatccc tatctttatt aatcattaat gcctatgttg cgagatattt agggcctcag
     1021 caatatgggg ttatagccta ttctattaca ctgctcagta tttctgtggt tattagtaat
     1081 tttggcacta acacattaat tttccaaaaa ataagcaaaa atcacagtgt tgcaaaaatc
     1141 attataccgc cgatacagat attgaaaaca atattataca taatatgtat acttattatg
     1201 tttttctgtt tatatatttt tagttatgat tttgatcctt atctgacatt tggtttagct
     1261 gtagcatttt actttcaagc tattgatagc tatacatggt ataatgatgc aaaattgaaa
     1321 tcaaagtaca atagtcttat taatacaata gccttaatag gtgctattat aagtcgttac
     1381 tatttggtaa aactatcgat ggatttaaaa tggtttgcta ttccttatgt atgcaattac
     1441 gggatatctt ttttactgaa aagatatttt tatattaaag aagaaaaaac taaaagcatc
     1501 aaatcattta tatttgaaaa aaaagatcat acactaaaag ttatatcttt ctttataatt
     1561 tcaggtgcgc cattattttt aagtgaaata tcaattctta tttatacaag acttacaaac
     1621 ttatatctag gacaaattaa agatattgat gcggtcggca tattcaatgc atcttatgtt
     1681 ttagctactg cctggacagt cgttcctttg gcatttgtta cgagtgtctt tactataatc
     1741 tataaagaaa aggatatttc taacagggct ttaattggaa cattcttgac tttgttgatg
     1801 ttgttttttg gttttttact tgtcatattg tgttattttt tcaaacaaca gattatcagt
     1861 attgtatatg gctccggttt cgagttatct tcttctatat tacctgtgct tattcttggc
     1921 tcagttttct ctcttttagg tgttatttcc tatcggatga taatatcaat gcatgggtac
     1981 aaatatatag caaaaaagat ggttattatg ggggtgataa gtattccatt aaattattat
     2041 cttattaaat cgttaggtat tattggtgca gcttatgcta cagttagtat tgaacttata
     2101 tcagccacat tggcgaatta tttttttaaa ggtggggtga ttgcaaaaat gcatttaaaa
     2161 atgataacgt ctccctacag tgtatctatg caagctataa ctaaaatact gaaataaaat
     2221 gaaagatcaa gcattagtct ctattattat tccaacatat gggcggtgtg acactttaaa
     2281 tcttgctgtc caaagtgtac taaaacagga ttataaaaac atagaagtat tggttattga
     2341 tgataatgat aatccaataa tgtctaagaa agttcgtgcc aatctatcgg aatatcaatc
     2401 cgatcccaga ttaaagtatt ttagtgatgg ggttaataga ggtggagcag gggcaagaaa
     2461 taaaggaatt gaattgtcaa taggttatta tattactttt cttgatgacg atgatttgtt
     2521 tttagaaaat aaaatatcaa agcaaattga ctttttagaa aaagaacaac ttgatgtatg
     2581 cctttgtgat atgtatttta aacaaggaga ccgttttata aaaaaatcaa attgctatgc
     2641 aaatggaaca acattaaaag aatttatact caagggaaat tgttacacgc caatgatcat
     2701 gtgtaaacga catgttctaa ttgatattgg tggattcgtt ttaactcctc ggtttcaaga
     2761 tcacttatta atgattagga ttctttctaa tacttcgaaa gtggggcatt tatctgagca
     2821 attatttgtt cataataacc ataatggtga aaggattaca tatagtaata aatctgctga
     2881 agcatatata ttacgcgaaa agatggagcg agaatatatt tatttattaa attcaaatga
     2941 acgtaaaaca ttcgaattca agaaggcact tataaggatg aaaatatata gattaaaaaa
     3001 tgaacatatg aagttactta aaataacatt atatgccatg aaaaacctca ctaacttttc
     3061 agatgtttac gcattaggaa aaactttaat aagagtgatg ttttttggga gtaaaaatgt
     3121 gtgaaaacaa ataacaaaaa aataaaagca ttgatttttt ttatatcaat atttctctac
     3181 tttttcaaaa gtgtttttat ttcattagag cttcctgagg atttaatagg gggagtatca
     3241 aatatcataa atgcactaat attggctata ttatctccag tcattctact gtcaaataaa
     3301 aaacaatgtg catttataat ttttttaatt atattgtttg tttataatgc cattttttat
     3361 aataacatct atatgtttgg ttttataatt attgcatgca tgttgatact aagtaacgga
     3421 atatattgga gggatattat cagatacttg tcactggccc attttttggt gtttcttttt
     3481 atcattcctt tagttttttt tgcagaaaaa tactctttta ttgatgatag atttggagtt
     3541 agatatacat ttggcttcca taatccgaat acatttagtc aatatttaat ttctttatat
     3601 gtagtgttct tgttgttatt tgttgaagtt atcaaaaaaa aatctctgca atttttgact
     3661 gttataatat tgactctatt tatatatggg gtgatatcct taactggttc aaggacggga
     3721 atgatattaa ctattattac cggcttcggt tttttagcat gtacactgtc taaaacgaat
     3781 gataaaaaaa gaagaaagtt cgtatattta tacatactag gtgcaggaat attatgtttt
     3841 atacaatatt atcttgcctt aacatataat cattcagagt tctcaaaaaa cataaataca
     3901 attctttctg gtagaatatg gttttcttca caattaacta gccaattatg gcctgtccca
     3961 tatttccatg gtgtgaatat taatgattat ttgccaattg atttcttcta cgtggcttat
     4021 ttttacaatc ttgggatttt tattggttct tggtttatat acctctttat aagaaaaatg
     4081 tttgtgcaaa catatactcc tgttatggta attgcgcttt ggttatcgct tgcaataacg
     4141 gtcactgaaa attattatgc aattccgttg tataatatta gtttatttat tgttttttcc
     4201 tcaagatatg tgattaatga aaatgaaaac agctgtactt atccaagctc acaaaaatga
     4261 aaattatatt agagaattgg cgttaaacaa tccatcagtt cgtttttatg tacatatgga
     4321 tgccaagtat ccaaataaaa tccaatggat taaaaatgaa tgtattgata acatctattt
     4381 aattgaaaat ccagtttcgg tttactgggg gggaagtagt cagatttttg caactctatt
     4441 attgatgaag aaggcataca gtgataagag aaataagttt tttcatctcg taagttcaga
     4501 atgcgttcca ttaaagtcat ttgttgaaat tgaaaacgaa tggagcatga atgaaaactg
     4561 tcaatttatt gagtcgcata gagataaaaa taatgagtgg agactaaagg ttcgggtccc
     4621 tcactcaaat acgaagtatc ttaggacttt tttaggacga tgtgccaata aactctttaa
     4681 agtaactact tttttatggg atagtgtagg tttaaatccc gaaaactatt tttttggttc
     4741 ccaatggttt tcggtaacta gaaattttgt tgagcaagta atagatgaag ataatgaaga
     4801 tttttttaaa acattccata atataacttg tgcagatgag catgcttttg ctatttttgc
     4861 acgaacaaaa tattctaacc caaatgatat tcaagataac aataaaagat tcattaagtt
     4921 tttaggtaag tcaagtccag aatatttaag tttagatcaa gtggggcaga ttaagaatct
     4981 taactcgtat tggttttgca ggaaggtgag atgtaatgat ttattaaaaa taataaggca
     5041 ggagaagtga ttatgctttt ttctatcatt atgccggcat ataattcaca ggcaacaata
     5101 aaagaaagta tagcatccgt gcttaatcag acatatcaaa attttgaatt aattataatt
     5161 aatgataatt caagtgacgc aactttatcg ataataacaa atttctgtca tgatagaagg
     5221 attcttgtat taaataatga ggaaaatatg ggagtggcac actctcgtaa cagaggattg
     5281 gaaatggcat ctggagaaat aattgccttt ctggatagtg acgatatatg gtaccccaat
     5341 aaattggaag agcaatataa ttgttttctt tctggtcaca aaattgtctg ctcttattat
     5401 gatgtaattg actctgaagg gaatattgtt ggaacaagaa atgcacctac actcgttaca
     5461 tttgaaaaaa tgcttaaaag taattttata ggtaacttga ctggtgcata tgcctcctct
     5521 ttttttggaa aatgttatca gagaaatatt gggcatgagg attatattat gtggttggaa
     5581 ttggtaaaaa aacaacctgc ttattgcatt aaaaataaac tagccgcata ccgaatttca
     5641 aataaatctc tttcttctaa taaaatgaaa gtggtaattt ggcaatggaa aatatatcgt
     5701 aaagcattag gtatgaatat tatcaagtct ttatattact tcttgaatta tatatgtttt
     5761 gcgataaaaa aaagaaacta atattcaata agttttaatt tttttttcat tcaatttcag
     5821 taataattgt agttatttgc gtatagttat accctaaccg aacatacccg cagacaacac
     5881 cccctgacag gagtaaacaa tgtcaaagca acagatcggc gtcgtcggta tggcagtgat
     5941 ggggcgcaac cttgcgctca atatcgaaa
//